merged with master

This commit is contained in:
Evghenii
2013-12-25 21:32:34 +01:00
29 changed files with 633 additions and 112 deletions

View File

@@ -89,7 +89,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
if version_LLVM == "trunk":
SVN_PATH="trunk"
if version_LLVM == "3.4":
SVN_PATH="tags/RELEASE_34/rc2"
SVN_PATH="tags/RELEASE_34/final"
version_LLVM = "3_4"
if version_LLVM == "3.3":
SVN_PATH="tags/RELEASE_33/final"
@@ -129,8 +129,23 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang",
from_validation)
os.chdir("..")
if current_OS == "MacOS" and int(current_OS_version.split(".")[0]) >= 13:
# Starting with MacOS 10.9 Maverics, the system doesn't contain headers for standard C++ library and
# the default library is libc++, bit libstdc++. The headers are part of XCode now. But we are checking out
# headers as part of LLVM source tree, so they will be installed in clang location and clang will be able
# to find them. Though they may not match to the library installed in the system, but seems that this should
# not happen.
# Note, that we can also build a libc++ library, but it must be on system default location or should be passed
# to the linker explicitly (either through command line or environment variables). So we are not doing it
# currently to make the build process easier.
os.chdir("projects")
try_do_LLVM("load libcxx http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " libcxx",
from_validation)
os.chdir("..")
if extra == True:
os.chdir("./clang/tools")
os.chdir("tools/clang/tools")
try_do_LLVM("load extra clang extra tools ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra",
from_validation)
@@ -138,7 +153,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
try_do_LLVM("load extra clang compiler-rt ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt",
from_validation)
os.chdir("../")
os.chdir("..")
else:
tar = tarball.split(" ")
os.makedirs(LLVM_SRC)
@@ -563,6 +578,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
def Main():
global current_OS
global current_OS_version
current_OS_version = platform.release()
if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True:
current_OS = "Windows"
else:

View File

@@ -3,13 +3,13 @@
define(`MASK',`i32')
define(`WIDTH',`1')
include(`util.m4')
rdrand_decls()
; Define some basics for a 1-wide target
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
aossoa()
rdrand_decls()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store
@@ -653,10 +653,121 @@ define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alw
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; svml stuff
include(`svml.m4')
svml_stubs(float,f,WIDTH)
svml_stubs(double,d,WIDTH)
declare <1 x float> @__svml_sind(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_asind(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_cosd(<1 x float>) nounwind readnone alwaysinline
declare void @__svml_sincosd(<1 x float>, <1 x double> *, <1 x double> *) nounwind readnone alwaysinline
declare <1 x float> @__svml_tand(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_atand(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_atan2d(<1 x float>, <1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_expd(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_logd(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_powd(<1 x float>, <1 x float>) nounwind readnone alwaysinline
define <1 x float> @__svml_sinf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm.sin.f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
unary1to1(float,@llvm.sin.f32)
}
define <1 x float> @__svml_asinf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_asinf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm.asin.f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
unary1to1(float,@llvm.asin.f32)
}
define <1 x float> @__svml_cosf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm.cos.f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
unary1to1(float, @llvm.cos.f32)
}
define void @__svml_sincosf(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline {
; %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
; store <1 x float> %s, <1 x float> * %1
; ret void
%sin = call <1 x float> @__svml_sinf(<1 x float> %0)
%cos = call <1 x float> @__svml_cosf(<1 x float> %0)
store <1 x float> %sin, <1 x float> * %1
store <1 x float> %cos, <1 x float> * %2
ret void
}
define <1 x float> @__svml_tanf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm_tan_f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
;unasry1to1(float, @llvm.tan.f32)
; UNSUPPORTED!
ret <1 x float > %0
}
define <1 x float> @__svml_atanf(<1 x float>) nounwind readnone alwaysinline {
; %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
; ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm_atan_f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
;unsary1to1(float,@llvm.atan.f32)
;UNSUPPORTED!
ret <1 x float > %0
}
define <1 x float> @__svml_atan2f(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
;ret <1 x float> %ret
;%y = extractelement <1 x float> %0, i32 0
;%x = extractelement <1 x float> %1, i32 0
;%q = fdiv float %y, %x
;%a = call float @llvm.atan.f32 (float %q)
;%rv = insertelement <1 x float> undef, float %a, i32 0
;ret <1 x float> %rv
; UNSUPPORTED!
ret <1 x float > %0
}
define <1 x float> @__svml_expf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
;ret <1 x float> %ret
unary1to1(float, @llvm.exp.f32)
}
define <1 x float> @__svml_logf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
;ret <1 x float> %ret
unary1to1(float, @llvm.log.f32)
}
define <1 x float> @__svml_powf(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_powf4(<1 x float> %0, <1 x float> %1)
;ret <1 x float> %ret
%r = extractelement <1 x float> %0, i32 0
%e = extractelement <1 x float> %1, i32 0
%s = call float @llvm.pow.f32(float %r,float %e)
%rv = insertelement <1 x float> undef, float %s, i32 0
ret <1 x float> %rv
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float min/max
@@ -881,14 +992,3 @@ declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind read
define_avgs()
;;;;;;; nvptx64
declare i32 @__tid_x() nounwind readnone alwaysinline
declare i32 @__warpsize() nounwind readnone alwaysinline
declare i32 @__ctaid_x() nounwind readnone alwaysinline
declare i32 @__ctaid_y() nounwind readnone alwaysinline
declare i32 @__ctaid_z() nounwind readnone alwaysinline
declare i32 @__nctaid_x() nounwind readnone alwaysinline
declare i32 @__nctaid_y() nounwind readnone alwaysinline
declare i32 @__nctaid_z() nounwind readnone alwaysinline

View File

@@ -371,6 +371,8 @@ declare i32 @__packed_load_active(i32 * nocapture, <WIDTH x i32> * nocapture,
<WIDTH x i1>) nounwind
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
<WIDTH x i1>) nounwind
declare i32 @__packed_store_active2(i32 * nocapture, <WIDTH x i32> %vals,
<WIDTH x i1>) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -18,6 +18,7 @@ syn keyword ispcConditional cif
syn keyword ispcRepeat cdo cfor cwhile
syn keyword ispcBuiltin programCount programIndex
syn keyword ispcType export uniform varying int8 int16 int32 int64
syn keyword ispcOperator operator
"double precision floating point number, with dot, optional exponent
syn match cFloat display contained "\d\+\.\d*d[-+]\=\d*\>"
@@ -33,6 +34,7 @@ HiLink ispcConditional Conditional
HiLink ispcRepeat Repeat
HiLink ispcBuiltin Statement
HiLink ispcType Type
HiLink ispcOperator Operator
delcommand HiLink
let b:current_syntax = "ispc"

View File

@@ -1,3 +1,47 @@
=== v1.6.0 === (19 December 2013)
A major new version of ISPC with major improvements in performance and
stability. Linux and MacOS binaries are based on patched version of LLVM 3.3,
while Windows version is based on LLVM 3.4rc3. LLVM 3.4 significantly improves
stability on Win32 platform, so we've decided not to wait for official LLVM 3.4
release.
The list of the most significant changes is:
* New avx1-i32x4 target was added. It may play well for you, if you are focused
on integer computations or FP unit in your hardware is 128 bit wide.
* Support for calculations in double precision was extended with two new
targets avx1.1-i64x4 and avx2-i64x4.
* Language support for overloaded operators was added.
* New library shift() function was added, which is similar to rotate(), but is
non-circular.
* The language was extended to accept 3 dimensional tasking - a syntactic sugar,
which may facilitate programming of some tasks.
* Regression, which broke --opt=force-aligned-memory is fixed.
If you are not using pre-built binaries, you may notice the following changes:
* VS2012/VS2013 are supported.
* alloy.py (with -b switch) can build LLVM for you on any platform now
(except MacOS 10.9, but we know about the problem and working on it).
This is a preferred way to build LLVM for ISPC, as all required patches for
better performance and stability will automatically apply.
* LLVM 3.5 (current trunk) is supported.
There are also multiple fixes for better performance and stability, most
notable are:
* Fixed performance problem for x2 targets.
* Fixed a problem with incorrect vzeroupper insertion on AVX target on Win32.
=== v1.5.0 === (27 September 2013)
A major new version of ISPC with several new targets and important bug fixes.

View File

@@ -48,6 +48,8 @@ Contents:
+ `Updating ISPC Programs For Changes In ISPC 1.1`_
+ `Updating ISPC Programs For Changes In ISPC 1.2`_
+ `Updating ISPC Programs For Changes In ISPC 1.3`_
+ `Updating ISPC Programs For Changes In ISPC 1.5.0`_
+ `Updating ISPC Programs For Changes In ISPC 1.6.0`_
* `Getting Started with ISPC`_
@@ -97,6 +99,9 @@ Contents:
* `Short Vector Types`_
* `Array Types`_
* `Struct Types`_
+ `Operators Overloading`_
* `Structure of Array Types`_
+ `Declarations and Initializers`_
@@ -279,6 +284,15 @@ Double precision floating point constants are floating point number with
31.4d-1, 1.d, 1.0d, 1d-2. Note that floating point number without suffix is
treated as single precision constant.
Updating ISPC Programs For Changes In ISPC 1.6.0
------------------------------------------------
This release adds support for `Operators Overloading`_, so a word ``operator``
becomes a keyword and it potentially creates a conflict with existing user
function. Also a new library function packed_store_active2() was introduced,
which also may create a conflict with existing user functions.
Getting Started with ISPC
=========================
@@ -1325,6 +1339,7 @@ in C:
* Function overloading by parameter type
* Hexadecimal floating-point constants
* Dynamic memory allocation with ``new`` and ``delete``.
* Limited support for overloaded operators (`Operators Overloading`_).
``ispc`` also adds a number of new features that aren't in C89, C99, or
C++:
@@ -2125,6 +2140,34 @@ still has only a single ``a`` member, since ``a`` was declared with
indexing operation in the last line results in an error.
Operators Overloading
---------------------
ISPC has limited support for overloaded operators for ``struct`` types. Only
binary operators are supported currently, namely they are: ``*, /, %, +, -, >>
and <<``. Operators overloading support is similar to the one in C++ language.
To overload an operator for ``struct S``, you need to declare and implement a
function using keyword ``operator``, which accepts two parameters of type
``struct S`` or ``struct S&`` and returns either of these types. For example:
::
struct S { float re, im;};
struct S operator*(struct S a, struct S b) {
struct S result;
result.re = a.re * b.re - a.im * b.im;
result.im = a.re * b.im + a.im * b.re;
return result;
}
void foo(struct S a, struct S b) {
struct S mul = a*b;
print("a.re: %\na.im: %\n", a.re, a.im);
print("b.re: %\nb.im: %\n", b.re, b.im);
print("mul.re: %\nmul.im: %\n", mul.re, mul.im);
}
Structure of Array Types
------------------------
@@ -4050,6 +4093,14 @@ They return the total number of values stored.
unsigned int val)
There are also ``packed_store_active2()`` functions with exactly the same
signatures and the same semantic except that they may write one extra
element to the output array (but still returning the same value as
``packed_store_active()``). These functions suggest different branch free
implementation on most of supported targets, which usually (but not always)
performs better than ``packed_store_active()``. It's advised to test function
performance on user's scenarios on particular target hardware before using it.
As an example of how these functions can be used, the following code shows
the use of ``packed_store_active()``.

View File

@@ -2,6 +2,16 @@
ispc News
=========
ispc 1.6.0 is Released
----------------------
A major update of ``ispc`` has been released. The main focus is on improved
performance and stability. Several new targets were added. There are also
a number of language and library extensions. Released binaries are based on
patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer
to Release Notes for complete set of changes.
ispc 1.5.0 is Released
----------------------

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 1.5.1dev
PROJECT_NUMBER = 1.6.1dev
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -146,24 +146,24 @@
<PropertyGroup Label="User">
<ISPC_compiler Condition=" '$(ISPC_compiler)' == '' ">ispc</ISPC_compiler>
<Target_str Condition=" '$(Target_str)' == '' ">$(default_targets)</Target_str>
<Target_out>$(TargetDir)$(ISPC_file).obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse2.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse4.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx11.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx2.obj</Target_out>
<Target_out>$(ISPC_file).obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(ISPC_file)_sse2.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(ISPC_file)_sse4.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(ISPC_file)_avx.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(ISPC_file)_avx11.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(ISPC_file)_avx2.obj</Target_out>
</PropertyGroup>
<ItemGroup>
<CustomBuild Include='$(ISPC_file).ispc'>
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out)</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out)</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out)</Outputs>
</CustomBuild>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{87f53c53-957e-4e91-878a-bc27828fb9eb}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -1472,31 +1472,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec16_i32 val,
return count;
}
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec16_i32 val,
__vec16_i1 mask) {
int count = 0;
int32_t *ptr_ = ptr;
for (int i = 0; i < 16; ++i) {
*ptr = val.v[i];
ptr += mask.v & 1;
mask.v = mask.v >> 1;
}
return ptr - ptr_;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
__vec16_i32 *val,
__vec16_i1 mask) {
int count = 0;
for (int i = 0; i < 16; ++i) {
if ((mask.v & (1 << i)) != 0) {
val->v[i] = *ptr++;
++count;
}
}
return count;
return __packed_load_active((int32_t *)ptr, val, mask);
}
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
__vec16_i32 val,
__vec16_i1 mask) {
int count = 0;
for (int i = 0; i < 16; ++i) {
if ((mask.v & (1 << i)) != 0) {
*ptr++ = val.v[i];
++count;
}
}
return count;
return __packed_store_active((int32_t *)ptr, val, mask);
}
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
__vec16_i32 val,
__vec16_i1 mask) {
return __packed_store_active2((int32_t *)ptr, val, mask);
}

View File

@@ -1523,31 +1523,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec32_i32 val,
return count;
}
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec32_i32 val,
__vec32_i1 mask) {
int count = 0;
int32_t *ptr_ = ptr;
for (int i = 0; i < 32; ++i) {
*ptr = val.v[i];
ptr += mask.v & 1;
mask.v = mask.v >> 1;
}
return ptr - ptr_;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
__vec32_i32 *val,
__vec32_i1 mask) {
int count = 0;
for (int i = 0; i < 32; ++i) {
if ((mask.v & (1 << i)) != 0) {
val->v[i] = *ptr++;
++count;
}
}
return count;
return __packed_load_active((int32_t *)ptr, val, mask);
}
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
__vec32_i32 val,
__vec32_i1 mask) {
int count = 0;
for (int i = 0; i < 32; ++i) {
if ((mask.v & (1 << i)) != 0) {
*ptr++ = val.v[i];
++count;
}
}
return count;
return __packed_store_active((int32_t *)ptr, val, mask);
}
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
__vec32_i32 val,
__vec32_i1 mask) {
return __packed_store_active2((int32_t *)ptr, val, mask);
}

View File

@@ -1656,31 +1656,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec64_i32 val,
return count;
}
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec64_i32 val,
__vec64_i1 mask) {
int count = 0;
int32_t *ptr_ = ptr;
for (int i = 0; i < 64; ++i) {
*ptr = val.v[i];
ptr += mask.v & 1;
mask.v = mask.v >> 1;
}
return ptr - ptr_;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
__vec64_i32 *val,
__vec64_i1 mask) {
int count = 0;
for (int i = 0; i < 64; ++i) {
if ((mask.v & (1ull << i)) != 0) {
val->v[i] = *ptr++;
++count;
}
}
return count;
return __packed_load_active((int32_t *) ptr, val, mask);
}
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
__vec64_i32 val,
__vec64_i1 mask) {
int count = 0;
for (int i = 0; i < 64; ++i) {
if ((mask.v & (1ull << i)) != 0) {
*ptr++ = val.v[i];
++count;
}
}
return count;
return __packed_store_active((int32_t *) ptr, val, mask);
}
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
__vec64_i32 val,
__vec64_i1 mask) {
return __packed_store_active2((int32_t *) ptr, val, mask);
}

View File

@@ -2451,20 +2451,24 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val, _
return _mm_countbits_32(uint32_t(mask));
}
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
{
return __packed_store_active(p, val, mask);
}
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec16_i32 *val, __vec16_i1 mask)
{
__vec16_i32 v = __load<64>(val);
v = _mm512_mask_extloadunpacklo_epi32(v, mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
v = _mm512_mask_extloadunpackhi_epi32(v, mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
__store<64>(val, v);
return _mm_countbits_32(uint32_t(mask));
return __packed_load_active((uint32_t *)p, val, mask);
}
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
{
_mm512_mask_extpackstorelo_epi32(p, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
return _mm_countbits_32(uint32_t(mask));
return __packed_store_active((uint32_t *)p, val, mask);
}
static FORCEINLINE int32_t __packed_store_active2(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
{
return __packed_store_active(p, val, mask);
}
///////////////////////////////////////////////////////////////////////////

View File

@@ -2496,20 +2496,23 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec8_i32 val,
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
return _mm_countbits_32(uint32_t(0xFF & mask));
}
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
__vec4_i1 mask) {
return __packed_store_active(ptr, val, mask);
}
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec8_i32 *val,
__vec8_i1 mask) {
__vec8_i32 v = __load<64>(val);
v = _mm512_mask_extloadunpacklo_epi32(v, 0xFF & mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
v = _mm512_mask_extloadunpackhi_epi32(v, 0xFF & mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
__store<64>(val, v);
return _mm_countbits_32(uint32_t(0xFF & mask));
return __packed_load_active((uint32_t *)p, val, mask);
}
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec8_i32 val,
__vec8_i1 mask) {
_mm512_mask_extpackstorelo_epi32(p, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
return _mm_countbits_32(uint32_t(0xFF & mask));
return __packed_store_active((uint32_t *)p, val, mask);
}
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
__vec4_i1 mask) {
return __packed_store_active(ptr, val, mask);
}
#endif
///////////////////////////////////////////////////////////////////////////

View File

@@ -1260,6 +1260,13 @@ static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i3
return __vec16_i64(val.v, _mm512_setzero_epi32());
}
static FORCEINLINE __vec16_i32 __cast_sext(const __vec16_i32 &, const __vec16_i1 &val)
{
__vec16_i32 ret = _mm512_setzero_epi32();
__vec16_i32 one = _mm512_set1_epi32(-1);
return _mm512_mask_mov_epi32(ret, val, one);
}
static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1 &val)
{
__vec16_i32 ret = _mm512_setzero_epi32();
@@ -1878,6 +1885,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val,
return _mm_countbits_32(uint32_t(mask));
}
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
{
return __packed_store_active(p, val, mask);
}
///////////////////////////////////////////////////////////////////////////
// prefetch
///////////////////////////////////////////////////////////////////////////

View File

@@ -3798,6 +3798,25 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec4_i32 val,
return count;
}
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
__vec4_i1 mask) {
int count = 0;
ptr[count] = _mm_extract_epi32(val.v, 0);
count -= _mm_extract_ps(mask.v, 0);
ptr[count] = _mm_extract_epi32(val.v, 1);
count -= _mm_extract_ps(mask.v, 1);
ptr[count] = _mm_extract_epi32(val.v, 2);
count -= _mm_extract_ps(mask.v, 2);
ptr[count] = _mm_extract_epi32(val.v, 3);
count -= _mm_extract_ps(mask.v, 3);
return count;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, __vec4_i32 *val,
__vec4_i1 mask) {
return __packed_load_active((int32_t *)ptr, val, mask);
@@ -3808,6 +3827,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, __vec4_i32 val,
return __packed_store_active((int32_t *)ptr, val, mask);
}
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
__vec4_i1 mask) {
return __packed_store_active2((int32_t *)ptr, val, mask);
}
///////////////////////////////////////////////////////////////////////////
// aos/soa

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{2ef070a1-f62f-4e6a-944b-88d140945c3c}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@@ -170,17 +170,44 @@
// Signature of ispc-generated 'task' functions
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
int taskIndex, int taskCount);
int taskIndex, int taskCount,
int taskIndex0, int taskIndex1, int taskIndex2,
int taskCount0, int taskCount1, int taskCount2);
// Small structure used to hold the data for each task
#ifdef _MSC_VER
__declspec(align(16))
#endif
struct TaskInfo {
TaskFuncType func;
void *data;
int taskIndex, taskCount;
int taskIndex;
int taskCount3d[3];
#if defined(ISPC_IS_WINDOWS)
event taskEvent;
#endif
};
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
int taskIndex0() const
{
return taskIndex % taskCount3d[0];
}
int taskIndex1() const
{
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
}
int taskIndex2() const
{
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
}
int taskCount0() const { return taskCount3d[0]; }
int taskCount1() const { return taskCount3d[1]; }
int taskCount2() const { return taskCount3d[2]; }
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
}
#ifndef _MSC_VER
__attribute__((aligned(32)));
#endif
;
// ispc expects these functions to have C linkage / not be mangled
extern "C" {
@@ -518,7 +545,9 @@ lRunTask(void *ti) {
// Actually run the task
taskInfo->func(taskInfo->data, threadIndex, threadCount,
taskInfo->taskIndex, taskInfo->taskCount);
taskInfo->taskIndex, taskInfo->taskCount(),
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
}
@@ -559,7 +588,9 @@ lRunTask(LPVOID param) {
// will cause bugs in code that uses those.
int threadIndex = 0;
int threadCount = 1;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
// Signal the event that this task is done
ti->taskEvent.set();
@@ -660,7 +691,9 @@ lTaskEntry(void *arg) {
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
myTask->taskCount);
myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
//
// Decrement the "number of unfinished tasks" counter in the task
@@ -871,7 +904,9 @@ TaskGroup::Sync() {
// Do work for _myTask_
//
// FIXME: bogus values for thread index/thread count here as well..
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
//
// Decrement the number of unfinished tasks counter
@@ -901,7 +936,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}
}
@@ -930,7 +967,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads();
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}
}
@@ -961,7 +1000,9 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
});
}
@@ -988,7 +1029,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
});
}
}
@@ -1041,7 +1084,8 @@ FreeTaskGroup(TaskGroup *tg) {
///////////////////////////////////////////////////////////////////////////
void
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
const int count = count0*count1*count2;
TaskGroup *taskGroup;
if (*taskGroupPtr == NULL) {
InitTaskSystem();
@@ -1057,7 +1101,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ti->func = (TaskFuncType)func;
ti->data = data;
ti->taskIndex = i;
ti->taskCount = count;
ti->taskCount3d[0] = count0;
ti->taskCount3d[1] = count1;
ti->taskCount3d[2] = count2;
}
taskGroup->Launch(baseIndex, count);
}

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

2
ispc.h
View File

@@ -38,7 +38,7 @@
#ifndef ISPC_H
#define ISPC_H
#define ISPC_VERSION "1.5.1dev"
#define ISPC_VERSION "1.6.1dev"
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"

View File

@@ -5153,6 +5153,11 @@ FixBooleanSelectPass::runOnFunction(llvm::Function &F) {
// LLVM 3.3 only
#if defined(LLVM_3_3)
// Don't optimize generic targets.
if (g->target->getISA() == Target::GENERIC) {
return false;
}
for (llvm::Function::iterator I = F.begin(), E = F.end();
I != E; ++I) {
llvm::BasicBlock* bb = &*I;