31 Commits

Author SHA1 Message Date
Matt Pharr
32764e7639 Update release notes, doxygen version number 2011-07-01 05:12:57 +01:00
Matt Pharr
bcae21dbca Update examples to use fpmath:fast and to enable intrinsics on Windows 2011-06-30 13:17:14 -07:00
Matt Pharr
eb22fa6173 Generalize FunctionEmitContext::PtrToIntInst and IntToPtrInst to
do the right thing if given a varying lvalue (i.e. an array of
pointers).  Fixes issue #34.
2011-06-29 12:38:12 +01:00
Matt Pharr
5f7e61f9b5 Another stdlib dependency improvement 2011-06-29 12:26:44 +01:00
Matt Pharr
28a68e3c1f More code simplifications from using CollectionType. Finishes Issue #37 2011-06-29 09:32:31 +01:00
Matt Pharr
6b153566f3 Simplify a bunch of code by using CollectionType to collect struct
codepaths in with array/vector codepaths. (Issue #37).
2011-06-29 07:59:43 +01:00
Matt Pharr
214fb3197a Initial plumbing to add CollectionType base-class as common ancestor
to StructTypes, ArrayTypes, and VectorTypes.  Issue #37.
2011-06-29 07:42:09 +01:00
Matt Pharr
b4068efcfb Fixes to run_tests.sh script
- Use bash, not zsh (don't make people install zsh for no good reason)
- Print help if -h command line option is given
- Allow specifying the compilation target to use on the command line
- If one or more filenames are provided, just run those tests.  Otherwise,
  run everything in the tests/ directory.
2011-06-29 07:25:01 +01:00
Matt Pharr
24216d841f Update release notes for 1.0.2 stuff so far 2011-06-29 07:00:17 +01:00
Matt Pharr
be45beb54b Implement our own routine to turn C99-style hexadecimal float constants in string form into floating-point values. With this, we can correctly handle hex float constants on Windows, where the builtin atof() routine just returns zero for them. Fixes issue #16. 2011-06-29 06:57:39 +01:00
Matt Pharr
cb58c78c1a Pipe through source file locations of structure element declarations; these are now supplied to the llvm::DIBuilder::createMemberType() method rather than giving it the position of the overall struct declaration for each one. Fixes issue #31 2011-06-29 05:38:42 +01:00
Matt Pharr
86de910ecd Improve implementation of __masked_store_blend_64() for AVX target by doing two 8-wide 32-bit blends rather than serializing. Fixes issue #29 2011-06-28 20:52:06 -07:00
Matt Pharr
ce7978ae74 Align stack-allocated arrays of uniform types to the target vector alignment (they will often be accessed in programCount-sized chunks and this should make that a bit more efficient in the common case). Fixes issue #15 2011-06-28 20:42:18 -07:00
Matt Pharr
7aec7486f8 Make SSE2 the default target on Atom CPUs unless explicitly overridden. (Fixes issue #45) 2011-06-28 08:32:58 -07:00
Daniel Schubert
b6d6ee6fc2 Fixed typos. 2011-06-28 07:38:00 -07:00
Matt Pharr
cb74346d36 Fix typo (thx jsimmons) 2011-06-27 19:51:46 -07:00
Matt Pharr
2709c354d7 Add support for broadcast(), rotate(), and shuffle() stdlib routines 2011-06-27 17:31:44 -07:00
Matt Pharr
36063bae79 Update call to llvm::DIBuilder::createMemberType to fix building with LLVM dev TOT 2011-06-26 08:00:00 -07:00
Matt Pharr
e6d6a82484 Merge pull request #41 from benharper123/master
Update docs on store/load int8/16
2011-06-25 17:28:21 -07:00
Ben Harper
f830e21cfa Updated docs for store/load int8/int16 2011-06-26 02:02:18 +02:00
Matt Pharr
ae2c24c3c1 Merge branch 'master' of github.com:ispc/ispc 2011-06-24 17:06:08 -07:00
Andreas Wendleder
6dfd74c74c Add verbose flag and report progress. 2011-06-24 17:05:24 -07:00
Matt Pharr
7055888cb7 Merge branch 'master' of github.com:ispc/ispc 2011-06-24 16:21:54 -07:00
Matt Pharr
7854a71ea9 Merge branch 'master' of github.com:ispc/ispc 2011-06-24 16:21:06 -07:00
Matt Pharr
b7519d1268 fix date in ReleaseNotes.txt 2011-06-24 16:20:36 -07:00
Matt Pharr
f2758f0831 Merge branch 'master' of github.com:ispc/ispc 2011-06-24 16:20:06 -07:00
Matt Pharr
ff76c2334e small doc fix, removed incorrect comment from example 2011-06-24 16:19:51 -07:00
Matt Pharr
9b6bf5dabc Add release notes doc 2011-06-24 16:11:46 -07:00
Matt Pharr
ab33afaea4 Merge branch 'master' of home:/Users/mmp/git/ispc 2011-06-23 18:54:14 -07:00
Matt Pharr
fab5794faf Merge branch 'master' of github.com:ispc/ispc 2011-06-23 18:25:44 -07:00
Matt Pharr
3c3cd88692 initial alignment work 2011-06-23 17:36:44 -07:00
38 changed files with 881 additions and 348 deletions

1
.gitignore vendored
View File

@@ -4,3 +4,4 @@ depend
ispc
ispc_test
objs
docs/doxygen

View File

@@ -94,9 +94,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
$(STDLIB_SRC): stdlib.m4
objs/stdlib-%.cpp: stdlib-%.ll
objs/stdlib-%.cpp: stdlib-%.ll stdlib.m4 stdlib-sse.ll
@echo Creating C++ source from stdlib file $<
@m4 stdlib.m4 $< | ./bitcode2cpp.py $< > $@

103
ctx.cpp
View File

@@ -1109,7 +1109,7 @@ FunctionEmitContext::BitCastInst(llvm::Value *value, const llvm::Type *type,
}
llvm::Instruction *
llvm::Value *
FunctionEmitContext::PtrToIntInst(llvm::Value *value, const llvm::Type *type,
const char *name) {
if (value == NULL) {
@@ -1117,16 +1117,31 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const llvm::Type *type,
return NULL;
}
// TODO: we should probably handle the array case as in
// e.g. BitCastInst(), but we don't currently need that functionality
llvm::Instruction *inst =
new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
AddDebugPos(inst);
return inst;
const llvm::Type *valType = value->getType();
const llvm::ArrayType *at = llvm::dyn_cast<const llvm::ArrayType>(valType);
if (at && llvm::isa<const llvm::PointerType>(at->getElementType())) {
// varying lvalue -> apply ptr to int to the individual pointers
assert((int)at->getNumElements() == g->target.vectorWidth);
llvm::Value *ret =
llvm::UndefValue::get(llvm::ArrayType::get(type, g->target.vectorWidth));
for (int i = 0; i < g->target.vectorWidth; ++i) {
llvm::Value *elt = ExtractInst(value, i);
llvm::Value *p2i = PtrToIntInst(elt, type, name);
ret = InsertInst(ret, p2i, i);
}
return ret;
}
else {
llvm::Instruction *inst =
new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
AddDebugPos(inst);
return inst;
}
}
llvm::Instruction *
llvm::Value *
FunctionEmitContext::IntToPtrInst(llvm::Value *value, const llvm::Type *type,
const char *name) {
if (value == NULL) {
@@ -1134,12 +1149,27 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value, const llvm::Type *type,
return NULL;
}
// TODO: we should probably handle the array case as in
// e.g. BitCastInst(), but we don't currently need that functionality
llvm::Instruction *inst =
new llvm::IntToPtrInst(value, type, name ? name : "int2ptr", bblock);
AddDebugPos(inst);
return inst;
const llvm::Type *valType = value->getType();
const llvm::ArrayType *at = llvm::dyn_cast<const llvm::ArrayType>(valType);
if (at && llvm::isa<const llvm::PointerType>(at->getElementType())) {
// varying lvalue -> apply int to ptr to the individual pointers
assert((int)at->getNumElements() == g->target.vectorWidth);
llvm::Value *ret =
llvm::UndefValue::get(llvm::ArrayType::get(type, g->target.vectorWidth));
for (int i = 0; i < g->target.vectorWidth; ++i) {
llvm::Value *elt = ExtractInst(value, i);
llvm::Value *i2p = IntToPtrInst(elt, type, name);
ret = InsertInst(ret, i2p, i);
}
return ret;
}
else {
llvm::Instruction *inst =
new llvm::IntToPtrInst(value, type, name ? name : "int2ptr", bblock);
AddDebugPos(inst);
return inst;
}
}
@@ -1359,10 +1389,10 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
// If we're gathering structures, do an element-wise gather
// recursively.
llvm::Value *retValue = llvm::UndefValue::get(retType);
for (int i = 0; i < st->NumElements(); ++i) {
for (int i = 0; i < st->GetElementCount(); ++i) {
llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
// This in turn will be another gather
llvm::Value *eltValues = LoadInst(eltPtrs, st->GetMemberType(i),
llvm::Value *eltValues = LoadInst(eltPtrs, st->GetElementType(i),
name);
retValue = InsertInst(retValue, eltValues, i, "set_value");
}
@@ -1482,6 +1512,16 @@ FunctionEmitContext::AllocaInst(const llvm::Type *llvmType, const char *name,
// current basic block
inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
// If no alignment was specified but we have an array of a uniform
// type, then align it to 4 * the native vector width; it's not
// unlikely that this array will be loaded into varying variables with
// what will be aligned accesses if the uniform -> varying load is done
// in regular chunks.
const llvm::ArrayType *arrayType = llvm::dyn_cast<const llvm::ArrayType>(llvmType);
if (align == 0 && arrayType != NULL &&
!llvm::isa<const llvm::VectorType>(arrayType->getElementType()))
align = 4 * g->target.nativeVectorWidth;
if (align != 0)
inst->setAlignment(align);
// Don't add debugging info to alloca instructions
@@ -1506,29 +1546,18 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
assert(llvm::isa<const llvm::PointerType>(lvalue->getType()));
const StructType *structType = dynamic_cast<const StructType *>(rvalueType);
if (structType != NULL) {
// Assigning a structure
for (int i = 0; i < structType->NumElements(); ++i) {
const CollectionType *collectionType =
dynamic_cast<const CollectionType *>(rvalueType);
if (collectionType != NULL) {
// Assigning a structure / array / vector. Handle each element
// individually with what turns into a recursive call to
// maskedStore()
for (int i = 0; i < collectionType->GetElementCount(); ++i) {
llvm::Value *eltValue = ExtractInst(rvalue, i, "rvalue_member");
llvm::Value *eltLValue = GetElementPtrInst(lvalue, 0, i,
"struct_lvalue_ptr");
StoreInst(eltValue, eltLValue, storeMask,
structType->GetMemberType(i));
}
return;
}
const SequentialType *sequentialType =
dynamic_cast<const SequentialType *>(rvalueType);
if (sequentialType != NULL) {
// Assigning arrays and vectors. Handle each element individually
// with what turns into a recursive call to makedStore()
for (int i = 0; i < sequentialType->GetElementCount(); ++i) {
llvm::Value *eltLValue = GetElementPtrInst(lvalue, 0, i, "lval_i_ptr");
llvm::Value *eltValue = ExtractInst(rvalue, i, "array_i_val");
StoreInst(eltValue, eltLValue, storeMask,
sequentialType->GetElementType());
collectionType->GetElementType(i));
}
return;
}
@@ -1588,10 +1617,10 @@ FunctionEmitContext::scatter(llvm::Value *rvalue, llvm::Value *lvalue,
const StructType *structType = dynamic_cast<const StructType *>(rvalueType);
if (structType) {
// Scatter the struct elements individually
for (int i = 0; i < structType->NumElements(); ++i) {
for (int i = 0; i < structType->GetElementCount(); ++i) {
llvm::Value *lv = GetElementPtrInst(lvalue, 0, i);
llvm::Value *rv = ExtractInst(rvalue, i);
scatter(rv, lv, storeMask, structType->GetMemberType(i));
scatter(rv, lv, storeMask, structType->GetElementType(i));
}
return;
}

8
ctx.h
View File

@@ -305,10 +305,10 @@ public:
llvm::Value *BitCastInst(llvm::Value *value, const llvm::Type *type,
const char *name = NULL);
llvm::Instruction *PtrToIntInst(llvm::Value *value, const llvm::Type *type,
const char *name = NULL);
llvm::Instruction *IntToPtrInst(llvm::Value *value, const llvm::Type *type,
const char *name = NULL);
llvm::Value *PtrToIntInst(llvm::Value *value, const llvm::Type *type,
const char *name = NULL);
llvm::Value *IntToPtrInst(llvm::Value *value, const llvm::Type *type,
const char *name = NULL);
llvm::Instruction *TruncInst(llvm::Value *value, const llvm::Type *type,
const char *name = NULL);
llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value,

View File

@@ -318,9 +318,10 @@ Declaration::Print() const {
///////////////////////////////////////////////////////////////////////////
void
GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
std::vector<const Type *> *elementTypes,
std::vector<std::string> *elementNames) {
GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
std::vector<const Type *> *elementTypes,
std::vector<std::string> *elementNames,
std::vector<SourcePos> *elementPositions) {
for (unsigned int i = 0; i < sd.size(); ++i) {
const Type *type = sd[i]->type;
// FIXME: making this fake little DeclSpecs here is really
@@ -343,6 +344,7 @@ GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
elementTypes->push_back(d->sym->type);
elementNames->push_back(d->sym->name);
elementPositions->push_back(d->sym->pos);
}
}
}

7
decl.h
View File

@@ -196,8 +196,9 @@ struct StructDeclaration {
/** Given a set of StructDeclaration instances, this returns the types of
the elements of the corresponding struct, their names, and their source
positions. */
extern void GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
std::vector<const Type *> *elementTypes,
std::vector<std::string> *elementNames);
extern void GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
std::vector<const Type *> *elementTypes,
std::vector<std::string> *elementNames,
std::vector<SourcePos> *elementPositions);
#endif // ISPC_DECL_H

26
docs/ReleaseNotes.txt Normal file
View File

@@ -0,0 +1,26 @@
=== v1.0.2 ===
Floating-point hexadecimal constants are now parsed correctly on Windows
(fixes issue #16).
SSE2 is now the default target if --cpu=atom is given in the command line
arguments and another target isn't explicitly specified.
The standard library now provides broadcast(), rotate(), and shuffle()
routines for efficient communication between program instances.
The MSVC solution files to build the examples on Windows now use
/fpmath:fast when building.
=== v1.0.1 === (24 June 2011)
ispc no longer requires that pointers to memory that are passed in to ispc
have alignment equal to the target's vector width; now alignment just has to
be the regular element alignment (e.g. 4 bytes for floats, etc.). This
change also fixed a number of cases where it previously incorrectly
generated aligned load/store instructions in cases where the address wasn't
actually aligned (even if the base address passed into ispc code was).
=== v1.0 === (21 June 2011)
Initial Release

View File

@@ -74,7 +74,8 @@ Contents:
+ `Math Functions`_
+ `Output Functions`_
+ `Cross-Lane Operations`_
+ `Cross-Program Instance Operations`_
+ `Packed Load and Store Operations`_
+ `Low-Level Bits`_
* `Interoperability with the Application`_
@@ -136,7 +137,7 @@ Linux\* and Mac OS\* available for download. Alternatively, you can
download the source code from that page and build it yourself; see the
`ispc wiki`_ for instructions about building ``ispc`` from source.
.. _ispc downloads web page:downloads.html
.. _ispc downloads web page: downloads.html
.. _ispc wiki: http://github.com/ispc/ispc/wiki
Once you have an executable for your system, copy it into a directory
@@ -340,7 +341,7 @@ before it's compiled. On Windows®, pre-processor definitions should be
provided to the ``cl`` call.
By default, the compiler generates x86-64 Intel® SSE4 code. To generate
32-bit code, you can use the the ``--arch=x86`` command-line flag. To
32-bit code, you can use the ``--arch=x86`` command-line flag. To
select Intel® SSE2, use ``--target=sse2``.
``ispc`` supports an alternative method for generating Intel® SSE4 code,
@@ -1246,7 +1247,7 @@ section.)
For ``if`` statements where the different running SPMD program instances
don't have coherent values for the boolean ``if`` test, using ``cif``
introduces some additional overhead from the ``all`` and ``any`` tests as
well as the corresponding branches. For cases where the the program
well as the corresponding branches. For cases where the program
instances often do compute the same boolean value, this overhead is
worthwhile. If the control flow is in fact usually incoherent, this
overhead only costs performance.
@@ -1659,14 +1660,14 @@ values for the inactive program instances aren't printed. (In other cases,
they may have garbage values or be otherwise undefined.)
Cross-Lane Operations
---------------------
Cross-Program Instance Operations
---------------------------------
Usually, ``ispc`` code expresses independent computation on separate data
elements. There are, however, a number of cases where it's useful for the
program instances to be able to cooperate in computing results. The
cross-lane operations described in this section provide primitives for
communication between the running program instances.
Usually, ``ispc`` code expresses independent programs performing
computation on separate data elements. There are, however, a number of
cases where it's useful for the program instances to be able to cooperate
in computing results. The cross-program instance operations described in this section
provide primitives for communication between the running program instances.
A few routines evaluate conditions across the running program
instances. For example, ``any()`` returns ``true`` if the given value
@@ -1678,6 +1679,47 @@ and ``all()`` returns ``true`` if it true for all of them.
uniform bool any(bool v)
uniform bool all(bool v)
To broadcast a value from one program instance to all of the others, a
``broadcast()`` function is available. It broadcasts the value of the
``value`` parameter for the program instance given by ``index`` to all of
the running program instances.
::
float broadcast(float value, uniform int index)
int32 broadcast(int32 value, uniform int index)
double broadcast(double value, uniform int index)
int64 broadcast(int64 value, uniform int index)
The ``rotate()`` function allows each program instance to find the value of
``value`` held by the program instance ``offset`` steps away. For
example, on an 8-wide target, if ``value`` has the values (1, 2, 3, 4, 5,
6, 7, 8) across the running program instances, then ``rotate(value,
-1)`` causes the first program instance to get the value 8, the second
program instance to get the value 1, the third 2, and so forth. The
provided offset value can be positive or negative, and may be greater than
``programCount`` (it is masked to ensure valid offsets).
::
float rotate(float value, uniform int offset)
int32 rotate(int32 value, uniform int offset)
double rotate(double value, uniform int offset)
int64 rotate(int64 value, uniform int offset)
Finally, ``shuffle()`` allows fully general shuffling of values among the
program instances. Each program instance's value of ``permutation`` gives the
program instance from which to get the value of ``value``. The provided
values for ``permutation`` must all be between 0 and ``programCount-1``.
::
float shuffle(float value, int permutation)
int32 shuffle(int32 value, int permutation)
double shuffle(double value, int permutation)
int64 shuffle(int64 value, int permutation)
The various variants of ``popcnt()`` return the population count--the
number of bits set in the given value.
@@ -1719,8 +1761,12 @@ given value across all of the currently-executing vector lanes.
uniform unsigned int reduce_max(unsigned int a, unsigned int b)
Finally, there are routines for writing out and reading in values from
linear memory locations for the active program instances.
Packed Load and Store Operations
--------------------------------
The standard library also offers routines for writing out and reading in
values from linear memory locations for the active program instances.
``packed_load_active()`` loads consecutive values from the given array,
starting at ``a[offset]``, loading one value for each currently-executing
program instance and storing it into that program instance's ``val``
@@ -1797,14 +1843,15 @@ and this conversion step are necessary because ``ispc`` doesn't have native
void store_to_int16(uniform int a[], uniform int offset,
unsigned int val)
There are two things to note in these functions. First, note that these
There are three things to note in these functions. First, note that these
functions take ``unsigned int`` arrays as parameters; you need
to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side to
``unsigned int`` when passing them to ``ispc`` code. Second, although the
arrays are passed as ``unsigned int``, in the array indexing calculation,
with the ``offset`` parameter, they are treated as if they were ``int8`` or
``int16`` types. (i.e. the offset is treated as being in terms of number of 8
or 16-bit elements.)
or 16-bit elements.) Third, note that ``programIndex`` is implicitly added
to ``offset``.
The ``intbits()`` and ``floatbits()`` functions can be used to implement
low-level floating-point bit twiddling. For example, ``intbits()`` returns
@@ -2279,21 +2326,11 @@ elements to work with and then proceeds with the computation.
Communicating Between SPMD Program Instances
--------------------------------------------
The ``programIndex`` built-in variable (see `Mapping Data To Program
Instances`_) can be used to communicate between the set of executing
program instances. Consider the following code, which shows all of the
program instances writing into unique locations in an array.
::
float x = ...;
uniform float allX[programCount];
allX[programIndex] = x;
In this code, a program instance that reads ``allX[0]`` finds the value of
``x`` that was computed by the first of the running program instances, and
so forth. Program instances can communicate with their neighbor instances
with indexing like ``allX[(programIndex+1)%programCount]``.
The ``broadcast()``, ``rotate()``, and ``shuffle()`` standard library
routines provide a variety of mechanisms for the running program instances
to communicate values to each other during execution. See the section
`Cross-Program Instance Operations`_ for more information about their
operation.
Gather and Scatter

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 1.0
PROJECT_NUMBER = 1.0.2
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

View File

@@ -102,6 +102,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -115,6 +117,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -130,6 +134,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -147,6 +152,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>

View File

@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -81,6 +81,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -94,6 +96,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -109,6 +113,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -126,6 +131,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -158,4 +164,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@@ -81,6 +81,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -94,6 +96,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -109,6 +113,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -126,6 +131,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>

View File

@@ -54,8 +54,6 @@ extern void binomial_put_serial(float Sa[], float Xa[], float Ta[],
float result[], int count);
int main() {
// Pointers passed to ispc code must have alignment of the target's
// vector width at minimum.
float *S = new float[N_OPTIONS];
float *X = new float[N_OPTIONS];
float *T = new float[N_OPTIONS];

View File

@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -82,6 +82,8 @@
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -96,6 +98,8 @@
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -112,6 +116,7 @@
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -130,6 +135,7 @@
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -165,4 +171,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -81,6 +81,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -94,6 +96,8 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -109,6 +113,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -126,6 +131,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -162,4 +168,4 @@ cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

121
expr.cpp
View File

@@ -1526,7 +1526,7 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const {
if (st != NULL) {
bool anyUniform = false;
for (int i = 0; i < st->NumElements(); ++i) {
if (st->GetMemberType(i)->IsUniformType())
if (st->GetElementType(i)->IsUniformType())
anyUniform = true;
}
@@ -2489,71 +2489,57 @@ ExprList::TypeCheck() {
llvm::Constant *
ExprList::GetConstant(const Type *type) const {
const StructType *structType = dynamic_cast<const StructType *>(type);
const SequentialType *sequentialType =
dynamic_cast<const SequentialType *>(type);
const CollectionType *collectionType =
dynamic_cast<const CollectionType *>(type);
if (collectionType == NULL)
return NULL;
if (structType != NULL) {
// We can potentially return an llvm::ConstantStruct if we have the
// same number of elements in the ExprList as the struct has
// members (and the various elements line up with the shape of the
// corresponding struct elements).
if ((int)exprs.size() != structType->NumElements()) {
Error(pos, "Initializer list for struct \"%s\" must have %d "
"elements (has %d).", structType->GetString().c_str(),
(int)exprs.size(), structType->NumElements());
std::string name;
if (dynamic_cast<const StructType *>(type) != NULL)
name = "struct";
else if (dynamic_cast<const ArrayType *>(type) != NULL)
name = "array";
else if (dynamic_cast<const VectorType *>(type) != NULL)
name = "vector";
else
FATAL("Unexpected CollectionType in ExprList::GetConstant()");
if ((int)exprs.size() != collectionType->GetElementCount()) {
Error(pos, "Initializer list for %s \"%s\" must have %d elements "
"(has %d).", name.c_str(), collectionType->GetString().c_str(),
collectionType->GetElementCount(), (int)exprs.size());
return NULL;
}
std::vector<llvm::Constant *> cv;
for (unsigned int i = 0; i < exprs.size(); ++i) {
if (exprs[i] == NULL)
return NULL;
}
std::vector<llvm::Constant *> cv;
for (unsigned int i = 0; i < exprs.size(); ++i) {
if (exprs[i] == NULL)
return NULL;
const Type *elementType = structType->GetMemberType(i);
llvm::Constant *c = exprs[i]->GetConstant(elementType);
if (c == NULL)
// If this list element couldn't convert to the right
// constant type for the corresponding struct member, then
// give up
return NULL;
cv.push_back(c);
}
const Type *elementType = collectionType->GetElementType(i);
llvm::Constant *c = exprs[i]->GetConstant(elementType);
if (c == NULL)
// If this list element couldn't convert to the right constant
// type for the corresponding collection member, then give up.
return NULL;
cv.push_back(c);
}
if (dynamic_cast<const StructType *>(type) != NULL) {
#if defined(LLVM_2_8) || defined(LLVM_2_9)
return llvm::ConstantStruct::get(*g->ctx, cv, false);
#else
const llvm::StructType *llvmStructType =
llvm::dyn_cast<const llvm::StructType>(structType->LLVMType(g->ctx));
llvm::dyn_cast<const llvm::StructType>(collectionType->LLVMType(g->ctx));
assert(llvmStructType != NULL);
return llvm::ConstantStruct::get(llvmStructType, cv);
#endif
}
else if (sequentialType) {
// Similarly, if we have an array or vector type, we may be able to
// return the corresponding llvm constant value.
if ((int)exprs.size() != sequentialType->GetElementCount()) {
bool isArray = (dynamic_cast<const ArrayType *>(type) != NULL);
Error(pos, "Initializer list for %s \"%s\" must have %d elements (has %d).",
isArray ? "array" : "vector", sequentialType->GetString().c_str(),
(int)exprs.size(), sequentialType->GetElementCount());
return NULL;
}
std::vector<llvm::Constant *> cv;
for (unsigned int i = 0; i < exprs.size(); ++i) {
if (exprs[i] == NULL)
return NULL;
const Type *elementType = sequentialType->GetElementType();
llvm::Constant *c = exprs[i]->GetConstant(elementType);
if (c == NULL)
return NULL;
cv.push_back(c);
}
else {
const llvm::Type *lt = type->LLVMType(g->ctx);
const llvm::ArrayType *lat = llvm::dyn_cast<const llvm::ArrayType>(lt);
// FIXME: should the assert below validly fail for uniform vectors
// now?
// now? Need a test case to reproduce it and then to be sure we
// have the right fix; leave the assert until we can hit it...
assert(lat != NULL);
return llvm::ConstantArray::get(lat, cv);
}
@@ -2832,7 +2818,7 @@ MemberExpr::GetType() const {
// Otherwise it's a struct, and the result type is the element
// type, possibly promoted to varying if the struct type / lvalue
// is varying.
const Type *elementType = structType->GetMemberType(identifier);
const Type *elementType = structType->GetElementType(identifier);
if (!elementType)
Error(identifierPos, "Element name \"%s\" not present in struct type \"%s\".%s",
identifier.c_str(), structType->GetString().c_str(),
@@ -2912,7 +2898,7 @@ MemberExpr::getElementNumber() const {
}
}
else {
elementNumber = structType->GetMemberNumber(identifier);
elementNumber = structType->GetElementNumber(identifier);
if (elementNumber == -1)
Error(identifierPos, "Element name \"%s\" not present in struct type \"%s\".%s",
identifier.c_str(), structType->GetString().c_str(),
@@ -3004,7 +2990,7 @@ MemberExpr::getCandidateNearMatches() const {
return "";
std::vector<std::string> elementNames;
for (int i = 0; i < structType->NumElements(); ++i)
for (int i = 0; i < structType->GetElementCount(); ++i)
elementNames.push_back(structType->GetElementName(i));
std::vector<std::string> alternates = MatchStrings(identifier, elementNames);
if (!alternates.size())
@@ -3900,25 +3886,14 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value,
const llvm::Type *llvmType = type->GetAsVaryingType()->LLVMType(g->ctx);
llvm::Value *retValue = llvm::UndefValue::get(llvmType);
// for structs, just recursively make their elements varying (if
// needed) and populate the return struct
const StructType *structType = dynamic_cast<const StructType *>(type);
if (structType != NULL) {
for (int i = 0; i < structType->NumElements(); ++i) {
llvm::Value *v = ctx->ExtractInst(value, i, "struct_element");
v = lUniformValueToVarying(ctx, v, structType->GetMemberType(i));
retValue = ctx->InsertInst(retValue, v, i, "set_struct_element");
}
return retValue;
}
// And similarly do the elements of arrays and vectors individually
const SequentialType *sequentialType =
dynamic_cast<const SequentialType *>(type);
if (sequentialType != NULL) {
for (int i = 0; i < sequentialType->GetElementCount(); ++i) {
// for structs/arrays/vectors, just recursively make their elements
// varying (if needed) and populate the return value.
const CollectionType *collectionType =
dynamic_cast<const CollectionType *>(type);
if (collectionType != NULL) {
for (int i = 0; i < collectionType->GetElementCount(); ++i) {
llvm::Value *v = ctx->ExtractInst(value, i, "get_element");
v = lUniformValueToVarying(ctx, v, sequentialType->GetElementType());
v = lUniformValueToVarying(ctx, v, collectionType->GetElementType(i));
retValue = ctx->InsertInst(retValue, v, i, "set_element");
}
return retValue;

91
lex.ll
View File

@@ -45,6 +45,7 @@ static void lCComment(SourcePos *);
static void lCppComment(SourcePos *);
static void lHandleCppHash(SourcePos *);
static void lStringConst(YYSTYPE *, SourcePos *);
static double lParseHexFloat(const char *ptr);
#define YY_USER_ACTION \
yylloc->first_line = yylloc->last_line; \
@@ -65,7 +66,8 @@ inline int isatty(int) { return 0; }
WHITESPACE [ \t\r]+
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)|([-]?0x[01]\.?[0-9a-fA-F]+p[-+]?[0-9]+[fF]?)
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)
HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?)
IDENT [a-zA-Z_][a-zA-Z_0-9]*
@@ -182,13 +184,15 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
}
{FLOAT_NUMBER} {
/* FIXME: need to implement a hex float constant parser so that we can
support them on Windows (which doesn't handle them in its atof()
implementation... */
yylval->floatVal = atof(yytext);
return TOKEN_FLOAT_CONSTANT;
}
{HEX_FLOAT_NUMBER} {
yylval->floatVal = lParseHexFloat(yytext);
return TOKEN_FLOAT_CONSTANT;
}
"++" { return TOKEN_INC_OP; }
"--" { return TOKEN_DEC_OP; }
"<<" { return TOKEN_LEFT_OP; }
@@ -424,3 +428,82 @@ lStringConst(YYSTYPE *yylval, SourcePos *pos)
}
yylval->stringVal = new std::string(str);
}
/** Compute the value 2^n, where the exponent is given as an integer.

    The result is built by repeated multiplication by exact powers of two,
    so every intermediate value is exact for as long as the result stays
    within the range of a double.

    Negative exponents are handled by scaling downward directly rather
    than by taking the reciprocal of 2^-n.  The reciprocal approach
    overflows 2^-n to infinity once n drops below -1023 and therefore
    returns zero for results that are still representable as subnormals
    (down to 2^-1074); it also has to negate the exponent, which is
    undefined for INT_MIN.

    @param exponent  Power of two to compute; may be negative.
    @return 2^exponent; +infinity if that is too large for a double, and
            zero if it is too small.
 */
static double
ipow2(int exponent) {
    // 2^n is already infinity for n > 1023 and already zero for
    // n < -1074, so clamping well outside that range leaves the result
    // unchanged.  It bounds the loops below and makes the negation safe.
    if (exponent > 2048)
        exponent = 2048;
    else if (exponent < -2048)
        exponent = -2048;

    const bool shrink = (exponent < 0);
    int remaining = shrink ? -exponent : exponent;
    const double bigStep = shrink ? (1. / 65536.) : 65536.;
    const double smallStep = shrink ? .5 : 2.;

    double ret = 1.;
    // Take steps of 2^16 (or 2^-16) while we can...
    while (remaining > 16) {
        ret *= bigStep;
        remaining -= 16;
    }
    // ...and finish off the last few powers one at a time.
    while (remaining-- > 0)
        ret *= smallStep;
    return ret;
}
/** Parse a hexadecimal-formatted floating-point number (C99 hex float
    constant-style), for example "0x1.8p3".

    The expected layout is "0x", a single leading digit that is 0 or 1, an
    optional '.' followed by hexadecimal fraction digits, a 'p', and then a
    signed base-10 exponent giving a power of two.  Anything after the
    exponent digits (such as an 'f' suffix) is ignored.

    @param ptr  NUL-terminated text of the constant; must not be NULL.
    @return The value of the constant as a double.  Values too large for a
            double come back as infinity and values too small as zero.
 */
static double
lParseHexFloat(const char *ptr) {
    assert(ptr != NULL);
    assert(ptr[0] == '0' && ptr[1] == 'x');
    ptr += 2;

    // The single digit ahead of the (optional) '.' is the integer part of
    // the mantissa.
    assert(*ptr == '0' || *ptr == '1');
    double mantissa = (*ptr == '1') ? 1. : 0.;
    ++ptr;

    if (*ptr == '.') {
        // There is a fraction part; the i'th digit we encounter gives the
        // 1/(16^i) component of the mantissa.
        ++ptr;
        double scale = 1. / 16.;

        // Consume hex digits only.  Stopping at the first character that
        // isn't one (rather than scanning until a 'p' shows up) means we
        // can never walk past the end of the string, even when asserts
        // are compiled out.
        for (;; ++ptr) {
            int digit;
            if (*ptr >= '0' && *ptr <= '9')
                digit = *ptr - '0';
            else if (*ptr >= 'a' && *ptr <= 'f')
                digit = 10 + *ptr - 'a';
            else if (*ptr >= 'A' && *ptr <= 'F')
                digit = 10 + *ptr - 'A';
            else
                break;

            mantissa += scale * digit;
            scale /= 16.;
        }
    }

    // Whether or not there was a fraction, the exponent marker comes next.
    assert(*ptr == 'p');
    if (*ptr != 'p')
        // Malformed input in a build without asserts: return what we have
        // rather than reading an exponent from an unexpected position.
        return mantissa;
    ++ptr; // skip the 'p'

    // Interestingly enough, the exponent is provided base 10.  Keep it as
    // a long and clamp before narrowing: a very long digit string would
    // otherwise wrap around when converted to int and yield a wildly wrong
    // (possibly opposite-signed) exponent.  Anything beyond +/-4096 is
    // infinity or zero for every mantissa this function can produce.
    long exponent = strtol(ptr, (char **)NULL, 10);
    if (exponent > 4096)
        exponent = 4096;
    else if (exponent < -4096)
        exponent = -4096;

    // A zero mantissa is zero for any exponent; returning early avoids
    // computing 0 * infinity (NaN) when the exponent is huge.
    if (mantissa == 0.)
        return 0.;

    // Scale by an exactly-computed power of two rather than relying on
    // the precision of a library exp2()/pow() implementation.
    return mantissa * ipow2((int)exponent);
}

View File

@@ -91,7 +91,7 @@ static void usage(int ret) {
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
printf(" [--target={sse2,sse4,sse4x2,avx}] Select target ISA (SSE4 is default)\n");
printf(" [--target={sse2,sse4,sse4x2,avx}] Select target ISA (SSE4 is default unless compiling for atom; then SSE2 is.)\n");
printf(" [--version]\t\t\t\tPrint ispc version\n");
printf(" [--woff]\t\t\t\tDisable warnings\n");
printf(" [--wno-perf]\t\t\tDon't issue warnings related to performance-related issues\n");
@@ -192,7 +192,7 @@ int main(int Argc, char *Argv[]) {
// as we're parsing below
g = new Globals;
bool debugSet = false, optSet = false;
bool debugSet = false, optSet = false, targetSet = false;
Module::OutputType ot = Module::Object;
for (int i = 1; i < argc; ++i) {
@@ -226,6 +226,7 @@ int main(int Argc, char *Argv[]) {
else if (!strcmp(argv[i], "--target")) {
if (++i == argc) usage(1);
lDoTarget(argv[i]);
targetSet = true;
}
else if (!strncmp(argv[i], "--target=", 9)) {
const char *target = argv[i] + 9;
@@ -315,6 +316,11 @@ int main(int Argc, char *Argv[]) {
if (debugSet && !optSet)
g->opt.level = 0;
// Make SSE2 the default target on atom unless the target has been set
// explicitly.
if (!targetSet && (g->target.cpu == "atom"))
lDoTarget("sse2");
m = new Module(file);
if (m->CompileFile() == 0) {
if (outFileName != NULL)

View File

@@ -248,8 +248,8 @@ lRecursiveCheckVarying(const Type *t) {
const StructType *st = dynamic_cast<const StructType *>(t);
if (st) {
for (int i = 0; i < st->NumElements(); ++i)
if (lRecursiveCheckVarying(st->GetMemberType(i)))
for (int i = 0; i < st->GetElementCount(); ++i)
if (lRecursiveCheckVarying(st->GetElementType(i)))
return true;
}
return false;
@@ -1041,8 +1041,8 @@ Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName
static void
lRecursiveAddStructs(const StructType *structType,
std::vector<const StructType *> &structParamTypes) {
for (int i = 0; i < structType->NumElements(); ++i) {
const Type *elementBaseType = structType->GetMemberType(i)->GetBaseType();
for (int i = 0; i < structType->GetElementCount(); ++i) {
const Type *elementBaseType = structType->GetElementType(i)->GetBaseType();
const StructType *elementStructType =
dynamic_cast<const StructType *>(elementBaseType);
if (elementStructType != NULL) {
@@ -1112,9 +1112,9 @@ lEmitStructDecls(std::vector<const StructType *> &structTypes, FILE *file) {
StructDAGNode *node = new StructDAGNode;
structToNode[st] = node;
for (int j = 0; j < st->NumElements(); ++j) {
for (int j = 0; j < st->GetElementCount(); ++j) {
const StructType *elementStructType =
dynamic_cast<const StructType *>(st->GetMemberType(j));
dynamic_cast<const StructType *>(st->GetElementType(j));
// If this element is a struct type and we haven't already
// processed it for the current struct type, then update the
// dependencies and record that this element type has other
@@ -1144,8 +1144,8 @@ lEmitStructDecls(std::vector<const StructType *> &structTypes, FILE *file) {
for (unsigned int i = 0; i < sortedTypes.size(); ++i) {
const StructType *st = sortedTypes[i];
fprintf(file, "struct %s {\n", st->GetStructName().c_str());
for (int j = 0; j < st->NumElements(); ++j) {
const Type *type = st->GetMemberType(j)->GetAsNonConstType();
for (int j = 0; j < st->GetElementCount(); ++j) {
const Type *type = st->GetElementType(j)->GetAsNonConstType();
std::string d = type->GetCDeclaration(st->GetElementName(j));
fprintf(file, " %s;\n", d.c_str());
}
@@ -1210,8 +1210,8 @@ lGetVectorsFromStructs(const std::vector<const StructType *> &structParamTypes,
std::vector<const VectorType *> *vectorParamTypes) {
for (unsigned int i = 0; i < structParamTypes.size(); ++i) {
const StructType *structType = structParamTypes[i];
for (int j = 0; j < structType->NumElements(); ++j) {
const Type *elementType = structType->GetMemberType(j);
for (int j = 0; j < structType->GetElementCount(); ++j) {
const Type *elementType = structType->GetElementType(j);
const ArrayType *at = dynamic_cast<const ArrayType *>(elementType);
if (at)

27
opt.cpp
View File

@@ -2116,11 +2116,12 @@ CreateLowerGatherScatterPass() {
// IsCompileTimeConstantPass
/** LLVM IR implementations of target-specific functions may include calls
to a function "bool __is_compile_time_constant_mask(mask type)"; this
allows them to have specialied code paths for where the mask is known
at compile time but not incurring the cost of a MOVMSK call at runtime
to compute its value in cases where the mask value isn't known until
runtime.
to the functions "bool __is_compile_time_constant_mask(mask type)" and
"bool __is_compile_time_constant_int32(i32)"; these allow them to have
specialized code paths for where the corresponding value is known at
compile time. For masks, for example, this allows them to not incur
the cost of a MOVMSK call at runtime to compute its value in cases
where the mask value isn't known until runtime.
This pass resolves these calls into either 'true' or 'false' values so
that later optimization passes can operate with these as constants.
@@ -2148,17 +2149,17 @@ llvm::RegisterPass<IsCompileTimeConstantPass>
bool
IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Function *func = m->module->getFunction("__is_compile_time_constant_mask");
if (!func)
return false;
llvm::Function *maskFunc = m->module->getFunction("__is_compile_time_constant_mask");
llvm::Function *int32Func = m->module->getFunction("__is_compile_time_constant_int32");
bool modifiedAny = false;
restart:
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
// Iterate through the instructions looking for calls to
// __is_compile_time_constant_mask().
// Iterate through the instructions looking for calls to the
// __is_compile_time_constant_*() functions
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
if (!callInst || callInst->getCalledFunction() != func)
if (!callInst || (callInst->getCalledFunction() != maskFunc &&
callInst->getCalledFunction() != int32Func))
continue;
// This optimization pass can be disabled with the (poorly named)
@@ -2171,8 +2172,8 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// Is it a constant? Bingo, turn the call's value into a constant
// true value.
llvm::Value *mask = callInst->getArgOperand(0);
if (llvm::isa<llvm::Constant>(mask)) {
llvm::Value *operand = callInst->getArgOperand(0);
if (llvm::isa<llvm::Constant>(operand)) {
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, LLVMTrue);
modifiedAny = true;
goto restart;

View File

@@ -564,9 +564,11 @@ struct_or_union_specifier
{
std::vector<const Type *> elementTypes;
std::vector<std::string> elementNames;
GetStructTypesAndNames(*$4, &elementTypes, &elementNames);
std::vector<SourcePos> elementPositions;
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
&elementPositions);
StructType *st = new StructType($2, elementTypes, elementNames,
false, true, @2);
elementPositions, false, true, @2);
m->symbolTable->AddType($2, st, @2);
$$ = st;
}
@@ -574,8 +576,11 @@ struct_or_union_specifier
{
std::vector<const Type *> elementTypes;
std::vector<std::string> elementNames;
GetStructTypesAndNames(*$3, &elementTypes, &elementNames);
$$ = new StructType("", elementTypes, elementNames, false, true, @1);
std::vector<SourcePos> elementPositions;
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
&elementPositions);
$$ = new StructType("", elementTypes, elementNames, elementPositions,
false, true, @1);
}
| struct_or_union '{' '}'
{

View File

@@ -1,40 +1,86 @@
#!/bin/zsh
#!/bin/bash
surprises=0
verbose=false
number=$(ls -1 tests/*.ispc|wc -l)
counter=1
target=sse4
echo Running correctness tests
# Parse command-line options.  The leading ':' in the optstring selects
# getopts' silent error mode, so missing arguments and unknown options are
# reported by the ':' and '?' cases below.  The ':' after 't' marks -t as
# taking an argument; without it $OPTARG is never set for -t and the target
# name is left behind to be treated as a test filename.
while getopts ":vt:h" opt; do
    case $opt in
        v) verbose=true
           ;;
        t) target=$OPTARG
           ;;
        h) cat <<EOF
usage: run_tests.sh [-v] [-t target] [filenames]
-v # verbose output
-t # specify compilation target (SSE4 is the default).
[filenames] # (optional) files to run through testing infrastructure
# if none are provided, all in tests/ will be run.
EOF
           exit 1
           ;;
        :) echo "run_tests.sh: option -$OPTARG requires an argument" >&2
           exit 1
           ;;
        \?) echo "run_tests.sh: unknown option -$OPTARG (use -h for help)" >&2
           exit 1
           ;;
    esac
done
for i in tests/*.ispc; do
bc=${i%%ispc}bc
ispc -O2 $i -woff -o $bc --emit-llvm --target=sse4
if [[ $? != 0 ]]; then
surprises=1
echo Test $i FAILED ispc compile
echo
else
ispc_test $bc
shift $(( $OPTIND - 1 ))
if [[ "$1" > 0 ]]; then
while [[ "$1" > 0 ]]; do
i=$1
shift
echo Running test $i
bc=${i%%ispc}bc
ispc -O2 $i -woff -o $bc --emit-llvm --target=$target
if [[ $? != 0 ]]; then
surprises=1
echo Test $i FAILED ispc_test
echo Test $i FAILED ispc compile
echo
else
ispc_test $bc
if [[ $? != 0 ]]; then
surprises=1
echo Test $i FAILED ispc_test
echo
fi
fi
/bin/rm $bc
done
else
echo Running all correctness tests
for i in tests/*.ispc; do
if $verbose; then
echo -en "Running test $counter of $number.\r"
fi
(( counter++ ))
bc=${i%%ispc}bc
ispc -O2 $i -woff -o $bc --emit-llvm --target=$target
if [[ $? != 0 ]]; then
surprises=1
echo Test $i FAILED ispc compile
echo
else
ispc_test $bc
if [[ $? != 0 ]]; then
surprises=1
echo Test $i FAILED ispc_test
echo
fi
fi
/bin/rm $bc
done
echo Running failing tests
for i in failing_tests/*.ispc; do
(ispc -O2 $i -woff -o - --emit-llvm | ispc_test -) 2>/dev/null 1>/dev/null
if [[ $? == 0 ]]; then
surprises=1
echo Test $i UNEXPECTEDLY PASSED
echo
fi
# cmp $bc tests_bitcode${bc##tests}
# if [[ $? == 0 ]]; then
# /bin/rm $bc
# fi
fi
/bin/rm $bc
done
echo Running failing tests
for i in failing_tests/*.ispc; do
(ispc -O2 $i -woff -o - --emit-llvm | ispc_test -) 2>/dev/null 1>/dev/null
if [[ $? == 0 ]]; then
surprises=1
echo Test $i UNEXPECTEDLY PASSED
echo
fi
done
done
fi
if [[ $surprises == 0 ]]; then
echo No surprises.

View File

@@ -525,12 +525,53 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
}
define void @__masked_store_blend_64(<8 x i64>* nocapture, <8 x i64>,
<8 x i32>) nounwind alwaysinline {
; always just serialize it
; FIXME: should implement the "do two 32-bit masked stores" stuff that
; other targets do...
call void @__masked_store_64(<8 x i64>* nocapture %0, <8 x i64> %1, <8 x i32> %2)
define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
<8 x i32> %i32mask) nounwind alwaysinline {
%oldValue = load <8 x i64>* %ptr, align 8
%mask = bitcast <8 x i32> %i32mask to <8 x float>
; Do 4x64-bit blends by doing two <8 x i32> blends, where the <8 x i32> values
; are actually bitcast <4 x i64> values
;
; set up the first four 64-bit values
%old01 = shufflevector <8 x i64> %oldValue, <8 x i64> undef,
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
%old01f = bitcast <4 x i64> %old01 to <8 x float>
%new01 = shufflevector <8 x i64> %new, <8 x i64> undef,
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
%new01f = bitcast <4 x i64> %new01 to <8 x float>
; compute mask--note that the indices are all doubled-up
%mask01 = shufflevector <8 x float> %mask, <8 x float> undef,
<8 x i32> <i32 0, i32 0, i32 1, i32 1,
i32 2, i32 2, i32 3, i32 3>
; and blend them
%result01f = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %old01f,
<8 x float> %new01f,
<8 x float> %mask01)
%result01 = bitcast <8 x float> %result01f to <4 x i64>
; and again
%old23 = shufflevector <8 x i64> %oldValue, <8 x i64> undef,
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
%old23f = bitcast <4 x i64> %old23 to <8 x float>
%new23 = shufflevector <8 x i64> %new, <8 x i64> undef,
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
%new23f = bitcast <4 x i64> %new23 to <8 x float>
; compute mask--note that the values are doubled-up...
%mask23 = shufflevector <8 x float> %mask, <8 x float> undef,
<8 x i32> <i32 4, i32 4, i32 5, i32 5,
i32 6, i32 6, i32 7, i32 7>
; and blend them
%result23f = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %old23f,
<8 x float> %new23f,
<8 x float> %mask23)
%result23 = bitcast <8 x float> %result23f to <4 x i64>
; reconstruct the final <8 x i64> vector
%final = shufflevector <4 x i64> %result01, <4 x i64> %result23,
<8 x i32> <i32 0, i32 1, i32 2, i32 3,
i32 4, i32 5, i32 6, i32 7>
store <8 x i64> %final, <8 x i64> * %ptr, align 8
ret void
}

View File

@@ -81,6 +81,54 @@ static inline uniform unsigned int64 intbits(uniform double d) {
return __intbits_uniform_double(d);
}
// broadcast(v, i): returns a value in which every program instance holds
// the element that v has in program instance i.  Each overload forwards to
// the matching __broadcast_<type>() builtin, which extracts element i of
// the vector and inserts it into every lane of the result.
// NOTE(review): no range check on i is visible here; presumably callers
// must keep it in [0, programCount) -- confirm.

// float overload
static inline float broadcast(float v, uniform int i) {
return __broadcast_float(v, i);
}
// int32 overload
static inline int32 broadcast(int32 v, uniform int i) {
return __broadcast_int32(v, i);
}
// double overload
static inline double broadcast(double v, uniform int i) {
return __broadcast_double(v, i);
}
// int64 overload
static inline int64 broadcast(int64 v, uniform int i) {
return __broadcast_int64(v, i);
}
// rotate(v, i): returns a value in which program instance k holds the
// element that v has in program instance (k + i), with the index wrapped
// around by masking it with (vector width - 1); negative offsets therefore
// rotate in the other direction.  Each overload forwards to the matching
// __rotate_<type>() builtin.

// float overload
static inline float rotate(float v, uniform int i) {
return __rotate_float(v, i);
}
// int32 overload
static inline int32 rotate(int32 v, uniform int i) {
return __rotate_int32(v, i);
}
// double overload
static inline double rotate(double v, uniform int i) {
return __rotate_double(v, i);
}
// int64 overload
static inline int64 rotate(int64 v, uniform int i) {
return __rotate_int64(v, i);
}
// shuffle(v, i): general permutation.  Unlike broadcast() and rotate(),
// the index i is varying: program instance k of the result holds the
// element that v has in program instance i[k].  Each overload forwards to
// the matching __shuffle_<type>() builtin.
// NOTE(review): the builtin applies no wrapping or clamping to the index
// values; presumably each must lie in [0, programCount) -- confirm.

// float overload
static inline float shuffle(float v, int i) {
return __shuffle_float(v, i);
}
// int32 overload
static inline int32 shuffle(int32 v, int i) {
return __shuffle_int32(v, i);
}
// double overload
static inline double shuffle(double v, int i) {
return __shuffle_double(v, i);
}
// int64 overload
static inline int64 shuffle(int64 v, int i) {
return __shuffle_int64(v, i);
}
// x[i]
static inline uniform float extract(float x, uniform int i) {
return __extract(x, i);

View File

@@ -34,6 +34,8 @@
;; builtins for various targets can use macros from this file to simplify
;; generating code for their implementations of those builtins.
declare i1 @__is_compile_time_constant_int32(i32)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -284,6 +286,22 @@ ret <8 x float> %ret
'
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; forloop macro
divert(`-1')
# forloop(var, from, to, stmt) - improved version:
# works even if VAR is not a strict macro name
# performs sanity check that TO is not smaller than FROM
# allows complex numerical expressions in TO and FROM
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
`pushdef(`$1', eval(`$2'))_$0(`$1',
eval(`$3'), `$4')popdef(`$1')')')
define(`_forloop',
`$3`'ifelse(indir(`$1'), `$2', `',
`define(`$1', incr(indir(`$1')))$0($@)')')
divert`'dnl
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; stdlib_core
;;
@@ -291,8 +309,67 @@ ret <8 x float> %ret
;; target's vector width, which it takes as its first parameter.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
define(`shuffles', `
define internal <$1 x $2> @__broadcast_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
%v = extractelement <$1 x $2> %0, i32 %1
%r_0 = insertelement <$1 x $2> undef, $2 %v, i32 0
forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2 %v, i32 i
')
ret <$1 x $2> %r_`'eval($1-1)
}
define internal <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
%isc = call i1 @__is_compile_time_constant_int32(i32 %1)
br i1 %isc, label %is_const, label %not_const
is_const:
; though verbose, this turns into tight code if %1 is a constant
forloop(i, 0, eval($1-1), `
%delta_`'i = add i32 %1, i
%delta_clamped_`'i = and i32 %delta_`'i, eval($1-1)
%v_`'i = extractelement <$1 x $2> %0, i32 %delta_clamped_`'i')
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
')
ret <$1 x $2> %ret_`'eval($1-1)
not_const:
; store two instances of the vector into memory
%ptr = alloca <$1 x $2>, i32 2
%ptr0 = getelementptr <$1 x $2> * %ptr, i32 0
store <$1 x $2> %0, <$1 x $2> * %ptr0
%ptr1 = getelementptr <$1 x $2> * %ptr, i32 1
store <$1 x $2> %0, <$1 x $2> * %ptr1
; compute offset in [0,vectorwidth-1], then index into the doubled-up vector
%offset = and i32 %1, eval($1-1)
%ptr_as_elt_array = bitcast <$1 x $2> * %ptr to [eval(2*$1) x $2] *
%load_ptr = getelementptr [eval(2*$1) x $2] * %ptr_as_elt_array, i32 0, i32 %offset
%load_ptr_vec = bitcast $2 * %load_ptr to <$1 x $2> *
%result = load <$1 x $2> * %load_ptr_vec, align $4
ret <$1 x $2> %result
}
define internal <$1 x $2> @__shuffle_$3(<$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
forloop(i, 0, eval($1-1), `
%index_`'i = extractelement <$1 x i32> %1, i32 i')
forloop(i, 0, eval($1-1), `
%v_`'i = extractelement <$1 x $2> %0, i32 %index_`'i')
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
')
ret <$1 x $2> %ret_`'eval($1-1)
}
')
define(`stdlib_core', `
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector ops
@@ -307,6 +384,10 @@ define internal <$1 x float> @__insert(<$1 x float>, i32,
ret <$1 x float> %insert
}
shuffles($1, float, float, 4)
shuffles($1, i32, int32, 4)
shuffles($1, double, double, 8)
shuffles($1, i64, int64, 8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; various bitcasts from one type to another
@@ -524,7 +605,6 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
;; FIXME: use the per_lane macro, defined below, to implement these!
define(`packed_load_and_store', `
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
define i32 @__packed_load_active([0 x i32] *, i32 %start_offset, <$1 x i32> * %val_ptr,
<$1 x i32> %full_mask) nounwind alwaysinline {
@@ -661,19 +741,6 @@ done:
;; Inside this code, any instances of the text "LANE" are replaced
;; with an i32 value that represents the current lane number
divert(`-1')
# forloop(var, from, to, stmt) - improved version:
# works even if VAR is not a strict macro name
# performs sanity check that FROM is larger than TO
# allows complex numerical expressions in TO and FROM
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
`pushdef(`$1', eval(`$2'))_$0(`$1',
eval(`$3'), `$4')popdef(`$1')')')
define(`_forloop',
`$3`'ifelse(indir(`$1'), `$2', `',
`define(`$1', incr(indir(`$1')))$0($@)')')
divert`'dnl
; num lanes, mask, code block to do per lane
define(`per_lane', `
br label %pl_entry

103
stmt.cpp
View File

@@ -178,88 +178,59 @@ lInitSymbol(llvm::Value *lvalue, const char *symName, const Type *type,
return;
}
// There are two cases for initializing arrays and vectors; either a single
// initializer may be provided (float foo[3] = 0;), in which case all
// of the array elements are initialized to the given value, or an
// initializer list may be provided (float foo[3] = { 1,2,3 }), in
// which case the array elements are initialized with the corresponding
// There are two cases for initializing structs, arrays and vectors;
// either a single initializer may be provided (float foo[3] = 0;), in
// which case all of the elements are initialized to the given value,
// or an initializer list may be provided (float foo[3] = { 1,2,3 }),
// in which case the elements are initialized with the corresponding
// values.
const SequentialType *seqType = dynamic_cast<const SequentialType *>(type);
if (seqType != NULL) {
ExprList *exprList = dynamic_cast<ExprList *>(initExpr);
if (exprList == NULL) {
// We have single expression; loop over the elements of the
// array/vector and initialize each of them with it
// individually.
for (int i = 0; i < seqType->GetElementCount(); ++i) {
llvm::Value *ptr = ctx->GetElementPtrInst(lvalue, 0, i, "offset");
lInitSymbol(ptr, symName, seqType->GetElementType(), initExpr,
ctx, pos);
}
}
else {
// Otherwise make sure that we have the same number of elements
// in the { } initializer expression as we have in the
// array/vector
int nInits = exprList->exprs.size();
if (nInits != seqType->GetElementCount()) {
const char *actualType = dynamic_cast<const ArrayType *>(type) ?
"Array" : "Vector";
Error(initExpr->pos, "%s initializer for variable \"%s\" requires "
"%d values; %d provided.", actualType, symName,
seqType->GetElementCount(), nInits);
}
else {
// And initialize each of the array/vector elements with
// the corresponding expression from the ExprList
for (int i = 0; i < nInits; ++i) {
llvm::Value *ptr = ctx->GetElementPtrInst(lvalue, 0, i, "offset");
lInitSymbol(ptr, symName, seqType->GetElementType(),
exprList->exprs[i], ctx, pos);
}
}
}
return;
}
const CollectionType *collectionType =
dynamic_cast<const CollectionType *>(type);
if (collectionType != NULL) {
std::string name;
if (dynamic_cast<const StructType *>(type) != NULL)
name = "struct";
else if (dynamic_cast<const ArrayType *>(type) != NULL)
name = "array";
else if (dynamic_cast<const VectorType *>(type) != NULL)
name = "vector";
else
FATAL("Unexpected CollectionType in lInitSymbol()");
// Structs can similarly be initialized in one of two ways; either with
// a list of expressions in braces, one expression per struct member,
// or with a single expression that is used to initialize all struct
// members.
const StructType *st = dynamic_cast<const StructType *>(type);
if (st) {
ExprList *exprList = dynamic_cast<ExprList *>(initExpr);
if (exprList != NULL) {
// The { ... } case; make sure we have the same number of
// expressions in the ExprList as we have struct members
int nInits = exprList->exprs.size();
if (nInits != st->NumElements())
Error(initExpr->pos,
"Initializer for struct \"%s\" requires %d values; %d provided.",
symName, st->NumElements(), nInits);
else {
// Initialize each struct member with the corresponding
// value from the ExprList
for (int i = 0; i < nInits; ++i) {
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "structelement");
lInitSymbol(ep, symName, st->GetMemberType(i), exprList->exprs[i],
ctx, pos);
}
if (nInits != collectionType->GetElementCount()) {
Error(initExpr->pos, "Initializer for %s \"%s\" requires "
"%d values; %d provided.", name.c_str(), symName,
collectionType->GetElementCount(), nInits);
return;
}
// Initialize each element with the corresponding value from
// the ExprList
for (int i = 0; i < nInits; ++i) {
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "element");
lInitSymbol(ep, symName, collectionType->GetElementType(i),
exprList->exprs[i], ctx, pos);
}
}
else if (initExpr->GetType()->IsNumericType() ||
initExpr->GetType()->IsBoolType()) {
// Otherwise initialize all of the struct elements in turn with
// the initExpr.
for (int i = 0; i < st->NumElements(); ++i) {
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "structelement");
lInitSymbol(ep, symName, st->GetMemberType(i), initExpr, ctx, pos);
// Otherwise initialize all of the elements in turn with the
// initExpr.
for (int i = 0; i < collectionType->GetElementCount(); ++i) {
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "element");
lInitSymbol(ep, symName, collectionType->GetElementType(i),
initExpr, ctx, pos);
}
}
else {
Error(initExpr->pos, "Can't assign type \"%s\" to \"%s\".",
initExpr->GetType()->GetString().c_str(),
st->GetString().c_str());
collectionType->GetString().c_str());
}
return;
}

12
tests/broadcast-1.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Test: int32 broadcast() where the lane index is computed from the
// uniform argument b rather than written as a literal.

// Reports the number of program instances.
export uniform int width() { return programCount; }
// Loads one value per program instance, broadcasts the value from lane
// (b - 2) to all lanes, and writes the result out.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int a = aFOO[programIndex];
int br = broadcast(a, (uniform int)b-2);
RET[programIndex] = br;
}
// Expected output: 4 in every lane.
// NOTE(review): presumably the harness passes aFOO = 1, 2, 3, ... and
// b = 5, so that lane 3 holds 4 -- confirm against ispc_test.
export void result(uniform float RET[]) {
RET[programIndex] = 4;
}

12
tests/broadcast.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Test: float broadcast() with a literal lane index.

// Reports the number of program instances.
export uniform int width() { return programCount; }
// Loads one value per program instance, broadcasts the value from lane 2
// to all lanes, and writes the result out.
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = broadcast(a, 2);
RET[programIndex] = b;
}
// Expected output: 3 in every lane.
// NOTE(review): presumably the harness passes aFOO = 1, 2, 3, ..., so
// that lane 2 holds 3 -- confirm against ispc_test.
export void result(uniform float RET[]) {
RET[programIndex] = 3;
}

12
tests/rotate-1.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Test: int32 rotate() with a literal negative offset.

// Reports the number of program instances.
export uniform int width() { return programCount; }
// Loads one value per program instance and rotates by -1, so each lane
// receives the value from the lane before it (wrapping around).  The
// argument b is not used by this test.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int a = aFOO[programIndex];
int rot = rotate(a, -1);
RET[programIndex] = rot;
}
// Expected output: the value from lane (programIndex - 1) modulo
// programCount.
// NOTE(review): the leading "1 +" presumably reflects aFOO[k] == k + 1 --
// confirm against ispc_test.
export void result(uniform float RET[]) {
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
}

13
tests/rotate-2.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: rotate() applied to a varying int where the offset is a uniform
// variable computed from the parameter b, rather than a literal.
// NOTE(review): the "// -1" annotation below implies the harness passes
// b = 5, and the expected formula in result() is consistent with
// aFOO[i] = i + 1; the harness is not visible here -- confirm.

// Reports the number of program instances to the caller.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// The float input is converted to int, so rotate() is called with an int.
int a = aFOO[programIndex];
uniform int delta = b - 6; // -1
int rot = rotate(a, delta);
RET[programIndex] = rot;
}
// Expected output: 1 plus the index of the preceding program instance, with
// wrap-around; programCount is added before the modulo so the left operand
// stays non-negative for programIndex == 0.
export void result(uniform float RET[]) {
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
}

13
tests/rotate-3.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: rotate() applied to a varying int64 where the offset is a uniform
// variable computed from the parameter b, rather than a literal.
// NOTE(review): the "// -1" annotation below implies the harness passes
// b = 5, and the expected formula in result() is consistent with
// aFOO[i] = i + 1; the harness is not visible here -- confirm.

// Reports the number of program instances to the caller.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// The float input is converted to int64, so rotate() is called with an int64.
int64 a = aFOO[programIndex];
uniform int delta = b - 6; // -1
int64 rot = rotate(a, delta);
RET[programIndex] = rot;
}
// Expected output: 1 plus the index of the preceding program instance, with
// wrap-around; programCount is added before the modulo so the left operand
// stays non-negative for programIndex == 0.
export void result(uniform float RET[]) {
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
}

12
tests/rotate-4.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Test: rotate() applied to a varying int64 with a literal negative offset
// (-1). The parameter b is accepted but not used by this test.
// NOTE(review): the expected formula in result() is consistent with the test
// harness supplying aFOO[i] = i + 1; the harness is not visible here --
// confirm.

// Reports the number of program instances to the caller.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// The float input is converted to int64, so rotate() is called with an int64.
int64 a = aFOO[programIndex];
int64 rot = rotate(a, -1);
RET[programIndex] = rot;
}
// Expected output: 1 plus the index of the preceding program instance, with
// wrap-around; programCount is added before the modulo so the left operand
// stays non-negative for programIndex == 0.
export void result(uniform float RET[]) {
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
}

12
tests/rotate.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Test: rotate() applied to a varying int with a literal positive offset (2).
// The parameter b is accepted but not used by this test.
// NOTE(review): the expected formula in result() is consistent with the test
// harness supplying aFOO[i] = i + 1; the harness is not visible here --
// confirm.

// Reports the number of program instances to the caller.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// The float input is converted to int, so rotate() is called with an int.
int a = aFOO[programIndex];
int rot = rotate(a, 2);
RET[programIndex] = rot;
}
// Expected output: 1 plus the index of the program instance two positions
// ahead, with wrap-around.
export void result(uniform float RET[]) {
RET[programIndex] = 1 + (programIndex + 2) % programCount;
}

13
tests/shuffle-1.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: shuffle() applied to a varying float with a varying permutation that
// reverses the order of the program instances. The parameter b is accepted
// but not used by this test.
// NOTE(review): the expected formula in result() is consistent with the test
// harness supplying aFOO[i] = i + 1; the harness is not visible here --
// confirm.

// Reports the number of program instances to the caller.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
// Each instance reads from its mirror image: 0 <-> programCount-1, etc.
int reverse = programCount - 1 - programIndex;
float shuf = shuffle(a, reverse);
RET[programIndex] = shuf;
}
// Expected output: the reversed sequence, programCount down to 1.
export void result(uniform float RET[]) {
RET[programIndex] = programCount - programIndex;
}

13
tests/shuffle-2.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: shuffle() applied to a varying float with a reversing permutation
// whose index expression also includes a term derived from the parameter b.
// NOTE(review): the added term (int)b - 5 is zero if the harness passes
// b = 5, presumably so the permutation depends on a run-time value while the
// expected result matches the plain reversal; the expected formula is also
// consistent with aFOO[i] = i + 1. The harness is not visible here -- confirm.

// Reports the number of program instances to the caller.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
int reverse = programCount - 1 - programIndex + (int)b - 5;
float shuf = shuffle(a, reverse);
RET[programIndex] = shuf;
}
// Expected output: the reversed sequence, programCount down to 1.
export void result(uniform float RET[]) {
RET[programIndex] = programCount - programIndex;
}

12
tests/shuffle.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Test: shuffle() applied to a varying int32 with a literal index (1), so
// every program instance reads from the same source instance. The parameter
// b is accepted but not used by this test.
// NOTE(review): the expected value in result() is consistent with the test
// harness supplying aFOO[i] = i + 1 (instance 1 would then hold 2); the
// harness is not visible here -- confirm.

// Reports the number of program instances to the caller.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// The float input is converted to int32, so shuffle() is called with an int32.
int32 a = aFOO[programIndex];
int32 shuf = shuffle(a, 1);
RET[programIndex] = shuf;
}
// Expected output: every program instance holds the same value, 2.
export void result(uniform float RET[]) {
RET[programIndex] = 2;
}

View File

@@ -410,6 +410,14 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
}
///////////////////////////////////////////////////////////////////////////
// SequentialType
// Indexed element-type query for sequential types.  The index argument is
// not consulted: the request is forwarded unchanged to the no-argument
// GetElementType() overload, whose result is returned as-is.
const Type *
SequentialType::GetElementType(int index) const {
    const Type *commonElementType = this->GetElementType();
    return commonElementType;
}
///////////////////////////////////////////////////////////////////////////
// ArrayType
@@ -961,9 +969,10 @@ VectorType::getVectorMemoryCount() const {
StructType::StructType(const std::string &n, const std::vector<const Type *> &elts,
const std::vector<std::string> &en,
const std::vector<SourcePos> &ep,
bool ic, bool iu, SourcePos p)
: name(n), elementTypes(elts), elementNames(en), isUniform(iu), isConst(ic),
pos(p) {
: name(n), elementTypes(elts), elementNames(en), elementPositions(ep),
isUniform(iu), isConst(ic), pos(p) {
}
@@ -1014,8 +1023,8 @@ StructType::GetAsVaryingType() const {
if (IsVaryingType())
return this;
else
return new StructType(name, elementTypes, elementNames, isConst,
false, pos);
return new StructType(name, elementTypes, elementNames, elementPositions,
isConst, false, pos);
}
@@ -1024,8 +1033,8 @@ StructType::GetAsUniformType() const {
if (IsUniformType())
return this;
else
return new StructType(name, elementTypes, elementNames, isConst,
true, pos);
return new StructType(name, elementTypes, elementNames, elementPositions,
isConst, true, pos);
}
@@ -1034,11 +1043,12 @@ StructType::GetSOAType(int width) const {
std::vector<const Type *> et;
// The SOA version of a structure is just a structure that holds SOAed
// versions of its elements
for (int i = 0; i < NumElements(); ++i) {
const Type *t = GetMemberType(i);
for (int i = 0; i < GetElementCount(); ++i) {
const Type *t = GetElementType(i);
et.push_back(t->GetSOAType(width));
}
return new StructType(name, et, elementNames, isConst, isUniform, pos);
return new StructType(name, et, elementNames, elementPositions,
isConst, isUniform, pos);
}
@@ -1047,8 +1057,8 @@ StructType::GetAsConstType() const {
if (IsConstType())
return this;
else
return new StructType(name, elementTypes, elementNames, true,
isUniform, pos);
return new StructType(name, elementTypes, elementNames,
elementPositions, true, isUniform, pos);
}
@@ -1057,8 +1067,8 @@ StructType::GetAsNonConstType() const {
if (!IsConstType())
return this;
else
return new StructType(name, elementTypes, elementNames, false,
isUniform, pos);
return new StructType(name, elementTypes, elementNames, elementPositions,
false, isUniform, pos);
}
@@ -1123,8 +1133,8 @@ StructType::GetCDeclaration(const std::string &n) const {
const llvm::Type *
StructType::LLVMType(llvm::LLVMContext *ctx) const {
std::vector<const llvm::Type *> llvmTypes;
for (int i = 0; i < NumElements(); ++i) {
const Type *type = GetMemberType(i);
for (int i = 0; i < GetElementCount(); ++i) {
const Type *type = GetElementType(i);
llvmTypes.push_back(type->LLVMType(ctx));
}
return llvm::StructType::get(*ctx, llvmTypes);
@@ -1138,14 +1148,13 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
return llvm::DIType();
#else
uint64_t currentSize = 0, align = 0;
llvm::DIFile diFile = pos.GetDIFile();
std::vector<llvm::Value *> elementLLVMTypes;
// Walk through the elements of the struct; for each one figure out its
// alignment and size, using that to figure out its offset w.r.t. the
// start of the structure.
for (unsigned int i = 0; i < elementTypes.size(); ++i) {
llvm::DIType eltType = GetMemberType(i)->GetDIType(scope);
llvm::DIType eltType = GetElementType(i)->GetDIType(scope);
uint64_t eltAlign = eltType.getAlignInBits();
uint64_t eltSize = eltType.getSizeInBits();
@@ -1159,12 +1168,19 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
currentSize += eltAlign - (currentSize % eltAlign);
assert((currentSize == 0) || (currentSize % eltAlign) == 0);
// FIXME: we should pass this actual file/line number for the
// member, not the position of the struct declaration
llvm::DIFile diFile = elementPositions[i].GetDIFile();
int line = elementPositions[i].first_line;
#ifdef LLVM_2_9
llvm::DIType fieldType =
m->diBuilder->createMemberType(elementNames[i], diFile, pos.first_line,
m->diBuilder->createMemberType(elementNames[i], diFile, line,
eltSize, eltAlign, currentSize, 0,
eltType);
#else
llvm::DIType fieldType =
m->diBuilder->createMemberType(scope, elementNames[i], diFile,
line, eltSize, eltAlign,
currentSize, 0, eltType);
#endif // LLVM_2_9
elementLLVMTypes.push_back(fieldType);
currentSize += eltSize;
@@ -1181,6 +1197,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
#else
llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes);
#endif
llvm::DIFile diFile = pos.GetDIFile();
return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, currentSize,
align, 0, elements);
#endif // LLVM_2_8
@@ -1188,7 +1205,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
const Type *
StructType::GetMemberType(int i) const {
StructType::GetElementType(int i) const {
assert(i < (int)elementTypes.size());
// If the struct is uniform qualified, then each member comes out with
// the same type as in the original source file. If it's varying, then
@@ -1200,7 +1217,7 @@ StructType::GetMemberType(int i) const {
const Type *
StructType::GetMemberType(const std::string &n) const {
StructType::GetElementType(const std::string &n) const {
for (unsigned int i = 0; i < elementNames.size(); ++i)
if (elementNames[i] == n) {
const Type *ret = isUniform ? elementTypes[i] :
@@ -1212,7 +1229,7 @@ StructType::GetMemberType(const std::string &n) const {
int
StructType::GetMemberNumber(const std::string &n) const {
StructType::GetElementNumber(const std::string &n) const {
for (unsigned int i = 0; i < elementNames.size(); ++i)
if (elementNames[i] == n)
return i;
@@ -1766,10 +1783,10 @@ Type::Equal(const Type *a, const Type *b) {
const StructType *sta = dynamic_cast<const StructType *>(a);
const StructType *stb = dynamic_cast<const StructType *>(b);
if (sta && stb) {
if (sta->NumElements() != stb->NumElements())
if (sta->GetElementCount() != stb->GetElementCount())
return false;
for (int i = 0; i < sta->NumElements(); ++i)
if (!Equal(sta->GetMemberType(i), stb->GetMemberType(i)))
for (int i = 0; i < sta->GetElementCount(); ++i)
if (!Equal(sta->GetElementType(i), stb->GetElementType(i)))
return false;
return true;
}

51
type.h
View File

@@ -243,19 +243,42 @@ private:
};
/** @brief Abstract base class for tpyes that represent sequences
/** @brief Abstract base class for types that represent collections of
other types.
This is a common base class that StructTypes, ArrayTypes, and
VectorTypes all inherit from.
*/
class CollectionType : public Type {
public:
/** Returns the total number of elements in the collection. */
virtual int GetElementCount() const = 0;
/** Returns the type of the element given by index.  (The value of
index must be between 0 and GetElementCount()-1.)
*/
virtual const Type *GetElementType(int index) const = 0;
};
/** @brief Abstract base class for types that represent sequences
SequentialType is an abstract base class that adds interface routines
for types that represent linear sequences of other types (i.e., arrays
and vectors).
*/
class SequentialType : public Type {
class SequentialType : public CollectionType {
public:
/** Returns the total number of elements in the sequence. */
virtual int GetElementCount() const = 0;
/** Returns the Type of the elements that the sequence stores. */
/** Returns the Type of the elements that the sequence stores; for
SequentialTypes, all elements have the same type. */
virtual const Type *GetElementType() const = 0;
/** SequentialType provides an implementation of this CollectionType
method, just passing the query on to the GetElementType(void)
implementation, since all of the elements of a SequentialType have
the same type.
*/
const Type *GetElementType(int index) const;
};
@@ -439,10 +462,11 @@ private:
/** @brief Representation of a structure holding a number of members.
*/
class StructType : public Type {
class StructType : public CollectionType {
public:
StructType(const std::string &name, const std::vector<const Type *> &elts,
const std::vector<std::string> &eltNames, bool isConst,
const std::vector<std::string> &eltNames,
const std::vector<SourcePos> &eltPositions, bool isConst,
bool isUniform, SourcePos pos);
bool IsUniformType() const;
@@ -468,21 +492,21 @@ public:
/** Returns the type of the structure element with the given name (if any).
Returns NULL if there is no such named element. */
const Type *GetMemberType(const std::string &name) const;
const Type *GetElementType(const std::string &name) const;
/** Returns the type of the i'th structure element. The value of \c i must
be between 0 and GetElementCount()-1. */
const Type *GetMemberType(int i) const;
const Type *GetElementType(int i) const;
/** Returns which structure element number (starting from zero) that
has the given name. If there is no such element, return -1. */
int GetMemberNumber(const std::string &name) const;
int GetElementNumber(const std::string &name) const;
/** Returns the name of the i'th element of the structure. */
const std::string GetElementName(int i) const { return elementNames[i]; }
/** Returns the total number of elements in the structure. */
int NumElements() const { return int(elementTypes.size()); }
int GetElementCount() const { return int(elementTypes.size()); }
/** Returns the name of the structure type. (e.g. struct Foo -> "Foo".) */
const std::string &GetStructName() const { return name; }
@@ -501,6 +525,9 @@ private:
*/
const std::vector<const Type *> elementTypes;
const std::vector<std::string> elementNames;
/** Source file position at which each structure element declaration
appeared. */
const std::vector<SourcePos> elementPositions;
const bool isUniform;
const bool isConst;
const SourcePos pos;