Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
32764e7639 | ||
|
|
bcae21dbca | ||
|
|
eb22fa6173 | ||
|
|
5f7e61f9b5 | ||
|
|
28a68e3c1f | ||
|
|
6b153566f3 | ||
|
|
214fb3197a | ||
|
|
b4068efcfb | ||
|
|
24216d841f | ||
|
|
be45beb54b | ||
|
|
cb58c78c1a | ||
|
|
86de910ecd | ||
|
|
ce7978ae74 | ||
|
|
7aec7486f8 | ||
|
|
b6d6ee6fc2 | ||
|
|
cb74346d36 | ||
|
|
2709c354d7 | ||
|
|
36063bae79 | ||
|
|
e6d6a82484 | ||
|
|
f830e21cfa | ||
|
|
ae2c24c3c1 | ||
|
|
6dfd74c74c | ||
|
|
7055888cb7 | ||
|
|
7854a71ea9 | ||
|
|
b7519d1268 | ||
|
|
f2758f0831 | ||
|
|
ff76c2334e | ||
|
|
9b6bf5dabc | ||
|
|
ab33afaea4 | ||
|
|
fab5794faf | ||
|
|
3c3cd88692 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -4,3 +4,4 @@ depend
|
||||
ispc
|
||||
ispc_test
|
||||
objs
|
||||
docs/doxygen
|
||||
|
||||
4
Makefile
4
Makefile
@@ -94,9 +94,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
$(STDLIB_SRC): stdlib.m4
|
||||
|
||||
objs/stdlib-%.cpp: stdlib-%.ll
|
||||
objs/stdlib-%.cpp: stdlib-%.ll stdlib.m4 stdlib-sse.ll
|
||||
@echo Creating C++ source from stdlib file $<
|
||||
@m4 stdlib.m4 $< | ./bitcode2cpp.py $< > $@
|
||||
|
||||
|
||||
103
ctx.cpp
103
ctx.cpp
@@ -1109,7 +1109,7 @@ FunctionEmitContext::BitCastInst(llvm::Value *value, const llvm::Type *type,
|
||||
}
|
||||
|
||||
|
||||
llvm::Instruction *
|
||||
llvm::Value *
|
||||
FunctionEmitContext::PtrToIntInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
@@ -1117,16 +1117,31 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const llvm::Type *type,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// TODO: we should probably handle the array case as in
|
||||
// e.g. BitCastInst(), but we don't currently need that functionality
|
||||
llvm::Instruction *inst =
|
||||
new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
|
||||
AddDebugPos(inst);
|
||||
return inst;
|
||||
const llvm::Type *valType = value->getType();
|
||||
const llvm::ArrayType *at = llvm::dyn_cast<const llvm::ArrayType>(valType);
|
||||
if (at && llvm::isa<const llvm::PointerType>(at->getElementType())) {
|
||||
// varying lvalue -> apply ptr to int to the individual pointers
|
||||
assert((int)at->getNumElements() == g->target.vectorWidth);
|
||||
|
||||
llvm::Value *ret =
|
||||
llvm::UndefValue::get(llvm::ArrayType::get(type, g->target.vectorWidth));
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
||||
llvm::Value *elt = ExtractInst(value, i);
|
||||
llvm::Value *p2i = PtrToIntInst(elt, type, name);
|
||||
ret = InsertInst(ret, p2i, i);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
else {
|
||||
llvm::Instruction *inst =
|
||||
new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
|
||||
AddDebugPos(inst);
|
||||
return inst;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
llvm::Instruction *
|
||||
llvm::Value *
|
||||
FunctionEmitContext::IntToPtrInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
@@ -1134,12 +1149,27 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value, const llvm::Type *type,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// TODO: we should probably handle the array case as in
|
||||
// e.g. BitCastInst(), but we don't currently need that functionality
|
||||
llvm::Instruction *inst =
|
||||
new llvm::IntToPtrInst(value, type, name ? name : "int2ptr", bblock);
|
||||
AddDebugPos(inst);
|
||||
return inst;
|
||||
const llvm::Type *valType = value->getType();
|
||||
const llvm::ArrayType *at = llvm::dyn_cast<const llvm::ArrayType>(valType);
|
||||
if (at && llvm::isa<const llvm::PointerType>(at->getElementType())) {
|
||||
// varying lvalue -> apply int to ptr to the individual pointers
|
||||
assert((int)at->getNumElements() == g->target.vectorWidth);
|
||||
|
||||
llvm::Value *ret =
|
||||
llvm::UndefValue::get(llvm::ArrayType::get(type, g->target.vectorWidth));
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
||||
llvm::Value *elt = ExtractInst(value, i);
|
||||
llvm::Value *i2p = IntToPtrInst(elt, type, name);
|
||||
ret = InsertInst(ret, i2p, i);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
else {
|
||||
llvm::Instruction *inst =
|
||||
new llvm::IntToPtrInst(value, type, name ? name : "int2ptr", bblock);
|
||||
AddDebugPos(inst);
|
||||
return inst;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1359,10 +1389,10 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
|
||||
// If we're gathering structures, do an element-wise gather
|
||||
// recursively.
|
||||
llvm::Value *retValue = llvm::UndefValue::get(retType);
|
||||
for (int i = 0; i < st->NumElements(); ++i) {
|
||||
for (int i = 0; i < st->GetElementCount(); ++i) {
|
||||
llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
|
||||
// This in turn will be another gather
|
||||
llvm::Value *eltValues = LoadInst(eltPtrs, st->GetMemberType(i),
|
||||
llvm::Value *eltValues = LoadInst(eltPtrs, st->GetElementType(i),
|
||||
name);
|
||||
retValue = InsertInst(retValue, eltValues, i, "set_value");
|
||||
}
|
||||
@@ -1482,6 +1512,16 @@ FunctionEmitContext::AllocaInst(const llvm::Type *llvmType, const char *name,
|
||||
// current basic block
|
||||
inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
|
||||
|
||||
// If no alignment was specified but we have an array of a uniform
|
||||
// type, then align it to 4 * the native vector width; it's not
|
||||
// unlikely that this array will be loaded into varying variables with
|
||||
// what will be aligned accesses if the uniform -> varying load is done
|
||||
// in regular chunks.
|
||||
const llvm::ArrayType *arrayType = llvm::dyn_cast<const llvm::ArrayType>(llvmType);
|
||||
if (align == 0 && arrayType != NULL &&
|
||||
!llvm::isa<const llvm::VectorType>(arrayType->getElementType()))
|
||||
align = 4 * g->target.nativeVectorWidth;
|
||||
|
||||
if (align != 0)
|
||||
inst->setAlignment(align);
|
||||
// Don't add debugging info to alloca instructions
|
||||
@@ -1506,29 +1546,18 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
|
||||
assert(llvm::isa<const llvm::PointerType>(lvalue->getType()));
|
||||
|
||||
const StructType *structType = dynamic_cast<const StructType *>(rvalueType);
|
||||
if (structType != NULL) {
|
||||
// Assigning a structure
|
||||
for (int i = 0; i < structType->NumElements(); ++i) {
|
||||
const CollectionType *collectionType =
|
||||
dynamic_cast<const CollectionType *>(rvalueType);
|
||||
if (collectionType != NULL) {
|
||||
// Assigning a structure / array / vector. Handle each element
|
||||
// individually with what turns into a recursive call to
|
||||
// maskedStore()
|
||||
for (int i = 0; i < collectionType->GetElementCount(); ++i) {
|
||||
llvm::Value *eltValue = ExtractInst(rvalue, i, "rvalue_member");
|
||||
llvm::Value *eltLValue = GetElementPtrInst(lvalue, 0, i,
|
||||
"struct_lvalue_ptr");
|
||||
StoreInst(eltValue, eltLValue, storeMask,
|
||||
structType->GetMemberType(i));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const SequentialType *sequentialType =
|
||||
dynamic_cast<const SequentialType *>(rvalueType);
|
||||
if (sequentialType != NULL) {
|
||||
// Assigning arrays and vectors. Handle each element individually
|
||||
// with what turns into a recursive call to maskedStore()
|
||||
for (int i = 0; i < sequentialType->GetElementCount(); ++i) {
|
||||
llvm::Value *eltLValue = GetElementPtrInst(lvalue, 0, i, "lval_i_ptr");
|
||||
llvm::Value *eltValue = ExtractInst(rvalue, i, "array_i_val");
|
||||
StoreInst(eltValue, eltLValue, storeMask,
|
||||
sequentialType->GetElementType());
|
||||
collectionType->GetElementType(i));
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -1588,10 +1617,10 @@ FunctionEmitContext::scatter(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
const StructType *structType = dynamic_cast<const StructType *>(rvalueType);
|
||||
if (structType) {
|
||||
// Scatter the struct elements individually
|
||||
for (int i = 0; i < structType->NumElements(); ++i) {
|
||||
for (int i = 0; i < structType->GetElementCount(); ++i) {
|
||||
llvm::Value *lv = GetElementPtrInst(lvalue, 0, i);
|
||||
llvm::Value *rv = ExtractInst(rvalue, i);
|
||||
scatter(rv, lv, storeMask, structType->GetMemberType(i));
|
||||
scatter(rv, lv, storeMask, structType->GetElementType(i));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
8
ctx.h
8
ctx.h
@@ -305,10 +305,10 @@ public:
|
||||
|
||||
llvm::Value *BitCastInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *PtrToIntInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *IntToPtrInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Value *PtrToIntInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Value *IntToPtrInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *TruncInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
|
||||
|
||||
8
decl.cpp
8
decl.cpp
@@ -318,9 +318,10 @@ Declaration::Print() const {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames) {
|
||||
GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames,
|
||||
std::vector<SourcePos> *elementPositions) {
|
||||
for (unsigned int i = 0; i < sd.size(); ++i) {
|
||||
const Type *type = sd[i]->type;
|
||||
// FIXME: making this fake little DeclSpecs here is really
|
||||
@@ -343,6 +344,7 @@ GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
|
||||
|
||||
elementTypes->push_back(d->sym->type);
|
||||
elementNames->push_back(d->sym->name);
|
||||
elementPositions->push_back(d->sym->pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
7
decl.h
7
decl.h
@@ -196,8 +196,9 @@ struct StructDeclaration {
|
||||
|
||||
/** Given a set of StructDeclaration instances, this returns the types of
|
||||
the elements of the corresponding struct and their names. */
|
||||
extern void GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames);
|
||||
extern void GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames,
|
||||
std::vector<SourcePos> *elementPositions);
|
||||
|
||||
#endif // ISPC_DECL_H
|
||||
|
||||
26
docs/ReleaseNotes.txt
Normal file
26
docs/ReleaseNotes.txt
Normal file
@@ -0,0 +1,26 @@
|
||||
=== v1.0.2 ===
|
||||
|
||||
Floating-point hexadecimal constants are now parsed correctly on Windows
|
||||
(fixes issue #16).
|
||||
|
||||
SSE2 is now the default target if --cpu=atom is given in the command line
|
||||
arguments and another target isn't explicitly specified.
|
||||
|
||||
The standard library now provides broadcast(), rotate(), and shuffle()
|
||||
routines for efficient communication between program instances.
|
||||
|
||||
The MSVC solution files to build the examples on Windows now use
|
||||
/fpmath:fast when building.
|
||||
|
||||
=== v1.0.1 === (24 June 2011)
|
||||
|
||||
ispc no longer requires that pointers to memory that are passed in to ispc
|
||||
have alignment equal to the target's vector width; now alignment just has to
|
||||
be the regular element alignment (e.g. 4 bytes for floats, etc.) This
|
||||
change also fixed a number of cases where it previously incorrectly
|
||||
generated aligned load/store instructions in cases where the address wasn't
|
||||
actually aligned (even if the base address passed into ispc code was).
|
||||
|
||||
=== v1.0 === (21 June 2011)
|
||||
|
||||
Initial Release
|
||||
@@ -74,7 +74,8 @@ Contents:
|
||||
|
||||
+ `Math Functions`_
|
||||
+ `Output Functions`_
|
||||
+ `Cross-Lane Operations`_
|
||||
+ `Cross-Program Instance Operations`_
|
||||
+ `Packed Load and Store Operations`_
|
||||
+ `Low-Level Bits`_
|
||||
|
||||
* `Interoperability with the Application`_
|
||||
@@ -136,7 +137,7 @@ Linux\* and Mac OS\* available for download. Alternatively, you can
|
||||
download the source code from that page and build it yourself; see the
|
||||
`ispc wiki`_ for instructions about building ``ispc`` from source.
|
||||
|
||||
.. _ispc downloads web page:downloads.html
|
||||
.. _ispc downloads web page: downloads.html
|
||||
.. _ispc wiki: http://github.com/ispc/ispc/wiki
|
||||
|
||||
Once you have an executable for your system, copy it into a directory
|
||||
@@ -340,7 +341,7 @@ before it's compiled. On Windows®, pre-processor definitions should be
|
||||
provided to the ``cl`` call.
|
||||
|
||||
By default, the compiler generates x86-64 Intel® SSE4 code. To generate
|
||||
32-bit code, you can use the the ``--arch=x86`` command-line flag. To
|
||||
32-bit code, you can use the ``--arch=x86`` command-line flag. To
|
||||
select Intel® SSE2, use ``--target=sse2``.
|
||||
|
||||
``ispc`` supports an alternative method for generating Intel® SSE4 code,
|
||||
@@ -1246,7 +1247,7 @@ section.)
|
||||
For ``if`` statements where the different running SPMD program instances
|
||||
don't have coherent values for the boolean ``if`` test, using ``cif``
|
||||
introduces some additional overhead from the ``all`` and ``any`` tests as
|
||||
well as the corresponding branches. For cases where the the program
|
||||
well as the corresponding branches. For cases where the program
|
||||
instances often do compute the same boolean value, this overhead is
|
||||
worthwhile. If the control flow is in fact usually incoherent, this
|
||||
overhead only costs performance.
|
||||
@@ -1659,14 +1660,14 @@ values for the inactive program instances aren't printed. (In other cases,
|
||||
they may have garbage values or be otherwise undefined.)
|
||||
|
||||
|
||||
Cross-Lane Operations
|
||||
---------------------
|
||||
Cross-Program Instance Operations
|
||||
---------------------------------
|
||||
|
||||
Usually, ``ispc`` code expresses independent computation on separate data
|
||||
elements. There are, however, a number of cases where it's useful for the
|
||||
program instances to be able to cooperate in computing results. The
|
||||
cross-lane operations described in this section provide primitives for
|
||||
communication between the running program instances.
|
||||
Usually, ``ispc`` code expresses independent programs performing
|
||||
computation on separate data elements. There are, however, a number of
|
||||
cases where it's useful for the program instances to be able to cooperate
|
||||
in computing results.  The cross-program instance operations described in this section
|
||||
provide primitives for communication between the running program instances.
|
||||
|
||||
A few routines evaluate conditions across the running program
|
||||
instances. For example, ``any()`` returns ``true`` if the given value
|
||||
@@ -1678,6 +1679,47 @@ and ``all()`` returns ``true`` if it true for all of them.
|
||||
uniform bool any(bool v)
|
||||
uniform bool all(bool v)
|
||||
|
||||
To broadcast a value from one program instance to all of the others, a
|
||||
``broadcast()`` function is available. It broadcasts the value of the
|
||||
``value`` parameter for the program instance given by ``index`` to all of
|
||||
the running program instances.
|
||||
|
||||
::
|
||||
|
||||
float broadcast(float value, uniform int index)
|
||||
int32 broadcast(int32 value, uniform int index)
|
||||
double broadcast(double value, uniform int index)
|
||||
int64 broadcast(int64 value, uniform int index)
|
||||
|
||||
The ``rotate()`` function allows each program instance to find the value of
|
||||
the given value that their neighbor ``offset`` steps away has. For
|
||||
example, on an 8-wide target, if ``value`` has the value (1, 2, 3, 4, 5,
|
||||
6, 7, 8) in each of the running program instances, then ``rotate(value,
|
||||
-1)`` causes the first program instance to get the value 8, the second
|
||||
program instance to get the value 1, the third 2, and so forth. The
|
||||
provided offset value can be positive or negative, and may be greater than
|
||||
``programCount`` (it is masked to ensure valid offsets).
|
||||
|
||||
::
|
||||
|
||||
float rotate(float value, uniform int offset)
|
||||
int32 rotate(int32 value, uniform int offset)
|
||||
double rotate(double value, uniform int offset)
|
||||
int64 rotate(int64 value, uniform int offset)
|
||||
|
||||
|
||||
Finally, ``shuffle()`` allows fully general shuffling of values among the
|
||||
program instances. Each program instance's value of permutation gives the
|
||||
program instance from which to get the value of ``value``. The provided
|
||||
values for ``permutation`` must all be between 0 and ``programCount-1``.
|
||||
|
||||
::
|
||||
|
||||
float shuffle(float value, int permutation)
|
||||
int32 shuffle(int32 value, int permutation)
|
||||
double shuffle(double value, int permutation)
|
||||
int64 shuffle(int64 value, int permutation)
|
||||
|
||||
The various variants of ``popcnt()`` return the population count--the
|
||||
number of bits set in the given value.
|
||||
|
||||
@@ -1719,8 +1761,12 @@ given value across all of the currently-executing vector lanes.
|
||||
uniform unsigned int reduce_max(unsigned int a, unsigned int b)
|
||||
|
||||
|
||||
Finally, there are routines for writing out and reading in values from
|
||||
linear memory locations for the active program instances.
|
||||
|
||||
Packed Load and Store Operations
|
||||
--------------------------------
|
||||
|
||||
The standard library also offers routines for writing out and reading in
|
||||
values from linear memory locations for the active program instances.
|
||||
``packed_load_active()`` loads consecutive values from the given array,
|
||||
starting at ``a[offset]``, loading one value for each currently-executing
|
||||
program instance and storing it into that program instance's ``val``
|
||||
@@ -1797,14 +1843,15 @@ and this conversion step are necessary because ``ispc`` doesn't have native
|
||||
void store_to_int16(uniform int a[], uniform int offset,
|
||||
unsigned int val)
|
||||
|
||||
There are two things to note in these functions. First, note that these
|
||||
There are three things to note in these functions. First, note that these
|
||||
functions take ``unsigned int`` arrays as parameters; you need
|
||||
to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side to
|
||||
``unsigned int`` when passing them to ``ispc`` code. Second, although the
|
||||
arrays are passed as ``unsigned int``, in the array indexing calculation,
|
||||
with the ``offset`` parameter, they are treated as if they were ``int8`` or
|
||||
``int16`` types.  (i.e. the offset is treated as being in terms of number of 8
|
||||
or 16-bit elements.)
|
||||
or 16-bit elements.) Third, note that programIndex is implicitly added
|
||||
to offset.
|
||||
|
||||
The ``intbits()`` and ``floatbits()`` functions can be used to implement
|
||||
low-level floating-point bit twiddling. For example, ``intbits()`` returns
|
||||
@@ -2279,21 +2326,11 @@ elements to work with and then proceeds with the computation.
|
||||
Communicating Between SPMD Program Instances
|
||||
--------------------------------------------
|
||||
|
||||
The ``programIndex`` built-in variable (see `Mapping Data To Program
|
||||
Instances`_) can be used to communicate between the set of executing
|
||||
program instances. Consider the following code, which shows all of the
|
||||
program instances writing into unique locations in an array.
|
||||
|
||||
::
|
||||
|
||||
float x = ...;
|
||||
uniform float allX[programCount];
|
||||
allX[programIndex] = x;
|
||||
|
||||
In this code, a program instance that reads ``allX[0]`` finds the value of
|
||||
``x`` that was computed by the first of the running program instances, and
|
||||
so forth. Program instances can communicate with their neighbor instances
|
||||
with indexing like ``allX[(programIndex+1)%programCount]``.
|
||||
The ``broadcast()``, ``rotate()``, and ``shuffle()`` standard library
|
||||
routines provide a variety of mechanisms for the running program instances
|
||||
to communicate values to each other during execution. See the section
|
||||
`Cross-Program Instance Operations`_ for more information about their
|
||||
operation.
|
||||
|
||||
|
||||
Gather and Scatter
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.0
|
||||
PROJECT_NUMBER = 1.0.2
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -102,6 +102,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -115,6 +117,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -130,6 +134,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -147,6 +152,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
@@ -81,6 +81,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -94,6 +96,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -109,6 +113,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -126,6 +131,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -158,4 +164,4 @@
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
@@ -81,6 +81,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -94,6 +96,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -109,6 +113,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -126,6 +131,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
||||
@@ -54,8 +54,6 @@ extern void binomial_put_serial(float Sa[], float Xa[], float Ta[],
|
||||
float result[], int count);
|
||||
|
||||
int main() {
|
||||
// Pointers passed to ispc code must have alignment of the target's
|
||||
// vector width at minimum.
|
||||
float *S = new float[N_OPTIONS];
|
||||
float *X = new float[N_OPTIONS];
|
||||
float *T = new float[N_OPTIONS];
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
@@ -82,6 +82,8 @@
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -96,6 +98,8 @@
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -112,6 +116,7 @@
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -130,6 +135,7 @@
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -165,4 +171,4 @@
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
@@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
@@ -81,6 +81,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -94,6 +96,8 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -109,6 +113,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -126,6 +131,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@@ -162,4 +168,4 @@ cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
121
expr.cpp
121
expr.cpp
@@ -1526,7 +1526,7 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
if (st != NULL) {
|
||||
bool anyUniform = false;
|
||||
for (int i = 0; i < st->NumElements(); ++i) {
|
||||
if (st->GetMemberType(i)->IsUniformType())
|
||||
if (st->GetElementType(i)->IsUniformType())
|
||||
anyUniform = true;
|
||||
}
|
||||
|
||||
@@ -2489,71 +2489,57 @@ ExprList::TypeCheck() {
|
||||
|
||||
llvm::Constant *
|
||||
ExprList::GetConstant(const Type *type) const {
|
||||
const StructType *structType = dynamic_cast<const StructType *>(type);
|
||||
const SequentialType *sequentialType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
const CollectionType *collectionType =
|
||||
dynamic_cast<const CollectionType *>(type);
|
||||
if (collectionType == NULL)
|
||||
return NULL;
|
||||
|
||||
if (structType != NULL) {
|
||||
// We can potentially return an llvm::ConstantStruct if we have the
|
||||
// same number of elements in the ExprList as the struct has
|
||||
// members (and the various elements line up with the shape of the
|
||||
// corresponding struct elements).
|
||||
if ((int)exprs.size() != structType->NumElements()) {
|
||||
Error(pos, "Initializer list for struct \"%s\" must have %d "
|
||||
"elements (has %d).", structType->GetString().c_str(),
|
||||
(int)exprs.size(), structType->NumElements());
|
||||
std::string name;
|
||||
if (dynamic_cast<const StructType *>(type) != NULL)
|
||||
name = "struct";
|
||||
else if (dynamic_cast<const ArrayType *>(type) != NULL)
|
||||
name = "array";
|
||||
else if (dynamic_cast<const VectorType *>(type) != NULL)
|
||||
name = "vector";
|
||||
else
|
||||
FATAL("Unexpected CollectionType in ExprList::GetConstant()");
|
||||
|
||||
if ((int)exprs.size() != collectionType->GetElementCount()) {
|
||||
Error(pos, "Initializer list for %s \"%s\" must have %d elements "
|
||||
"(has %d).", name.c_str(), collectionType->GetString().c_str(),
|
||||
collectionType->GetElementCount(), (int)exprs.size());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
std::vector<llvm::Constant *> cv;
|
||||
for (unsigned int i = 0; i < exprs.size(); ++i) {
|
||||
if (exprs[i] == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
std::vector<llvm::Constant *> cv;
|
||||
for (unsigned int i = 0; i < exprs.size(); ++i) {
|
||||
if (exprs[i] == NULL)
|
||||
return NULL;
|
||||
const Type *elementType = structType->GetMemberType(i);
|
||||
llvm::Constant *c = exprs[i]->GetConstant(elementType);
|
||||
if (c == NULL)
|
||||
// If this list element couldn't convert to the right
|
||||
// constant type for the corresponding struct member, then
|
||||
// give up
|
||||
return NULL;
|
||||
cv.push_back(c);
|
||||
}
|
||||
const Type *elementType = collectionType->GetElementType(i);
|
||||
llvm::Constant *c = exprs[i]->GetConstant(elementType);
|
||||
if (c == NULL)
|
||||
// If this list element couldn't convert to the right constant
|
||||
// type for the corresponding collection member, then give up.
|
||||
return NULL;
|
||||
cv.push_back(c);
|
||||
}
|
||||
|
||||
if (dynamic_cast<const StructType *>(type) != NULL) {
|
||||
#if defined(LLVM_2_8) || defined(LLVM_2_9)
|
||||
return llvm::ConstantStruct::get(*g->ctx, cv, false);
|
||||
#else
|
||||
const llvm::StructType *llvmStructType =
|
||||
llvm::dyn_cast<const llvm::StructType>(structType->LLVMType(g->ctx));
|
||||
llvm::dyn_cast<const llvm::StructType>(collectionType->LLVMType(g->ctx));
|
||||
assert(llvmStructType != NULL);
|
||||
return llvm::ConstantStruct::get(llvmStructType, cv);
|
||||
#endif
|
||||
}
|
||||
else if (sequentialType) {
|
||||
// Similarly, if we have an array or vector type, we may be able to
|
||||
// return the corresponding llvm constant value.
|
||||
if ((int)exprs.size() != sequentialType->GetElementCount()) {
|
||||
bool isArray = (dynamic_cast<const ArrayType *>(type) != NULL);
|
||||
Error(pos, "Initializer list for %s \"%s\" must have %d elements (has %d).",
|
||||
isArray ? "array" : "vector", sequentialType->GetString().c_str(),
|
||||
(int)exprs.size(), sequentialType->GetElementCount());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
std::vector<llvm::Constant *> cv;
|
||||
for (unsigned int i = 0; i < exprs.size(); ++i) {
|
||||
if (exprs[i] == NULL)
|
||||
return NULL;
|
||||
const Type *elementType = sequentialType->GetElementType();
|
||||
llvm::Constant *c = exprs[i]->GetConstant(elementType);
|
||||
if (c == NULL)
|
||||
return NULL;
|
||||
cv.push_back(c);
|
||||
}
|
||||
|
||||
else {
|
||||
const llvm::Type *lt = type->LLVMType(g->ctx);
|
||||
const llvm::ArrayType *lat = llvm::dyn_cast<const llvm::ArrayType>(lt);
|
||||
// FIXME: should the assert below validly fail for uniform vectors
|
||||
// now?
|
||||
// now? Need a test case to reproduce it and then to be sure we
|
||||
// have the right fix; leave the assert until we can hit it...
|
||||
assert(lat != NULL);
|
||||
return llvm::ConstantArray::get(lat, cv);
|
||||
}
|
||||
@@ -2832,7 +2818,7 @@ MemberExpr::GetType() const {
|
||||
// Otherwise it's a struct, and the result type is the element
|
||||
// type, possibly promoted to varying if the struct type / lvalue
|
||||
// is varying.
|
||||
const Type *elementType = structType->GetMemberType(identifier);
|
||||
const Type *elementType = structType->GetElementType(identifier);
|
||||
if (!elementType)
|
||||
Error(identifierPos, "Element name \"%s\" not present in struct type \"%s\".%s",
|
||||
identifier.c_str(), structType->GetString().c_str(),
|
||||
@@ -2912,7 +2898,7 @@ MemberExpr::getElementNumber() const {
|
||||
}
|
||||
}
|
||||
else {
|
||||
elementNumber = structType->GetMemberNumber(identifier);
|
||||
elementNumber = structType->GetElementNumber(identifier);
|
||||
if (elementNumber == -1)
|
||||
Error(identifierPos, "Element name \"%s\" not present in struct type \"%s\".%s",
|
||||
identifier.c_str(), structType->GetString().c_str(),
|
||||
@@ -3004,7 +2990,7 @@ MemberExpr::getCandidateNearMatches() const {
|
||||
return "";
|
||||
|
||||
std::vector<std::string> elementNames;
|
||||
for (int i = 0; i < structType->NumElements(); ++i)
|
||||
for (int i = 0; i < structType->GetElementCount(); ++i)
|
||||
elementNames.push_back(structType->GetElementName(i));
|
||||
std::vector<std::string> alternates = MatchStrings(identifier, elementNames);
|
||||
if (!alternates.size())
|
||||
@@ -3900,25 +3886,14 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value,
|
||||
const llvm::Type *llvmType = type->GetAsVaryingType()->LLVMType(g->ctx);
|
||||
llvm::Value *retValue = llvm::UndefValue::get(llvmType);
|
||||
|
||||
// for structs, just recursively make their elements varying (if
|
||||
// needed) and populate the return struct
|
||||
const StructType *structType = dynamic_cast<const StructType *>(type);
|
||||
if (structType != NULL) {
|
||||
for (int i = 0; i < structType->NumElements(); ++i) {
|
||||
llvm::Value *v = ctx->ExtractInst(value, i, "struct_element");
|
||||
v = lUniformValueToVarying(ctx, v, structType->GetMemberType(i));
|
||||
retValue = ctx->InsertInst(retValue, v, i, "set_struct_element");
|
||||
}
|
||||
return retValue;
|
||||
}
|
||||
|
||||
// And similarly do the elements of arrays and vectors individually
|
||||
const SequentialType *sequentialType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
if (sequentialType != NULL) {
|
||||
for (int i = 0; i < sequentialType->GetElementCount(); ++i) {
|
||||
// for structs/arrays/vectors, just recursively make their elements
|
||||
// varying (if needed) and populate the return value.
|
||||
const CollectionType *collectionType =
|
||||
dynamic_cast<const CollectionType *>(type);
|
||||
if (collectionType != NULL) {
|
||||
for (int i = 0; i < collectionType->GetElementCount(); ++i) {
|
||||
llvm::Value *v = ctx->ExtractInst(value, i, "get_element");
|
||||
v = lUniformValueToVarying(ctx, v, sequentialType->GetElementType());
|
||||
v = lUniformValueToVarying(ctx, v, collectionType->GetElementType(i));
|
||||
retValue = ctx->InsertInst(retValue, v, i, "set_element");
|
||||
}
|
||||
return retValue;
|
||||
|
||||
91
lex.ll
91
lex.ll
@@ -45,6 +45,7 @@ static void lCComment(SourcePos *);
|
||||
static void lCppComment(SourcePos *);
|
||||
static void lHandleCppHash(SourcePos *);
|
||||
static void lStringConst(YYSTYPE *, SourcePos *);
|
||||
static double lParseHexFloat(const char *ptr);
|
||||
|
||||
#define YY_USER_ACTION \
|
||||
yylloc->first_line = yylloc->last_line; \
|
||||
@@ -65,7 +66,8 @@ inline int isatty(int) { return 0; }
|
||||
|
||||
WHITESPACE [ \t\r]+
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))
|
||||
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)|([-]?0x[01]\.?[0-9a-fA-F]+p[-+]?[0-9]+[fF]?)
|
||||
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)
|
||||
HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?)
|
||||
|
||||
IDENT [a-zA-Z_][a-zA-Z_0-9]*
|
||||
|
||||
@@ -182,13 +184,15 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
}
|
||||
|
||||
{FLOAT_NUMBER} {
|
||||
/* FIXME: need to implement a hex float constant parser so that we can
|
||||
support them on Windows (which doesn't handle them in its atof()
|
||||
implementation... */
|
||||
yylval->floatVal = atof(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
{HEX_FLOAT_NUMBER} {
|
||||
yylval->floatVal = lParseHexFloat(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
"++" { return TOKEN_INC_OP; }
|
||||
"--" { return TOKEN_DEC_OP; }
|
||||
"<<" { return TOKEN_LEFT_OP; }
|
||||
@@ -424,3 +428,82 @@ lStringConst(YYSTYPE *yylval, SourcePos *pos)
|
||||
}
|
||||
yylval->stringVal = new std::string(str);
|
||||
}
|
||||
|
||||
|
||||
/** Compute the value 2^n, where the exponent is given as an integer.
|
||||
There are more efficient ways to do this, for example by just slamming
|
||||
the bits into the appropriate bits of the double, but let's just do the
|
||||
obvious thing.
|
||||
*/
|
||||
static double
|
||||
ipow2(int exponent) {
|
||||
if (exponent < 0)
|
||||
return 1. / ipow2(-exponent);
|
||||
|
||||
double ret = 1.;
|
||||
while (exponent > 16) {
|
||||
ret *= 65536.;
|
||||
exponent -= 16;
|
||||
}
|
||||
while (exponent-- > 0)
|
||||
ret *= 2.;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/** Parse a hexadecimal-formatted floating-point number (C99 hex float
|
||||
constant-style).
|
||||
*/
|
||||
static double
|
||||
lParseHexFloat(const char *ptr) {
|
||||
assert(ptr != NULL);
|
||||
|
||||
assert(ptr[0] == '0' && ptr[1] == 'x');
|
||||
ptr += 2;
|
||||
|
||||
// Start initializing the mantissa
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
double mantissa = (*ptr == '1') ? 1. : 0.;
|
||||
++ptr;
|
||||
|
||||
if (*ptr == '.') {
|
||||
// Is there a fraction part? If so, the i'th digit we encounter
|
||||
// gives the 1/(16^i) component of the mantissa.
|
||||
++ptr;
|
||||
|
||||
double scale = 1. / 16.;
|
||||
// Keep going until we come to the 'p', which indicates that we've
|
||||
// come to the exponent
|
||||
while (*ptr != 'p') {
|
||||
// Figure out the raw value from 0-15
|
||||
int digit;
|
||||
if (*ptr >= '0' && *ptr <= '9')
|
||||
digit = *ptr - '0';
|
||||
else if (*ptr >= 'a' && *ptr <= 'f')
|
||||
digit = 10 + *ptr - 'a';
|
||||
else {
|
||||
assert(*ptr >= 'A' && *ptr <= 'F');
|
||||
digit = 10 + *ptr - 'A';
|
||||
}
|
||||
|
||||
// And add its contribution to the mantissa
|
||||
mantissa += scale * digit;
|
||||
scale /= 16.;
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
else
|
||||
// If there's not a '.', then we better be going straight to the
|
||||
// exponent
|
||||
assert(*ptr == 'p');
|
||||
|
||||
++ptr; // skip the 'p'
|
||||
|
||||
// interestingly enough, the exponent is provided base 10..
|
||||
int exponent = (int)strtol(ptr, (char **)NULL, 10);
|
||||
|
||||
// Does stdlib exp2() guarantee exact results for integer n where 2^n can
|
||||
// be represented exactly as doubles? I would hope so but am not sure,
|
||||
// so let's be sure.
|
||||
return mantissa * ipow2(exponent);
|
||||
}
|
||||
|
||||
10
main.cpp
10
main.cpp
@@ -91,7 +91,7 @@ static void usage(int ret) {
|
||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
||||
printf(" [--target={sse2,sse4,sse4x2,avx}] Select target ISA (SSE4 is default)\n");
|
||||
printf(" [--target={sse2,sse4,sse4x2,avx}] Select target ISA (SSE4 is default unless compiling for atom; then SSE2 is.)\n");
|
||||
printf(" [--version]\t\t\t\tPrint ispc version\n");
|
||||
printf(" [--woff]\t\t\t\tDisable warnings\n");
|
||||
printf(" [--wno-perf]\t\t\tDon't issue warnings related to performance-related issues\n");
|
||||
@@ -192,7 +192,7 @@ int main(int Argc, char *Argv[]) {
|
||||
// as we're parsing below
|
||||
g = new Globals;
|
||||
|
||||
bool debugSet = false, optSet = false;
|
||||
bool debugSet = false, optSet = false, targetSet = false;
|
||||
Module::OutputType ot = Module::Object;
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
@@ -226,6 +226,7 @@ int main(int Argc, char *Argv[]) {
|
||||
else if (!strcmp(argv[i], "--target")) {
|
||||
if (++i == argc) usage(1);
|
||||
lDoTarget(argv[i]);
|
||||
targetSet = true;
|
||||
}
|
||||
else if (!strncmp(argv[i], "--target=", 9)) {
|
||||
const char *target = argv[i] + 9;
|
||||
@@ -315,6 +316,11 @@ int main(int Argc, char *Argv[]) {
|
||||
if (debugSet && !optSet)
|
||||
g->opt.level = 0;
|
||||
|
||||
// Make SSE2 the default target on atom unless the target has been set
|
||||
// explicitly.
|
||||
if (!targetSet && (g->target.cpu == "atom"))
|
||||
lDoTarget("sse2");
|
||||
|
||||
m = new Module(file);
|
||||
if (m->CompileFile() == 0) {
|
||||
if (outFileName != NULL)
|
||||
|
||||
20
module.cpp
20
module.cpp
@@ -248,8 +248,8 @@ lRecursiveCheckVarying(const Type *t) {
|
||||
|
||||
const StructType *st = dynamic_cast<const StructType *>(t);
|
||||
if (st) {
|
||||
for (int i = 0; i < st->NumElements(); ++i)
|
||||
if (lRecursiveCheckVarying(st->GetMemberType(i)))
|
||||
for (int i = 0; i < st->GetElementCount(); ++i)
|
||||
if (lRecursiveCheckVarying(st->GetElementType(i)))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@@ -1041,8 +1041,8 @@ Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName
|
||||
static void
|
||||
lRecursiveAddStructs(const StructType *structType,
|
||||
std::vector<const StructType *> &structParamTypes) {
|
||||
for (int i = 0; i < structType->NumElements(); ++i) {
|
||||
const Type *elementBaseType = structType->GetMemberType(i)->GetBaseType();
|
||||
for (int i = 0; i < structType->GetElementCount(); ++i) {
|
||||
const Type *elementBaseType = structType->GetElementType(i)->GetBaseType();
|
||||
const StructType *elementStructType =
|
||||
dynamic_cast<const StructType *>(elementBaseType);
|
||||
if (elementStructType != NULL) {
|
||||
@@ -1112,9 +1112,9 @@ lEmitStructDecls(std::vector<const StructType *> &structTypes, FILE *file) {
|
||||
StructDAGNode *node = new StructDAGNode;
|
||||
structToNode[st] = node;
|
||||
|
||||
for (int j = 0; j < st->NumElements(); ++j) {
|
||||
for (int j = 0; j < st->GetElementCount(); ++j) {
|
||||
const StructType *elementStructType =
|
||||
dynamic_cast<const StructType *>(st->GetMemberType(j));
|
||||
dynamic_cast<const StructType *>(st->GetElementType(j));
|
||||
// If this element is a struct type and we haven't already
|
||||
// processed it for the current struct type, then update the
|
||||
// dependencies and record that this element type has other
|
||||
@@ -1144,8 +1144,8 @@ lEmitStructDecls(std::vector<const StructType *> &structTypes, FILE *file) {
|
||||
for (unsigned int i = 0; i < sortedTypes.size(); ++i) {
|
||||
const StructType *st = sortedTypes[i];
|
||||
fprintf(file, "struct %s {\n", st->GetStructName().c_str());
|
||||
for (int j = 0; j < st->NumElements(); ++j) {
|
||||
const Type *type = st->GetMemberType(j)->GetAsNonConstType();
|
||||
for (int j = 0; j < st->GetElementCount(); ++j) {
|
||||
const Type *type = st->GetElementType(j)->GetAsNonConstType();
|
||||
std::string d = type->GetCDeclaration(st->GetElementName(j));
|
||||
fprintf(file, " %s;\n", d.c_str());
|
||||
}
|
||||
@@ -1210,8 +1210,8 @@ lGetVectorsFromStructs(const std::vector<const StructType *> &structParamTypes,
|
||||
std::vector<const VectorType *> *vectorParamTypes) {
|
||||
for (unsigned int i = 0; i < structParamTypes.size(); ++i) {
|
||||
const StructType *structType = structParamTypes[i];
|
||||
for (int j = 0; j < structType->NumElements(); ++j) {
|
||||
const Type *elementType = structType->GetMemberType(j);
|
||||
for (int j = 0; j < structType->GetElementCount(); ++j) {
|
||||
const Type *elementType = structType->GetElementType(j);
|
||||
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(elementType);
|
||||
if (at)
|
||||
|
||||
27
opt.cpp
27
opt.cpp
@@ -2116,11 +2116,12 @@ CreateLowerGatherScatterPass() {
|
||||
// IsCompileTimeConstantPass
|
||||
|
||||
/** LLVM IR implementations of target-specific functions may include calls
|
||||
to a function "bool __is_compile_time_constant_mask(mask type)"; this
|
||||
allows them to have specialized code paths for where the mask is known
|
||||
at compile time but not incurring the cost of a MOVMSK call at runtime
|
||||
to compute its value in cases where the mask value isn't known until
|
||||
runtime.
|
||||
to the functions "bool __is_compile_time_constant_mask(mask type)" and
|
||||
"bool __is_compile_time_constant_int32(i32)"; these allow them to have
|
||||
specialized code paths for where the corresponding value is known at
|
||||
compile time. For masks, for example, this allows them to not incur
|
||||
the cost of a MOVMSK call at runtime to compute its value in cases
|
||||
where the mask value isn't known until runtime.
|
||||
|
||||
This pass resolves these calls into either 'true' or 'false' values so
|
||||
that later optimization passes can operate with these as constants.
|
||||
@@ -2148,17 +2149,17 @@ llvm::RegisterPass<IsCompileTimeConstantPass>
|
||||
|
||||
bool
|
||||
IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
llvm::Function *func = m->module->getFunction("__is_compile_time_constant_mask");
|
||||
if (!func)
|
||||
return false;
|
||||
llvm::Function *maskFunc = m->module->getFunction("__is_compile_time_constant_mask");
|
||||
llvm::Function *int32Func = m->module->getFunction("__is_compile_time_constant_int32");
|
||||
|
||||
bool modifiedAny = false;
|
||||
restart:
|
||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
||||
// Iterate through the instructions looking for calls to
|
||||
// __is_compile_time_constant_mask().
|
||||
// Iterate through the instructions looking for calls to the
|
||||
// __is_compile_time_constant_*() functions
|
||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
||||
if (!callInst || callInst->getCalledFunction() != func)
|
||||
if (!callInst || (callInst->getCalledFunction() != maskFunc &&
|
||||
callInst->getCalledFunction() != int32Func))
|
||||
continue;
|
||||
|
||||
// This optimization pass can be disabled with the (poorly named)
|
||||
@@ -2171,8 +2172,8 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
|
||||
// Is it a constant? Bingo, turn the call's value into a constant
|
||||
// true value.
|
||||
llvm::Value *mask = callInst->getArgOperand(0);
|
||||
if (llvm::isa<llvm::Constant>(mask)) {
|
||||
llvm::Value *operand = callInst->getArgOperand(0);
|
||||
if (llvm::isa<llvm::Constant>(operand)) {
|
||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, LLVMTrue);
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
|
||||
13
parse.yy
13
parse.yy
@@ -564,9 +564,11 @@ struct_or_union_specifier
|
||||
{
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
GetStructTypesAndNames(*$4, &elementTypes, &elementNames);
|
||||
std::vector<SourcePos> elementPositions;
|
||||
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
StructType *st = new StructType($2, elementTypes, elementNames,
|
||||
false, true, @2);
|
||||
elementPositions, false, true, @2);
|
||||
m->symbolTable->AddType($2, st, @2);
|
||||
$$ = st;
|
||||
}
|
||||
@@ -574,8 +576,11 @@ struct_or_union_specifier
|
||||
{
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
GetStructTypesAndNames(*$3, &elementTypes, &elementNames);
|
||||
$$ = new StructType("", elementTypes, elementNames, false, true, @1);
|
||||
std::vector<SourcePos> elementPositions;
|
||||
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
$$ = new StructType("", elementTypes, elementNames, elementPositions,
|
||||
false, true, @1);
|
||||
}
|
||||
| struct_or_union '{' '}'
|
||||
{
|
||||
|
||||
104
run_tests.sh
104
run_tests.sh
@@ -1,40 +1,86 @@
|
||||
#!/bin/zsh
|
||||
#!/bin/bash
|
||||
|
||||
surprises=0
|
||||
verbose=false
|
||||
number=$(ls -1 tests/*.ispc|wc -l)
|
||||
counter=1
|
||||
target=sse4
|
||||
|
||||
echo Running correctness tests
|
||||
while getopts ":vth" opt;do
|
||||
case $opt in
|
||||
v) verbose=true
|
||||
;;
|
||||
t) target=$OPTARG
|
||||
;;
|
||||
h) cat <<EOF
|
||||
usage: run_tests.sh [-v] [-t target] [filenames]
|
||||
-v # verbose output
|
||||
-t # specify compilation target (SSE4 is the default).
|
||||
[filenames] # (optional) files to run through testing infrastructure
|
||||
# if none are provided, all in tests/ will be run.
|
||||
EOF
|
||||
exit 1
|
||||
esac
|
||||
done
|
||||
|
||||
for i in tests/*.ispc; do
|
||||
bc=${i%%ispc}bc
|
||||
ispc -O2 $i -woff -o $bc --emit-llvm --target=sse4
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc compile
|
||||
echo
|
||||
else
|
||||
ispc_test $bc
|
||||
shift $(( $OPTIND - 1 ))
|
||||
if [[ "$1" > 0 ]]; then
|
||||
while [[ "$1" > 0 ]]; do
|
||||
i=$1
|
||||
shift
|
||||
echo Running test $i
|
||||
|
||||
bc=${i%%ispc}bc
|
||||
ispc -O2 $i -woff -o $bc --emit-llvm --target=$target
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc_test
|
||||
echo Test $i FAILED ispc compile
|
||||
echo
|
||||
else
|
||||
ispc_test $bc
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc_test
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
/bin/rm $bc
|
||||
done
|
||||
else
|
||||
echo Running all correctness tests
|
||||
|
||||
for i in tests/*.ispc; do
|
||||
if $verbose; then
|
||||
echo -en "Running test $counter of $number.\r"
|
||||
fi
|
||||
(( counter++ ))
|
||||
bc=${i%%ispc}bc
|
||||
ispc -O2 $i -woff -o $bc --emit-llvm --target=$target
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc compile
|
||||
echo
|
||||
else
|
||||
ispc_test $bc
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc_test
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
/bin/rm $bc
|
||||
done
|
||||
|
||||
echo Running failing tests
|
||||
for i in failing_tests/*.ispc; do
|
||||
(ispc -O2 $i -woff -o - --emit-llvm | ispc_test -) 2>/dev/null 1>/dev/null
|
||||
if [[ $? == 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i UNEXPECTEDLY PASSED
|
||||
echo
|
||||
fi
|
||||
# cmp $bc tests_bitcode${bc##tests}
|
||||
# if [[ $? == 0 ]]; then
|
||||
# /bin/rm $bc
|
||||
# fi
|
||||
fi
|
||||
/bin/rm $bc
|
||||
done
|
||||
|
||||
echo Running failing tests
|
||||
for i in failing_tests/*.ispc; do
|
||||
(ispc -O2 $i -woff -o - --emit-llvm | ispc_test -) 2>/dev/null 1>/dev/null
|
||||
if [[ $? == 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i UNEXPECTEDLY PASSED
|
||||
echo
|
||||
fi
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ $surprises == 0 ]]; then
|
||||
echo No surprises.
|
||||
|
||||
@@ -525,12 +525,53 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
||||
}
|
||||
|
||||
|
||||
define void @__masked_store_blend_64(<8 x i64>* nocapture, <8 x i64>,
|
||||
<8 x i32>) nounwind alwaysinline {
|
||||
; always just serialize it
|
||||
; FIXME: should implement the "do two 32-bit masked stores" stuff that
|
||||
; other targets do...
|
||||
call void @__masked_store_64(<8 x i64>* nocapture %0, <8 x i64> %1, <8 x i32> %2)
|
||||
define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
||||
<8 x i32> %i32mask) nounwind alwaysinline {
|
||||
%oldValue = load <8 x i64>* %ptr, align 8
|
||||
%mask = bitcast <8 x i32> %i32mask to <8 x float>
|
||||
|
||||
; Do 4x64-bit blends by doing two <8 x i32> blends, where the <8 x i32> values
|
||||
; are actually bitcast <4 x i64> values
|
||||
;
|
||||
; set up the first four 64-bit values
|
||||
%old01 = shufflevector <8 x i64> %oldValue, <8 x i64> undef,
|
||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%old01f = bitcast <4 x i64> %old01 to <8 x float>
|
||||
%new01 = shufflevector <8 x i64> %new, <8 x i64> undef,
|
||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%new01f = bitcast <4 x i64> %new01 to <8 x float>
|
||||
; compute mask--note that the indices are all doubled-up
|
||||
%mask01 = shufflevector <8 x float> %mask, <8 x float> undef,
|
||||
<8 x i32> <i32 0, i32 0, i32 1, i32 1,
|
||||
i32 2, i32 2, i32 3, i32 3>
|
||||
; and blend them
|
||||
%result01f = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %old01f,
|
||||
<8 x float> %new01f,
|
||||
<8 x float> %mask01)
|
||||
%result01 = bitcast <8 x float> %result01f to <4 x i64>
|
||||
|
||||
; and again
|
||||
%old23 = shufflevector <8 x i64> %oldValue, <8 x i64> undef,
|
||||
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%old23f = bitcast <4 x i64> %old23 to <8 x float>
|
||||
%new23 = shufflevector <8 x i64> %new, <8 x i64> undef,
|
||||
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%new23f = bitcast <4 x i64> %new23 to <8 x float>
|
||||
; compute mask--note that the values are doubled-up...
|
||||
%mask23 = shufflevector <8 x float> %mask, <8 x float> undef,
|
||||
<8 x i32> <i32 4, i32 4, i32 5, i32 5,
|
||||
i32 6, i32 6, i32 7, i32 7>
|
||||
; and blend them
|
||||
%result23f = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %old23f,
|
||||
<8 x float> %new23f,
|
||||
<8 x float> %mask23)
|
||||
%result23 = bitcast <8 x float> %result23f to <4 x i64>
|
||||
|
||||
; reconstruct the final <8 x i64> vector
|
||||
%final = shufflevector <4 x i64> %result01, <4 x i64> %result23,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||
i32 4, i32 5, i32 6, i32 7>
|
||||
store <8 x i64> %final, <8 x i64> * %ptr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
48
stdlib.ispc
48
stdlib.ispc
@@ -81,6 +81,54 @@ static inline uniform unsigned int64 intbits(uniform double d) {
|
||||
return __intbits_uniform_double(d);
|
||||
}
|
||||
|
||||
static inline float broadcast(float v, uniform int i) {
|
||||
return __broadcast_float(v, i);
|
||||
}
|
||||
|
||||
static inline int32 broadcast(int32 v, uniform int i) {
|
||||
return __broadcast_int32(v, i);
|
||||
}
|
||||
|
||||
static inline double broadcast(double v, uniform int i) {
|
||||
return __broadcast_double(v, i);
|
||||
}
|
||||
|
||||
static inline int64 broadcast(int64 v, uniform int i) {
|
||||
return __broadcast_int64(v, i);
|
||||
}
|
||||
|
||||
static inline float rotate(float v, uniform int i) {
|
||||
return __rotate_float(v, i);
|
||||
}
|
||||
|
||||
static inline int32 rotate(int32 v, uniform int i) {
|
||||
return __rotate_int32(v, i);
|
||||
}
|
||||
|
||||
static inline double rotate(double v, uniform int i) {
|
||||
return __rotate_double(v, i);
|
||||
}
|
||||
|
||||
static inline int64 rotate(int64 v, uniform int i) {
|
||||
return __rotate_int64(v, i);
|
||||
}
|
||||
|
||||
static inline float shuffle(float v, int i) {
|
||||
return __shuffle_float(v, i);
|
||||
}
|
||||
|
||||
static inline int32 shuffle(int32 v, int i) {
|
||||
return __shuffle_int32(v, i);
|
||||
}
|
||||
|
||||
static inline double shuffle(double v, int i) {
|
||||
return __shuffle_double(v, i);
|
||||
}
|
||||
|
||||
static inline int64 shuffle(int64 v, int i) {
|
||||
return __shuffle_int64(v, i);
|
||||
}
|
||||
|
||||
// x[i]
|
||||
static inline uniform float extract(float x, uniform int i) {
|
||||
return __extract(x, i);
|
||||
|
||||
95
stdlib.m4
95
stdlib.m4
@@ -34,6 +34,8 @@
|
||||
;; builtins for various targets can use macros from this file to simplify
|
||||
;; generating code for their implementations of those builtins.
|
||||
|
||||
declare i1 @__is_compile_time_constant_int32(i32)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
@@ -284,6 +286,22 @@ ret <8 x float> %ret
|
||||
'
|
||||
)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; forloop macro
|
||||
|
||||
divert(`-1')
|
||||
# forloop(var, from, to, stmt) - improved version:
|
||||
# works even if VAR is not a strict macro name
|
||||
# performs sanity check that TO is not smaller than FROM
|
||||
# allows complex numerical expressions in TO and FROM
|
||||
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
|
||||
`pushdef(`$1', eval(`$2'))_$0(`$1',
|
||||
eval(`$3'), `$4')popdef(`$1')')')
|
||||
define(`_forloop',
|
||||
`$3`'ifelse(indir(`$1'), `$2', `',
|
||||
`define(`$1', incr(indir(`$1')))$0($@)')')
|
||||
divert`'dnl
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; stdlib_core
|
||||
;;
|
||||
@@ -291,8 +309,67 @@ ret <8 x float> %ret
|
||||
;; target's vector width, which it takes as its first parameter.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
define(`shuffles', `
|
||||
define internal <$1 x $2> @__broadcast_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||
%v = extractelement <$1 x $2> %0, i32 %1
|
||||
%r_0 = insertelement <$1 x $2> undef, $2 %v, i32 0
|
||||
forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2 %v, i32 i
|
||||
')
|
||||
ret <$1 x $2> %r_`'eval($1-1)
|
||||
}
|
||||
|
||||
define internal <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||
%isc = call i1 @__is_compile_time_constant_int32(i32 %1)
|
||||
br i1 %isc, label %is_const, label %not_const
|
||||
|
||||
is_const:
|
||||
; though verbose, this turns into tight code if %1 is a constant
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%delta_`'i = add i32 %1, i
|
||||
%delta_clamped_`'i = and i32 %delta_`'i, eval($1-1)
|
||||
%v_`'i = extractelement <$1 x $2> %0, i32 %delta_clamped_`'i')
|
||||
|
||||
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
|
||||
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
|
||||
')
|
||||
ret <$1 x $2> %ret_`'eval($1-1)
|
||||
|
||||
not_const:
|
||||
; store two instances of the vector into memory
|
||||
%ptr = alloca <$1 x $2>, i32 2
|
||||
%ptr0 = getelementptr <$1 x $2> * %ptr, i32 0
|
||||
store <$1 x $2> %0, <$1 x $2> * %ptr0
|
||||
%ptr1 = getelementptr <$1 x $2> * %ptr, i32 1
|
||||
store <$1 x $2> %0, <$1 x $2> * %ptr1
|
||||
|
||||
; compute offset in [0,vectorwidth-1], then index into the doubled-up vector
|
||||
%offset = and i32 %1, eval($1-1)
|
||||
%ptr_as_elt_array = bitcast <$1 x $2> * %ptr to [eval(2*$1) x $2] *
|
||||
%load_ptr = getelementptr [eval(2*$1) x $2] * %ptr_as_elt_array, i32 0, i32 %offset
|
||||
%load_ptr_vec = bitcast $2 * %load_ptr to <$1 x $2> *
|
||||
%result = load <$1 x $2> * %load_ptr_vec, align $4
|
||||
ret <$1 x $2> %result
|
||||
}
|
||||
|
||||
define internal <$1 x $2> @__shuffle_$3(<$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%index_`'i = extractelement <$1 x i32> %1, i32 i')
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%v_`'i = extractelement <$1 x $2> %0, i32 %index_`'i')
|
||||
|
||||
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
|
||||
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
|
||||
')
|
||||
ret <$1 x $2> %ret_`'eval($1-1)
|
||||
}
|
||||
|
||||
')
|
||||
|
||||
|
||||
define(`stdlib_core', `
|
||||
|
||||
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; vector ops
|
||||
|
||||
@@ -307,6 +384,10 @@ define internal <$1 x float> @__insert(<$1 x float>, i32,
|
||||
ret <$1 x float> %insert
|
||||
}
|
||||
|
||||
shuffles($1, float, float, 4)
|
||||
shuffles($1, i32, int32, 4)
|
||||
shuffles($1, double, double, 8)
|
||||
shuffles($1, i64, int64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; various bitcasts from one type to another
|
||||
@@ -524,7 +605,6 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
|
||||
;; FIXME: use the per_lane macro, defined below, to implement these!
|
||||
|
||||
define(`packed_load_and_store', `
|
||||
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
|
||||
|
||||
define i32 @__packed_load_active([0 x i32] *, i32 %start_offset, <$1 x i32> * %val_ptr,
|
||||
<$1 x i32> %full_mask) nounwind alwaysinline {
|
||||
@@ -661,19 +741,6 @@ done:
|
||||
;; Inside this code, any instances of the text "LANE" are replaced
|
||||
;; with an i32 value that represents the current lane number
|
||||
|
||||
divert(`-1')
|
||||
# forloop(var, from, to, stmt) - improved version:
|
||||
# works even if VAR is not a strict macro name
|
||||
# performs sanity check that FROM is not larger than TO
|
||||
# allows complex numerical expressions in TO and FROM
|
||||
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
|
||||
`pushdef(`$1', eval(`$2'))_$0(`$1',
|
||||
eval(`$3'), `$4')popdef(`$1')')')
|
||||
define(`_forloop',
|
||||
`$3`'ifelse(indir(`$1'), `$2', `',
|
||||
`define(`$1', incr(indir(`$1')))$0($@)')')
|
||||
divert`'dnl
|
||||
|
||||
; num lanes, mask, code block to do per lane
|
||||
define(`per_lane', `
|
||||
br label %pl_entry
|
||||
|
||||
103
stmt.cpp
103
stmt.cpp
@@ -178,88 +178,59 @@ lInitSymbol(llvm::Value *lvalue, const char *symName, const Type *type,
|
||||
return;
|
||||
}
|
||||
|
||||
// There are two cases for initializing arrays and vectors; either a single
|
||||
// initializer may be provided (float foo[3] = 0;), in which case all
|
||||
// of the array elements are initialized to the given value, or an
|
||||
// initializer list may be provided (float foo[3] = { 1,2,3 }), in
|
||||
// which case the array elements are initialized with the corresponding
|
||||
// There are two cases for initializing structs, arrays and vectors;
|
||||
// either a single initializer may be provided (float foo[3] = 0;), in
|
||||
// which case all of the elements are initialized to the given value,
|
||||
// or an initializer list may be provided (float foo[3] = { 1,2,3 }),
|
||||
// in which case the elements are initialized with the corresponding
|
||||
// values.
|
||||
const SequentialType *seqType = dynamic_cast<const SequentialType *>(type);
|
||||
if (seqType != NULL) {
|
||||
ExprList *exprList = dynamic_cast<ExprList *>(initExpr);
|
||||
if (exprList == NULL) {
|
||||
// We have single expression; loop over the elements of the
|
||||
// array/vector and initialize each of them with it
|
||||
// individually.
|
||||
for (int i = 0; i < seqType->GetElementCount(); ++i) {
|
||||
llvm::Value *ptr = ctx->GetElementPtrInst(lvalue, 0, i, "offset");
|
||||
lInitSymbol(ptr, symName, seqType->GetElementType(), initExpr,
|
||||
ctx, pos);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Otherwise make sure that we have the same number of elements
|
||||
// in the { } initializer expression as we have in the
|
||||
// array/vector
|
||||
int nInits = exprList->exprs.size();
|
||||
if (nInits != seqType->GetElementCount()) {
|
||||
const char *actualType = dynamic_cast<const ArrayType *>(type) ?
|
||||
"Array" : "Vector";
|
||||
Error(initExpr->pos, "%s initializer for variable \"%s\" requires "
|
||||
"%d values; %d provided.", actualType, symName,
|
||||
seqType->GetElementCount(), nInits);
|
||||
}
|
||||
else {
|
||||
// And initialize each of the array/vector elements with
|
||||
// the corresponding expression from the ExprList
|
||||
for (int i = 0; i < nInits; ++i) {
|
||||
llvm::Value *ptr = ctx->GetElementPtrInst(lvalue, 0, i, "offset");
|
||||
lInitSymbol(ptr, symName, seqType->GetElementType(),
|
||||
exprList->exprs[i], ctx, pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
const CollectionType *collectionType =
|
||||
dynamic_cast<const CollectionType *>(type);
|
||||
if (collectionType != NULL) {
|
||||
std::string name;
|
||||
if (dynamic_cast<const StructType *>(type) != NULL)
|
||||
name = "struct";
|
||||
else if (dynamic_cast<const ArrayType *>(type) != NULL)
|
||||
name = "array";
|
||||
else if (dynamic_cast<const VectorType *>(type) != NULL)
|
||||
name = "vector";
|
||||
else
|
||||
FATAL("Unexpected CollectionType in lInitSymbol()");
|
||||
|
||||
// Structs can similarly be initialized in one of two ways; either with
|
||||
// a list of expressions in braces, one expression per struct member,
|
||||
// or with a single expression that is used to initialize all struct
|
||||
// members.
|
||||
const StructType *st = dynamic_cast<const StructType *>(type);
|
||||
if (st) {
|
||||
ExprList *exprList = dynamic_cast<ExprList *>(initExpr);
|
||||
if (exprList != NULL) {
|
||||
// The { ... } case; make sure we have the same number of
|
||||
// expressions in the ExprList as we have struct members
|
||||
int nInits = exprList->exprs.size();
|
||||
if (nInits != st->NumElements())
|
||||
Error(initExpr->pos,
|
||||
"Initializer for struct \"%s\" requires %d values; %d provided.",
|
||||
symName, st->NumElements(), nInits);
|
||||
else {
|
||||
// Initialize each struct member with the corresponding
|
||||
// value from the ExprList
|
||||
for (int i = 0; i < nInits; ++i) {
|
||||
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "structelement");
|
||||
lInitSymbol(ep, symName, st->GetMemberType(i), exprList->exprs[i],
|
||||
ctx, pos);
|
||||
}
|
||||
if (nInits != collectionType->GetElementCount()) {
|
||||
Error(initExpr->pos, "Initializer for %s \"%s\" requires "
|
||||
"%d values; %d provided.", name.c_str(), symName,
|
||||
collectionType->GetElementCount(), nInits);
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize each element with the corresponding value from
|
||||
// the ExprList
|
||||
for (int i = 0; i < nInits; ++i) {
|
||||
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "element");
|
||||
lInitSymbol(ep, symName, collectionType->GetElementType(i),
|
||||
exprList->exprs[i], ctx, pos);
|
||||
}
|
||||
}
|
||||
else if (initExpr->GetType()->IsNumericType() ||
|
||||
initExpr->GetType()->IsBoolType()) {
|
||||
// Otherwise initialize all of the struct elements in turn with
|
||||
// the initExpr.
|
||||
for (int i = 0; i < st->NumElements(); ++i) {
|
||||
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "structelement");
|
||||
lInitSymbol(ep, symName, st->GetMemberType(i), initExpr, ctx, pos);
|
||||
// Otherwise initialize all of the elements in turn with the
|
||||
// initExpr.
|
||||
for (int i = 0; i < collectionType->GetElementCount(); ++i) {
|
||||
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "element");
|
||||
lInitSymbol(ep, symName, collectionType->GetElementType(i),
|
||||
initExpr, ctx, pos);
|
||||
}
|
||||
}
|
||||
else {
|
||||
Error(initExpr->pos, "Can't assign type \"%s\" to \"%s\".",
|
||||
initExpr->GetType()->GetString().c_str(),
|
||||
st->GetString().c_str());
|
||||
collectionType->GetString().c_str());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
12
tests/broadcast-1.ispc
Normal file
12
tests/broadcast-1.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
int br = broadcast(a, (uniform int)b-2);
|
||||
RET[programIndex] = br;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 4;
|
||||
}
|
||||
12
tests/broadcast.ispc
Normal file
12
tests/broadcast.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = broadcast(a, 2);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 3;
|
||||
}
|
||||
12
tests/rotate-1.ispc
Normal file
12
tests/rotate-1.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
int rot = rotate(a, -1);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
13
tests/rotate-2.ispc
Normal file
13
tests/rotate-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
uniform int delta = b - 6; // -1
|
||||
int rot = rotate(a, delta);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
13
tests/rotate-3.ispc
Normal file
13
tests/rotate-3.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int64 a = aFOO[programIndex];
|
||||
uniform int delta = b - 6; // -1
|
||||
int64 rot = rotate(a, delta);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
12
tests/rotate-4.ispc
Normal file
12
tests/rotate-4.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int64 a = aFOO[programIndex];
|
||||
int64 rot = rotate(a, -1);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
12
tests/rotate.ispc
Normal file
12
tests/rotate.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
int rot = rotate(a, 2);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + 2) % programCount;
|
||||
}
|
||||
13
tests/shuffle-1.ispc
Normal file
13
tests/shuffle-1.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
int reverse = programCount - 1 - programIndex;
|
||||
float shuf = shuffle(a, reverse);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount - programIndex;
|
||||
}
|
||||
13
tests/shuffle-2.ispc
Normal file
13
tests/shuffle-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
int reverse = programCount - 1 - programIndex + (int)b - 5;
|
||||
float shuf = shuffle(a, reverse);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount - programIndex;
|
||||
}
|
||||
12
tests/shuffle.ispc
Normal file
12
tests/shuffle.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int32 a = aFOO[programIndex];
|
||||
int32 shuf = shuffle(a, 1);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2;
|
||||
}
|
||||
69
type.cpp
69
type.cpp
@@ -410,6 +410,14 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// SequentialType
|
||||
|
||||
const Type *SequentialType::GetElementType(int index) const {
|
||||
return GetElementType();
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ArrayType
|
||||
|
||||
@@ -961,9 +969,10 @@ VectorType::getVectorMemoryCount() const {
|
||||
|
||||
StructType::StructType(const std::string &n, const std::vector<const Type *> &elts,
|
||||
const std::vector<std::string> &en,
|
||||
const std::vector<SourcePos> &ep,
|
||||
bool ic, bool iu, SourcePos p)
|
||||
: name(n), elementTypes(elts), elementNames(en), isUniform(iu), isConst(ic),
|
||||
pos(p) {
|
||||
: name(n), elementTypes(elts), elementNames(en), elementPositions(ep),
|
||||
isUniform(iu), isConst(ic), pos(p) {
|
||||
}
|
||||
|
||||
|
||||
@@ -1014,8 +1023,8 @@ StructType::GetAsVaryingType() const {
|
||||
if (IsVaryingType())
|
||||
return this;
|
||||
else
|
||||
return new StructType(name, elementTypes, elementNames, isConst,
|
||||
false, pos);
|
||||
return new StructType(name, elementTypes, elementNames, elementPositions,
|
||||
isConst, false, pos);
|
||||
}
|
||||
|
||||
|
||||
@@ -1024,8 +1033,8 @@ StructType::GetAsUniformType() const {
|
||||
if (IsUniformType())
|
||||
return this;
|
||||
else
|
||||
return new StructType(name, elementTypes, elementNames, isConst,
|
||||
true, pos);
|
||||
return new StructType(name, elementTypes, elementNames, elementPositions,
|
||||
isConst, true, pos);
|
||||
}
|
||||
|
||||
|
||||
@@ -1034,11 +1043,12 @@ StructType::GetSOAType(int width) const {
|
||||
std::vector<const Type *> et;
|
||||
// The SOA version of a structure is just a structure that holds SOAed
|
||||
// versions of its elements
|
||||
for (int i = 0; i < NumElements(); ++i) {
|
||||
const Type *t = GetMemberType(i);
|
||||
for (int i = 0; i < GetElementCount(); ++i) {
|
||||
const Type *t = GetElementType(i);
|
||||
et.push_back(t->GetSOAType(width));
|
||||
}
|
||||
return new StructType(name, et, elementNames, isConst, isUniform, pos);
|
||||
return new StructType(name, et, elementNames, elementPositions,
|
||||
isConst, isUniform, pos);
|
||||
}
|
||||
|
||||
|
||||
@@ -1047,8 +1057,8 @@ StructType::GetAsConstType() const {
|
||||
if (IsConstType())
|
||||
return this;
|
||||
else
|
||||
return new StructType(name, elementTypes, elementNames, true,
|
||||
isUniform, pos);
|
||||
return new StructType(name, elementTypes, elementNames,
|
||||
elementPositions, true, isUniform, pos);
|
||||
}
|
||||
|
||||
|
||||
@@ -1057,8 +1067,8 @@ StructType::GetAsNonConstType() const {
|
||||
if (!IsConstType())
|
||||
return this;
|
||||
else
|
||||
return new StructType(name, elementTypes, elementNames, false,
|
||||
isUniform, pos);
|
||||
return new StructType(name, elementTypes, elementNames, elementPositions,
|
||||
false, isUniform, pos);
|
||||
}
|
||||
|
||||
|
||||
@@ -1123,8 +1133,8 @@ StructType::GetCDeclaration(const std::string &n) const {
|
||||
const llvm::Type *
|
||||
StructType::LLVMType(llvm::LLVMContext *ctx) const {
|
||||
std::vector<const llvm::Type *> llvmTypes;
|
||||
for (int i = 0; i < NumElements(); ++i) {
|
||||
const Type *type = GetMemberType(i);
|
||||
for (int i = 0; i < GetElementCount(); ++i) {
|
||||
const Type *type = GetElementType(i);
|
||||
llvmTypes.push_back(type->LLVMType(ctx));
|
||||
}
|
||||
return llvm::StructType::get(*ctx, llvmTypes);
|
||||
@@ -1138,14 +1148,13 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
return llvm::DIType();
|
||||
#else
|
||||
uint64_t currentSize = 0, align = 0;
|
||||
llvm::DIFile diFile = pos.GetDIFile();
|
||||
|
||||
std::vector<llvm::Value *> elementLLVMTypes;
|
||||
// Walk through the elements of the struct; for each one figure out its
|
||||
// alignment and size, using that to figure out its offset w.r.t. the
|
||||
// start of the structure.
|
||||
for (unsigned int i = 0; i < elementTypes.size(); ++i) {
|
||||
llvm::DIType eltType = GetMemberType(i)->GetDIType(scope);
|
||||
llvm::DIType eltType = GetElementType(i)->GetDIType(scope);
|
||||
uint64_t eltAlign = eltType.getAlignInBits();
|
||||
uint64_t eltSize = eltType.getSizeInBits();
|
||||
|
||||
@@ -1159,12 +1168,19 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
currentSize += eltAlign - (currentSize % eltAlign);
|
||||
assert((currentSize == 0) || (currentSize % eltAlign) == 0);
|
||||
|
||||
// FIXME: we should pass this actual file/line number for the
|
||||
// member, not the position of the struct declaration
|
||||
llvm::DIFile diFile = elementPositions[i].GetDIFile();
|
||||
int line = elementPositions[i].first_line;
|
||||
#ifdef LLVM_2_9
|
||||
llvm::DIType fieldType =
|
||||
m->diBuilder->createMemberType(elementNames[i], diFile, pos.first_line,
|
||||
m->diBuilder->createMemberType(elementNames[i], diFile, line,
|
||||
eltSize, eltAlign, currentSize, 0,
|
||||
eltType);
|
||||
#else
|
||||
llvm::DIType fieldType =
|
||||
m->diBuilder->createMemberType(scope, elementNames[i], diFile,
|
||||
line, eltSize, eltAlign,
|
||||
currentSize, 0, eltType);
|
||||
#endif // LLVM_2_9
|
||||
elementLLVMTypes.push_back(fieldType);
|
||||
|
||||
currentSize += eltSize;
|
||||
@@ -1181,6 +1197,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
#else
|
||||
llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes);
|
||||
#endif
|
||||
llvm::DIFile diFile = pos.GetDIFile();
|
||||
return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, currentSize,
|
||||
align, 0, elements);
|
||||
#endif // LLVM_2_8
|
||||
@@ -1188,7 +1205,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
|
||||
|
||||
const Type *
|
||||
StructType::GetMemberType(int i) const {
|
||||
StructType::GetElementType(int i) const {
|
||||
assert(i < (int)elementTypes.size());
|
||||
// If the struct is uniform qualified, then each member comes out with
|
||||
// the same type as in the original source file. If it's varying, then
|
||||
@@ -1200,7 +1217,7 @@ StructType::GetMemberType(int i) const {
|
||||
|
||||
|
||||
const Type *
|
||||
StructType::GetMemberType(const std::string &n) const {
|
||||
StructType::GetElementType(const std::string &n) const {
|
||||
for (unsigned int i = 0; i < elementNames.size(); ++i)
|
||||
if (elementNames[i] == n) {
|
||||
const Type *ret = isUniform ? elementTypes[i] :
|
||||
@@ -1212,7 +1229,7 @@ StructType::GetMemberType(const std::string &n) const {
|
||||
|
||||
|
||||
int
|
||||
StructType::GetMemberNumber(const std::string &n) const {
|
||||
StructType::GetElementNumber(const std::string &n) const {
|
||||
for (unsigned int i = 0; i < elementNames.size(); ++i)
|
||||
if (elementNames[i] == n)
|
||||
return i;
|
||||
@@ -1766,10 +1783,10 @@ Type::Equal(const Type *a, const Type *b) {
|
||||
const StructType *sta = dynamic_cast<const StructType *>(a);
|
||||
const StructType *stb = dynamic_cast<const StructType *>(b);
|
||||
if (sta && stb) {
|
||||
if (sta->NumElements() != stb->NumElements())
|
||||
if (sta->GetElementCount() != stb->GetElementCount())
|
||||
return false;
|
||||
for (int i = 0; i < sta->NumElements(); ++i)
|
||||
if (!Equal(sta->GetMemberType(i), stb->GetMemberType(i)))
|
||||
for (int i = 0; i < sta->GetElementCount(); ++i)
|
||||
if (!Equal(sta->GetElementType(i), stb->GetElementType(i)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
51
type.h
51
type.h
@@ -243,19 +243,42 @@ private:
|
||||
};
|
||||
|
||||
|
||||
/** @brief Abstract base class for tpyes that represent sequences
|
||||
/** @brief Abstract base class for types that represent collections of
|
||||
other types.
|
||||
|
||||
This is a common base class that StructTypes, ArrayTypes, and
|
||||
VectorTypes all inherit from.
|
||||
*/
|
||||
class CollectionType : public Type {
|
||||
public:
|
||||
/** Returns the total number of elements in the collection. */
|
||||
virtual int GetElementCount() const = 0;
|
||||
|
||||
/** Returns the type of the element given by index. (The value of
|
||||
index must be between 0 and GetElementCount()-1.)
|
||||
*/
|
||||
virtual const Type *GetElementType(int index) const = 0;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Abstract base class for types that represent sequences
|
||||
|
||||
SequentialType is an abstract base class that adds interface routines
|
||||
for types that represent linear sequences of other types (i.e., arrays
|
||||
and vectors).
|
||||
*/
|
||||
class SequentialType : public Type {
|
||||
class SequentialType : public CollectionType {
|
||||
public:
|
||||
/** Returns the total number of elements in the sequence. */
|
||||
virtual int GetElementCount() const = 0;
|
||||
|
||||
/** Returns the Type of the elements that the sequence stores. */
|
||||
/** Returns the Type of the elements that the sequence stores; for
|
||||
SequentialTypes, all elements have the same type. */
|
||||
virtual const Type *GetElementType() const = 0;
|
||||
|
||||
/** SequentialType provides an implementation of this CollectionType
|
||||
method, just passing the query on to the GetElementType(void)
|
||||
implementation, since all of the elements of a SequentialType have
|
||||
the same type.
|
||||
*/
|
||||
const Type *GetElementType(int index) const;
|
||||
};
|
||||
|
||||
|
||||
@@ -439,10 +462,11 @@ private:
|
||||
|
||||
/** @brief Representation of a structure holding a number of members.
|
||||
*/
|
||||
class StructType : public Type {
|
||||
class StructType : public CollectionType {
|
||||
public:
|
||||
StructType(const std::string &name, const std::vector<const Type *> &elts,
|
||||
const std::vector<std::string> &eltNames, bool isConst,
|
||||
const std::vector<std::string> &eltNames,
|
||||
const std::vector<SourcePos> &eltPositions, bool isConst,
|
||||
bool isUniform, SourcePos pos);
|
||||
|
||||
bool IsUniformType() const;
|
||||
@@ -468,21 +492,21 @@ public:
|
||||
|
||||
/** Returns the type of the structure element with the given name (if any).
|
||||
Returns NULL if there is no such named element. */
|
||||
const Type *GetMemberType(const std::string &name) const;
|
||||
const Type *GetElementType(const std::string &name) const;
|
||||
|
||||
/** Returns the type of the i'th structure element. The value of \c i must
|
||||
be between 0 and GetElementCount()-1. */
|
||||
const Type *GetMemberType(int i) const;
|
||||
const Type *GetElementType(int i) const;
|
||||
|
||||
/** Returns which structure element number (starting from zero) that
|
||||
has the given name. If there is no such element, return -1. */
|
||||
int GetMemberNumber(const std::string &name) const;
|
||||
int GetElementNumber(const std::string &name) const;
|
||||
|
||||
/** Returns the name of the i'th element of the structure. */
|
||||
const std::string GetElementName(int i) const { return elementNames[i]; }
|
||||
|
||||
/** Returns the total number of elements in the structure. */
|
||||
int NumElements() const { return int(elementTypes.size()); }
|
||||
int GetElementCount() const { return int(elementTypes.size()); }
|
||||
|
||||
/** Returns the name of the structure type. (e.g. struct Foo -> "Foo".) */
|
||||
const std::string &GetStructName() const { return name; }
|
||||
@@ -501,6 +525,9 @@ private:
|
||||
*/
|
||||
const std::vector<const Type *> elementTypes;
|
||||
const std::vector<std::string> elementNames;
|
||||
/** Source file position at which each structure element declaration
|
||||
appeared. */
|
||||
const std::vector<SourcePos> elementPositions;
|
||||
const bool isUniform;
|
||||
const bool isConst;
|
||||
const SourcePos pos;
|
||||
|
||||
Reference in New Issue
Block a user