From 7e18f0e2473b9ccb940733b81de5ff15c001afe2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 23 Mar 2012 16:09:32 -0700 Subject: [PATCH 001/173] Small improvement to float->half function in stdlib. Rewrite things to be able to do a float MINPS, for slightly better code on SSE2 (which has that but not a signed int min). SSE2 code now 23 instructions (vs 21 intrinsics). --- stdlib.ispc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stdlib.ispc b/stdlib.ispc index f5984277..89c47cf7 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -3551,8 +3551,9 @@ static inline int16 float_to_half(float f) { // like recursive filters in DSP - not a typical half-float application. Whether // FP16 denormals are rare in practice, I don't know. Whatever slow path your HW // may or may not have for denormals, this may well hit it. - int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask; - fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed + float fscale = floatbits(fint & round_mask) * floatbits(magic); + fscale = min(fscale, floatbits((31 << 23) - 0x1000)); + int32 fint2 = intbits(fscale) - round_mask; if (fint < f32infty) o = fint2 >> 13; // Take the bits! From 89bbceefee52d486dfe60c5f8f6cccac0a6766db Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 25 Mar 2012 10:07:12 -0700 Subject: [PATCH 002/173] Make sure that foreach() statements never execute with an "all off" mask. --- ast.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ast.cpp b/ast.cpp index c89f00bb..7f601d04 100644 --- a/ast.cpp +++ b/ast.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2011, Intel Corporation + Copyright (c) 2011-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -32,8 +32,10 @@ */ /** @file ast.cpp - @brief -*/ + + @brief General functionality related to abstract syntax trees and + traversal of them. 
+ */ #include "ast.h" #include "expr.h" @@ -363,6 +365,16 @@ lCheckAllOffSafety(ASTNode *node, void *data) { return false; } + if (dynamic_cast(node) != NULL) { + // foreach() statements also shouldn't be run with an all-off mask. + // Since they re-establish an 'all on' mask, this would be pretty + // unintuitive. (More generally, it's possibly a little strange to + // allow foreach() in the presence of any non-uniform control + // flow...) + *okPtr = false; + return false; + } + if (g->target.allOffMaskIsSafe == true) // Don't worry about memory accesses if we have a target that can // safely run them with the mask all off From 117f48a3311ba6fb753d68cdd10dd7e1221e8445 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 25 Mar 2012 10:32:44 -0700 Subject: [PATCH 003/173] Don't include foreach stmts in cost estimates from EstimateCost(). Because they reestablish an 'all on' mask inside their body, it doesn't make sense to include their cost when evaluating whether it's worth re-establishing an 'all on' mask dynamically. (This does mean that EstimateCost()'s return value isn't the most obvious thing, but currently in all the cases where we need it, this is the more appropriate value to return.) 
--- ast.cpp | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/ast.cpp b/ast.cpp index 7f601d04..445ef4c7 100644 --- a/ast.cpp +++ b/ast.cpp @@ -307,19 +307,39 @@ TypeCheck(Stmt *stmt) { } +struct CostData { + CostData() { cost = foreachDepth = 0; } + + int cost; + int foreachDepth; +}; + + static bool -lCostCallback(ASTNode *node, void *c) { - int *cost = (int *)c; - *cost += node->EstimateCost(); +lCostCallbackPre(ASTNode *node, void *d) { + CostData *data = (CostData *)d; + if (dynamic_cast(node) != NULL) + ++data->foreachDepth; + if (data->foreachDepth == 0) + data->cost += node->EstimateCost(); return true; } +static ASTNode * +lCostCallbackPost(ASTNode *node, void *d) { + CostData *data = (CostData *)d; + if (dynamic_cast(node) != NULL) + --data->foreachDepth; + return node; +} + + int EstimateCost(ASTNode *root) { - int cost = 0; - WalkAST(root, lCostCallback, NULL, &cost); - return cost; + CostData data; + WalkAST(root, lCostCallbackPre, lCostCallbackPost, &data); + return data.cost; } From ca3100874fe9158fb5f1cd01888d6683d95a479a Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 25 Mar 2012 11:35:28 -0700 Subject: [PATCH 004/173] Add FAQ about why varying values can't be passed to exported functions. --- docs/faq.rst | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index 2cdca136..3fc9f9e2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -20,6 +20,7 @@ distribution. + `How can I generate a single binary executable with support for multiple instruction sets?`_ + `How can I determine at run-time which vector instruction set's instructions were selected to execute?`_ + `Is it possible to inline ispc functions in C/C++ code?`_ + + `Why is it illegal to pass "varying" values from C/C++ to ispc functions?`_ * Programming Techniques @@ -391,6 +392,48 @@ linking your applicaiton. ``-mattr=+avx`` flag to ``llc``.) 
+Why is it illegal to pass "varying" values from C/C++ to ispc functions? +------------------------------------------------------------------------ + +If any of the types in the parameter list to an exported function is +"varying" (including recursively, and members of structure types, etc.), +then ``ispc`` will issue an error and refuse to compile the function: + +:: + + % echo "export int add(int x) { return ++x; }" | ispc + :1:12: Error: Illegal to return a "varying" type from exported function "add" + :1:20: Error: Varying parameter "x" is illegal in an exported function. + +While there's no fundamental reason why this isn't possible, recall the +definition of "varying" variables: they have one value for each program +instance in the gang. As such, the number of values and amount of storage +required to represent a varying variable depends on the gang size +(i.e. ``programCount``), which can have different values depending on the +compilation target. + +``ispc`` therefore prohibits passing "varying" values between the +application and the ``ispc`` program in order to prevent the +application-side code from depending on a particular gang size, in order to +encourage portability to different gang sizes. (A generally desirable +programming practice.) + +For cases where the size of data is actually fixed from the application +side, the value can be passed via a pointer to a short ``uniform`` array, +as follows: + +:: + + export void add4(uniform int ptr[4]) { + foreach (i = 0 ... 4) + ptr[i]++; + } + +On the 4-wide SSE instruction set, this compiles to a single vector add +instruction (and associated move instructions), while it still also +efficiently computes the correct result on 8-wide AVX targets. 
+ + Programming Techniques ====================== From 1da2834b1e1a49711b191f39a1352e8d8eaec04a Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 25 Mar 2012 13:10:12 -0700 Subject: [PATCH 005/173] Allow the last member of a struct to be an unsized/zero-length array. This enables the C trick of allocating a dynamic amount of storage for the struct in order to extend out the array to the desired length. --- decl.cpp | 19 ++++++++++--------- tests/struct-zero-len-array-member.ispc | 24 ++++++++++++++++++++++++ tests_errors/struct-unsized-array.ispc | 2 +- 3 files changed, 35 insertions(+), 10 deletions(-) create mode 100644 tests/struct-zero-len-array-member.ispc diff --git a/decl.cpp b/decl.cpp index f4382c8b..c881c7e7 100644 --- a/decl.cpp +++ b/decl.cpp @@ -739,15 +739,7 @@ GetStructTypesNamesPositions(const std::vector &sd, if (Type::Equal(sym->type, AtomicType::Void)) Error(d->pos, "\"void\" type illegal for struct member."); - const ArrayType *arrayType = - dynamic_cast(sym->type); - if (arrayType != NULL && arrayType->GetElementCount() == 0) { - Error(d->pos, "Unsized arrays aren't allowed in struct " - "definitions."); - elementTypes->push_back(NULL); - } - else - elementTypes->push_back(sym->type); + elementTypes->push_back(sym->type); if (seenNames.find(sym->name) != seenNames.end()) Error(d->pos, "Struct member \"%s\" has same name as a " @@ -759,4 +751,13 @@ GetStructTypesNamesPositions(const std::vector &sd, elementPositions->push_back(sym->pos); } } + + for (int i = 0; i < (int)elementTypes->size() - 1; ++i) { + const ArrayType *arrayType = + dynamic_cast((*elementTypes)[i]); + + if (arrayType != NULL && arrayType->GetElementCount() == 0) + Error((*elementPositions)[i], "Unsized arrays aren't allowed except " + "for the last member in a struct definition."); + } } diff --git a/tests/struct-zero-len-array-member.ispc b/tests/struct-zero-len-array-member.ispc new file mode 100644 index 00000000..83e91854 --- /dev/null +++ 
b/tests/struct-zero-len-array-member.ispc @@ -0,0 +1,24 @@ + +struct Foo { + float x; + float a[0]; +}; + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform int nFloats = 3+programCount; + varying Foo * uniform ptr = (varying Foo * uniform)(uniform new varying int32[nFloats]); + memset(ptr, 0, nFloats*sizeof(int32)); + + for (uniform int i = 0; i < nFloats-1; ++i) + ptr->a[i] = i; + ptr->x = aFOO[programIndex]; + + RET[programIndex] = ptr->a[1+programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1 + programIndex; +} diff --git a/tests_errors/struct-unsized-array.ispc b/tests_errors/struct-unsized-array.ispc index 77553eff..7238a351 100644 --- a/tests_errors/struct-unsized-array.ispc +++ b/tests_errors/struct-unsized-array.ispc @@ -1,4 +1,4 @@ -// Unsized arrays aren't allowed in struct definitions +// Unsized arrays aren't allowed except for the last member in a struct definition. struct Foo { float a[]; From d3a374e71c1927d84a9ab8f0fac521625122ec14 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 25 Mar 2012 13:10:23 -0700 Subject: [PATCH 006/173] Fix malformed program crasher. --- type.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/type.cpp b/type.cpp index 0fb8817e..06ab8bd2 100644 --- a/type.cpp +++ b/type.cpp @@ -2014,6 +2014,10 @@ StructType::GetElementType(int i) const { Assert(variability != Variability::Unbound); Assert(i < (int)elementTypes.size()); const Type *ret = elementTypes[i]; + if (ret == NULL) { + Assert(m->errorCount > 0); + return NULL; + } // If the element has unbound variability, resolve its variability to // the struct type's variability From 388d0d2cfdff7c45742c0f1aecb4aaae7343fdbd Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 25 Mar 2012 17:38:15 -0700 Subject: [PATCH 007/173] Add #include Fixes build on linux and windows. (Strangely, this didn't break the OSX build.) Issue #195. 
--- decl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/decl.cpp b/decl.cpp index c881c7e7..4ddc6219 100644 --- a/decl.cpp +++ b/decl.cpp @@ -44,6 +44,7 @@ #include "stmt.h" #include "expr.h" #include +#include #include static void From 95a8b6e5e894fe4891fb23ff2f00275d86f8055c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 25 Mar 2012 17:38:34 -0700 Subject: [PATCH 008/173] Fix & vs. && in logical test. Issue #196. --- decl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decl.cpp b/decl.cpp index 4ddc6219..748cffb1 100644 --- a/decl.cpp +++ b/decl.cpp @@ -579,7 +579,7 @@ Declarator::GetType(DeclSpecs *ds) const { const Type *type = GetType(baseType, ds); if (ds->declSpecList.size() > 0 && - type != NULL & + type != NULL && dynamic_cast(type) == NULL) { Error(pos, "__declspec specifiers for non-function type \"%s\" are " "not used.", type->GetString().c_str()); From 887882666185a68e45ac3743160e39f996512a3f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 26 Mar 2012 09:37:59 -0700 Subject: [PATCH 009/173] Add non-short-circuiting and(), or(), select() to stdlib. --- docs/ispc.rst | 200 ++++++++++++++++++++++++++++++++------------------ stdlib.ispc | 119 ++++++++++++++++++++++++++++++ 2 files changed, 247 insertions(+), 72 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 4be80a18..0366c087 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -121,10 +121,14 @@ Contents: * `The ISPC Standard Library`_ + + `Basic Operations On Data`_ + + * `Logical and Selection Operations`_ + * `Bit Operations`_ + + `Math Functions`_ * `Basic Math Functions`_ - * `Bit-Level Operations`_ * `Transcendental Functions`_ * `Pseudo-Random Numbers`_ @@ -2150,6 +2154,12 @@ greater than or equal to ``NUM_ITEMS``. // ... 
} +Short-circuiting may impose some overhead in the generated code; for cases +where short-circuiting is undesirable due to performance impact, see +the section `Logical and Selection Operations`_, which introduces helper +functions in the standard library that provide these operations without +short-circuiting. + Dynamic Memory Allocation ------------------------- @@ -2827,6 +2837,123 @@ The ISPC Standard Library compiling ``ispc`` programs. (To disable the standard library, pass the ``--nostdlib`` command-line flag to the compiler.) +Basic Operations On Data +------------------------ + +Logical and Selection Operations +-------------------------------- + +Recall from `Expressions`_ that ``ispc`` short-circuits the evaluation of +logical and selection operators: given an expression like ``(index < count +&& array[index] == 0)``, then ``array[index] == 0`` is only evaluated if +``index < count`` is true. This property is useful for writing expressions +like the preceding one, where the second expression may not be safe to +evaluate in some cases. + +This short-circuiting can impose overhead in the generated code; additional +operations are required to test the first value and to conditionally jump +over the code that evaluates the second value. The ``ispc`` compiler does +try to mitigate this cost by detecting cases where it is both safe and +inexpensive to evaluate both expressions, and skips short-circuiting in the +generated code in this case (without there being any programmer-visible +change in program behavior.) + +For cases where the compiler can't detect this case but the programmer +wants to avoid short-circuiting behavior, the standard library provides a +few helper functions. First, ``and()`` and ``or()`` provide +non-short-circuiting logical AND and OR operations. 
+ +:: + + bool and(bool a, bool b) + bool or(bool a, bool b) + uniform bool and(uniform bool a, uniform bool b) + uniform bool or(uniform bool a, uniform bool b) + +And there are three variants of ``select()`` that select between two values +based on a boolean condition. These are the variants of ``select()`` for +the ``int8`` type: + +:: + + int8 select(bool v, int8 a, int8 b) + int8 select(uniform bool v, int8 a, int8 b) + uniform int8 select(uniform bool v, uniform int8 a, uniform int8 b) + +There are also variants for ``int16``, ``int32``, ``int64``, ``float``, and +``double`` types. + +Bit Operations +-------------- + +The various variants of ``popcnt()`` return the population count--the +number of bits set in the given value. + +:: + + uniform int popcnt(uniform int v) + int popcnt(int v) + uniform int popcnt(bool v) + + +A few functions determine how many leading bits in the given value are zero +and how many of the trailing bits are zero; there are also ``unsigned`` +variants of these functions and variants that take ``int64`` and ``unsigned +int64`` types. + +:: + + int32 count_leading_zeros(int32 v) + uniform int32 count_leading_zeros(uniform int32 v) + int32 count_trailing_zeros(int32 v) + uniform int32 count_trailing_zeros(uniform int32 v) + +Sometimes it's useful to convert a ``bool`` value to an integer using sign +extension so that the integer's bits are all on if the ``bool`` has the +value ``true`` (rather than just having the value one). The +``sign_extend()`` functions provide this functionality: + +:: + + int sign_extend(bool value) + uniform int sign_extend(uniform bool value) + +The ``intbits()`` and ``floatbits()`` functions can be used to implement +low-level floating-point bit twiddling. For example, ``intbits()`` returns +an ``unsigned int`` that is a bit-for-bit copy of the given ``float`` +value. (Note: it is **not** the same as ``(int)a``, but corresponds to +something like ``*((int *)&a)`` in C.) 
+ +:: + + float floatbits(unsigned int a); + uniform float floatbits(uniform unsigned int a); + unsigned int intbits(float a); + uniform unsigned int intbits(uniform float a); + + +The ``intbits()`` and ``floatbits()`` functions have no cost at runtime; +they just let the compiler know how to interpret the bits of the given +value. They make it possible to efficiently write functions that take +advantage of the low-level bit representation of floating-point values. + +For example, the ``abs()`` function in the standard library is implemented +as follows: + +:: + + float abs(float a) { + unsigned int i = intbits(a); + i &= 0x7fffffff; + return floatbits(i); + } + +This code directly clears the high order bit to ensure that the given +floating-point value is positive. This compiles down to a single ``andps`` +instruction when used with an Intel® SSE target, for example. + + + Math Functions -------------- @@ -2919,77 +3046,6 @@ quite efficient.) uniform unsigned int low, uniform unsigned int high) -Bit-Level Operations --------------------- - - -The various variants of ``popcnt()`` return the population count--the -number of bits set in the given value. - -:: - - uniform int popcnt(uniform int v) - int popcnt(int v) - uniform int popcnt(bool v) - - -A few functions determine how many leading bits in the given value are zero -and how many of the trailing bits are zero; there are also ``unsigned`` -variants of these functions and variants that take ``int64`` and ``unsigned -int64`` types. - -:: - - int32 count_leading_zeros(int32 v) - uniform int32 count_leading_zeros(uniform int32 v) - int32 count_trailing_zeros(int32 v) - uniform int32 count_trailing_zeros(uniform int32 v) - -Sometimes it's useful to convert a ``bool`` value to an integer using sign -extension so that the integer's bits are all on if the ``bool`` has the -value ``true`` (rather than just having the value one). 
The -``sign_extend()`` functions provide this functionality: - -:: - - int sign_extend(bool value) - uniform int sign_extend(uniform bool value) - -The ``intbits()`` and ``floatbits()`` functions can be used to implement -low-level floating-point bit twiddling. For example, ``intbits()`` returns -an ``unsigned int`` that is a bit-for-bit copy of the given ``float`` -value. (Note: it is **not** the same as ``(int)a``, but corresponds to -something like ``*((int *)&a)`` in C. - -:: - - float floatbits(unsigned int a); - uniform float floatbits(uniform unsigned int a); - unsigned int intbits(float a); - uniform unsigned int intbits(uniform float a); - - -The ``intbits()`` and ``floatbits()`` functions have no cost at runtime; -they just let the compiler know how to interpret the bits of the given -value. They make it possible to efficiently write functions that take -advantage of the low-level bit representation of floating-point values. - -For example, the ``abs()`` function in the standard library is implemented -as follows: - -:: - - float abs(float a) { - unsigned int i = intbits(a); - i &= 0x7fffffff; - return floatbits(i); - } - -This code directly clears the high order bit to ensure that the given -floating-point value is positive. This compiles down to a single ``andps`` -instruction when used with an Intel® SSE target, for example. 
- - Transcendental Functions ------------------------ diff --git a/stdlib.ispc b/stdlib.ispc index 89c47cf7..14d6f8cf 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -746,6 +746,125 @@ static inline void prefetch_nt(const void * varying ptr) { } } +/////////////////////////////////////////////////////////////////////////// +// non-short-circuiting alternatives + +__declspec(safe,cost1) +static inline bool and(bool a, bool b) { + return a && b; +} + +__declspec(safe,cost1) +static inline uniform bool and(uniform bool a, uniform bool b) { + return a && b; +} + +__declspec(safe,cost1) +static inline bool or(bool a, bool b) { + return a || b; +} + +__declspec(safe,cost1) +static inline uniform bool or(uniform bool a, uniform bool b) { + return a || b; +} + +__declspec(safe,cost1) +static inline int8 select(bool c, int8 a, int8 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline int8 select(uniform bool c, int8 a, int8 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline uniform int8 select(uniform bool c, uniform int8 a, + uniform int8 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline int16 select(bool c, int16 a, int16 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline int16 select(uniform bool c, int16 a, int16 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline uniform int16 select(uniform bool c, uniform int16 a, + uniform int16 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline int32 select(bool c, int32 a, int32 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline int32 select(uniform bool c, int32 a, int32 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline uniform int32 select(uniform bool c, uniform int32 a, + uniform int32 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline int64 select(bool c, int64 a, int64 b) { + return c ? 
a : b; +} + +__declspec(safe,cost1) +static inline int64 select(uniform bool c, int64 a, int64 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline uniform int64 select(uniform bool c, uniform int64 a, + uniform int64 b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline float select(bool c, float a, float b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline float select(uniform bool c, float a, float b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline uniform float select(uniform bool c, uniform float a, + uniform float b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline double select(bool c, double a, double b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline double select(uniform bool c, double a, double b) { + return c ? a : b; +} + +__declspec(safe,cost1) +static inline uniform double select(uniform bool c, uniform double a, + uniform double b) { + return c ? a : b; +} + /////////////////////////////////////////////////////////////////////////// // Horizontal ops / reductions From e15a1946c6f3b479c48610870da86df09d9daa70 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 08:02:39 -0700 Subject: [PATCH 010/173] Documentation: add ISPC_TARGET_AVX2 as a possible target #define --- docs/ispc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 0366c087..34f91afd 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -542,7 +542,7 @@ preprocessor runs: * - ISPC - 1 - Detecting that the ``ispc`` compiler is processing the file - * - ISPC_TARGET_{SSE2,SSE4,AVX} + * - ISPC_TARGET_{SSE2,SSE4,AVX,AVX2} - 1 - One of these will be set, depending on the compilation target. * - ISPC_POINTER_SIZE From 0ad8dbbfc93158cb5e31434cc9439016970348db Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 08:03:02 -0700 Subject: [PATCH 011/173] Fix documentation bug: atan2 arguments were reversed. Issue #203. 
--- docs/ispc.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 34f91afd..6fb60816 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3083,8 +3083,8 @@ The corresponding inverse functions are also available: uniform float acos(uniform float x) float atan(float x) uniform float atan(uniform float x) - float atan2(float x, float y) - uniform float atan2(uniform float x, uniform float y) + float atan2(float y, float x) + uniform float atan2(uniform float y, uniform float x) If both sine and cosine are needed, then the ``sincos()`` call computes both more efficiently than two calls to the respective individual From d8a8f3a99680b0e82e214dbbe784b616aa9d19f0 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 08:52:14 -0700 Subject: [PATCH 012/173] For symbols that are references, return uniform ptr type as lvalue type. Fixes issue #204. --- expr.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/expr.cpp b/expr.cpp index 17541012..cacbb312 100644 --- a/expr.cpp +++ b/expr.cpp @@ -7209,7 +7209,10 @@ SymbolExpr::GetLValueType() const { if (symbol == NULL) return NULL; - return PointerType::GetUniform(symbol->type); + if (dynamic_cast(symbol->type) != NULL) + return PointerType::GetUniform(symbol->type->GetReferenceTarget()); + else + return PointerType::GetUniform(symbol->type); } From 62cd3418ca142a888432c60e00457dc7a011f04c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 09:04:45 -0700 Subject: [PATCH 013/173] Add test for the bug in issue #204. 
--- tests/ref-vec-param-index.ispc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/ref-vec-param-index.ispc diff --git a/tests/ref-vec-param-index.ispc b/tests/ref-vec-param-index.ispc new file mode 100644 index 00000000..70256dc1 --- /dev/null +++ b/tests/ref-vec-param-index.ispc @@ -0,0 +1,16 @@ + +export uniform int width() { return programCount; } + +float foo(uniform float<4> &vec) { + return vec[programIndex & 3]; +} + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform float<4> vec = { b, -1, 2*b, -b }; + RET[programIndex] = foo(vec); +} + +export void result(uniform float RET[]) { + uniform float a[4] = { 5, -1, 10, -5 }; + RET[programIndex] = a[programIndex & 3]; +} From ffe484c31e3c94a234257e0b2ffe3e55d85580b7 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 09:06:10 -0700 Subject: [PATCH 014/173] Implement simpler approach for header file struct emission. Rather than explicitly building a DAG and doing a topological sort, just traverse structs recursively and emit declarations for all of their dependent structs before emitting the original struct declaration. Not only is this simpler than the previous implementation, but it fixes a bug where we'd hit an assert if we had a struct with multiple contained members of another struct type. --- module.cpp | 135 ++++++++++++++++------------------------------------- 1 file changed, 41 insertions(+), 94 deletions(-) diff --git a/module.cpp b/module.cpp index 99da37ab..4555a3d5 100644 --- a/module.cpp +++ b/module.cpp @@ -755,109 +755,56 @@ Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, } -/** Small structure used in representing dependency graphs of structures - (i.e. given a StructType, which other structure types does it have as - elements). - */ -struct StructDAGNode { - StructDAGNode() - : visited(false) { } - - bool visited; - std::vector dependents; -}; - - -/** Visit a node for the topological sort. 
+/** Emits a declaration for the given struct to the given file. This + function first makes sure that declarations for any structs that are + (recursively) members of this struct are emitted first. */ static void -lVisitNode(const StructType *structType, - std::map &structToNode, - std::vector &sortedTypes) { - Assert(structToNode.find(structType) != structToNode.end()); - // Get the node that encodes the structs that this one is immediately - // dependent on. - StructDAGNode *node = structToNode[structType]; - if (node->visited) - return; +lEmitStructDecl(const StructType *st, std::vector *emittedStructs, + FILE *file) { + // Has this struct type already been declared? (This happens if it's a + // member of another struct for which we emitted a declaration + // previously.) + for (int i = 0; i < (int)emittedStructs->size(); ++i) + if (Type::EqualIgnoringConst(st, (*emittedStructs)[i])) + return; - node->visited = true; - // Depth-first traversal: visit all of the dependent nodes... - for (unsigned int i = 0; i < node->dependents.size(); ++i) - lVisitNode(node->dependents[i], structToNode, sortedTypes); - // ...and then add this one to the sorted list - sortedTypes.push_back(structType); + // Otherwise first make sure any contained structs have been declared. + for (int i = 0; i < st->GetElementCount(); ++i) { + const StructType *elementStructType = + dynamic_cast(st->GetElementType(i)); + if (elementStructType != NULL) + lEmitStructDecl(elementStructType, emittedStructs, file); + } + + // And now it's safe to declare this one + emittedStructs->push_back(st); + + fprintf(file, "struct %s", st->GetStructName().c_str()); + if (st->GetSOAWidth() > 0) + // This has to match the naming scheme in + // StructType::GetCDeclaration(). 
+ fprintf(file, "_SOA%d", st->GetSOAWidth()); + fprintf(file, " {\n"); + + for (int i = 0; i < st->GetElementCount(); ++i) { + const Type *type = st->GetElementType(i)->GetAsNonConstType(); + std::string d = type->GetCDeclaration(st->GetElementName(i)); + fprintf(file, " %s;\n", d.c_str()); + } + fprintf(file, "};\n\n"); } - + /** Given a set of structures that we want to print C declarations of in a - header file, order them so that any struct that is used as a member - variable in another struct is printed before the struct that uses it - and then print them to the given file. + header file, emit their declarations. */ static void lEmitStructDecls(std::vector &structTypes, FILE *file) { - // First, build a DAG among the struct types where there is an edge - // from node A to node B if struct type A depends on struct type B - - // Records the struct types that have incoming edges in the - // DAG--i.e. the ones that one or more other struct types depend on - std::set hasIncomingEdges; - // Records the mapping between struct type pointers and the - // StructDagNode structures - std::map structToNode; - for (unsigned int i = 0; i < structTypes.size(); ++i) { - // For each struct type, create its DAG node and record the - // relationship between it and its node - const StructType *st = structTypes[i]; - StructDAGNode *node = new StructDAGNode; - structToNode[st] = node; - - for (int j = 0; j < st->GetElementCount(); ++j) { - const StructType *elementStructType = - dynamic_cast(st->GetElementType(j)); - // If this element is a struct type and we haven't already - // processed it for the current struct type, then upate th - // dependencies and record that this element type has other - // struct types that depend on it. 
- if (elementStructType != NULL && - (std::find(node->dependents.begin(), node->dependents.end(), - elementStructType) == node->dependents.end())) { - node->dependents.push_back(elementStructType); - hasIncomingEdges.insert(elementStructType); - } - } - } - - // Perform a topological sort of the struct types. Kick it off by - // visiting nodes with no incoming edges; i.e. the struct types that no - // other struct types depend on. - std::vector sortedTypes; - for (unsigned int i = 0; i < structTypes.size(); ++i) { - const StructType *structType = structTypes[i]; - if (hasIncomingEdges.find(structType) == hasIncomingEdges.end()) - lVisitNode(structType, structToNode, sortedTypes); - } - Assert(sortedTypes.size() == structTypes.size()); - - // And finally we can emit the struct declarations by going through the - // sorted ones in order. - for (unsigned int i = 0; i < sortedTypes.size(); ++i) { - const StructType *st = sortedTypes[i]; - fprintf(file, "struct %s", st->GetStructName().c_str()); - if (st->GetSOAWidth() > 0) - // This has to match the naming scheme in - // StructType::GetCDeclaration(). - fprintf(file, "_SOA%d", st->GetSOAWidth()); - fprintf(file, " {\n"); - - for (int j = 0; j < st->GetElementCount(); ++j) { - const Type *type = st->GetElementType(j)->GetAsNonConstType(); - std::string d = type->GetCDeclaration(st->GetElementName(j)); - fprintf(file, " %s;\n", d.c_str()); - } - fprintf(file, "};\n\n"); - } + std::vector emittedStructs; + for (unsigned int i = 0; i < structTypes.size(); ++i) + lEmitStructDecl(structTypes[i], &emittedStructs, file); + Assert(emittedStructs.size() == structTypes.size()); } From f9dc621ebe961f353ba4e85615e22ce95029b4dd Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 09:17:40 -0700 Subject: [PATCH 015/173] Fix bug when doing pointer math with varying integer offsets. We were incorrectly trying to type convert the varying offset to a uniform value, which in turn led to an incorrect compile-time error. 
Fixes issue #201. --- expr.cpp | 1 + tests/ptr-math-variability.ispc | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/ptr-math-variability.ispc diff --git a/expr.cpp b/expr.cpp index cacbb312..4f516382 100644 --- a/expr.cpp +++ b/expr.cpp @@ -2314,6 +2314,7 @@ BinaryExpr::TypeCheck() { if (type1->IsVaryingType()) { arg0 = TypeConvertExpr(arg0, type0->GetAsVaryingType(), "pointer addition"); + offsetType = offsetType->GetAsVaryingType(); Assert(arg0 != NULL); } diff --git a/tests/ptr-math-variability.ispc b/tests/ptr-math-variability.ispc new file mode 100644 index 00000000..4fa89206 --- /dev/null +++ b/tests/ptr-math-variability.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform float * uniform ptr = aFOO; + RET[programIndex] = *(ptr + programIndex) - 1; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programIndex; +} From ca5c65d03207763bf043259bde3eb59d7cc1db03 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 09:33:43 -0700 Subject: [PATCH 016/173] Fix bugs where typecasting an expression to void would cause it to disappear. This was obviously problematic in cases where the expression was a function call or the like, with side effects. Fixes issue #199. 
--- expr.cpp | 20 ++++++++++++++++---- tests/typecast-void-funcall-1.ispc | 17 +++++++++++++++++ tests/typecast-void-funcall.ispc | 17 +++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 tests/typecast-void-funcall-1.ispc create mode 100644 tests/typecast-void-funcall.ispc diff --git a/expr.cpp b/expr.cpp index 4f516382..c1e6092f 100644 --- a/expr.cpp +++ b/expr.cpp @@ -6284,10 +6284,17 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { ctx->SetDebugPos(pos); const Type *toType = GetType(), *fromType = expr->GetType(); - if (!toType || !fromType || Type::Equal(toType, AtomicType::Void) || - Type::Equal(fromType, AtomicType::Void)) - // an error should have been issued elsewhere in this case + if (toType == NULL || fromType == NULL) { + Assert(m->errorCount > 0); return NULL; + } + + if (Type::Equal(toType, AtomicType::Void)) { + // emit the code for the expression in case it has side-effects but + // then we're done. + (void)expr->GetValue(ctx); + return NULL; + } const PointerType *fromPointerType = dynamic_cast(fromType); const PointerType *toPointerType = dynamic_cast(toType); @@ -6590,7 +6597,12 @@ TypeCastExpr::TypeCheck() { fromType = lDeconstifyType(fromType); toType = lDeconstifyType(toType); - if (fromType->IsVaryingType() && toType->IsUniformType()) { + // Anything can be cast to void... 
+ if (Type::Equal(toType, AtomicType::Void)) + return this; + + if (Type::Equal(fromType, AtomicType::Void) || + fromType->IsVaryingType() && toType->IsUniformType()) { Error(pos, "Can't type cast from type \"%s\" to type \"%s\"", fromType->GetString().c_str(), toType->GetString().c_str()); return NULL; diff --git a/tests/typecast-void-funcall-1.ispc b/tests/typecast-void-funcall-1.ispc new file mode 100644 index 00000000..c9aa0ed7 --- /dev/null +++ b/tests/typecast-void-funcall-1.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + +float add(float a, float b, uniform float * uniform result) { + result[programIndex] = a+b; + return a+b; +} + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + float b = 0.; b = a; + (void)add(a, b, RET); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2 + 2*programIndex; +} diff --git a/tests/typecast-void-funcall.ispc b/tests/typecast-void-funcall.ispc new file mode 100644 index 00000000..f2431ef9 --- /dev/null +++ b/tests/typecast-void-funcall.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + +uniform float add(float a, float b, uniform float * uniform result) { + result[programIndex] = a+b; + return 1; +} + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + float b = 0.; b = a; + (void)add(a, b, RET); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2 + 2*programIndex; +} From 6e9fea377d5eaaccbf0e3539f25ab442cb970407 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 09:50:21 -0700 Subject: [PATCH 017/173] Type convert NULL to other pointer types for function call arguments. Fixes issue #198. 
--- expr.cpp | 4 +++- tests/ptr-null-func-arg.ispc | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/ptr-null-func-arg.ispc diff --git a/expr.cpp b/expr.cpp index c1e6092f..fb363c58 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3409,7 +3409,9 @@ FunctionCallExpr::TypeCheck() { if (t == NULL) return NULL; argTypes.push_back(t); - argCouldBeNULL.push_back(lIsAllIntZeros(args->exprs[i])); + + argCouldBeNULL.push_back(lIsAllIntZeros(args->exprs[i]) || + dynamic_cast(args->exprs[i]) != NULL); } FunctionSymbolExpr *fse = dynamic_cast(func); diff --git a/tests/ptr-null-func-arg.ispc b/tests/ptr-null-func-arg.ispc new file mode 100644 index 00000000..fdd0cbab --- /dev/null +++ b/tests/ptr-null-func-arg.ispc @@ -0,0 +1,14 @@ + +export uniform int width() { return programCount; } + +bool bar(float * x) { + return (x != 0); +} + +export void f_f(uniform float RET[], uniform float aFOO[]) { + RET[programIndex] = bar(NULL); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} From 247775d1eceee9672071269780f0ff03a0346df7 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 10:07:54 -0700 Subject: [PATCH 018/173] Fix type conversion to allow array -> void * conversions. Fixes issue #193. --- expr.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/expr.cpp b/expr.cpp index fb363c58..5b162d15 100644 --- a/expr.cpp +++ b/expr.cpp @@ -253,6 +253,11 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, // "float foo[10]" -> "float * uniform foo", we have what's seemingly // a varying to uniform conversion (but not really) if (fromArrayType != NULL && toPointerType != NULL) { + // can convert any array to a void pointer (both uniform and + // varying). 
+ if (PointerType::IsVoidPointer(toPointerType)) + goto typecast_ok; + // array to pointer to array element type const Type *eltType = fromArrayType->GetElementType(); if (toPointerType->GetBaseType()->IsConstType()) @@ -6411,7 +6416,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { // implicit array to pointer to first element Expr *arrayAsPtr = lArrayToPointer(expr); if (Type::EqualIgnoringConst(arrayAsPtr->GetType(), toPointerType) == false) { - Assert(Type::EqualIgnoringConst(arrayAsPtr->GetType()->GetAsVaryingType(), + Assert(PointerType::IsVoidPointer(toPointerType) || + Type::EqualIgnoringConst(arrayAsPtr->GetType()->GetAsVaryingType(), toPointerType) == true); arrayAsPtr = new TypeCastExpr(toPointerType, arrayAsPtr, pos); arrayAsPtr = ::TypeCheck(arrayAsPtr); From f8a39402a21f48a50aa9b9c0d5ab6420e8215940 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 13:25:11 -0700 Subject: [PATCH 019/173] Implement new, simpler function overload resolution algorithm. We now give each conversion a cost and then find the minimum sum of costs for all of the possible overloads. Fixes issue #194. 
--- expr.cpp | 443 ++++++++++-------------- expr.h | 22 +- tests/func-overload-max.ispc | 12 + tests_errors/func-param-mismatch-2.ispc | 2 +- tests_errors/func-param-mismatch-3.ispc | 2 +- tests_errors/func-param-mismatch.ispc | 2 +- 6 files changed, 214 insertions(+), 269 deletions(-) create mode 100644 tests/func-overload-max.ispc diff --git a/expr.cpp b/expr.cpp index 5b162d15..907f1a84 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3406,24 +3406,28 @@ FunctionCallExpr::TypeCheck() { return NULL; std::vector argTypes; - std::vector argCouldBeNULL; + std::vector argCouldBeNULL, argIsConstant; for (unsigned int i = 0; i < args->exprs.size(); ++i) { - if (args->exprs[i] == NULL) + Expr *expr = args->exprs[i]; + + if (expr == NULL) return NULL; - const Type *t = args->exprs[i]->GetType(); + const Type *t = expr->GetType(); if (t == NULL) return NULL; - argTypes.push_back(t); - argCouldBeNULL.push_back(lIsAllIntZeros(args->exprs[i]) || - dynamic_cast(args->exprs[i]) != NULL); + argTypes.push_back(t); + argCouldBeNULL.push_back(lIsAllIntZeros(expr) || + dynamic_cast(expr)); + argIsConstant.push_back(dynamic_cast(expr) || + dynamic_cast(expr)); } FunctionSymbolExpr *fse = dynamic_cast(func); if (fse != NULL) { // Regular function call - - if (fse->ResolveOverloads(args->pos, argTypes, &argCouldBeNULL) == false) + if (fse->ResolveOverloads(args->pos, argTypes, &argCouldBeNULL, + &argIsConstant) == false) return NULL; func = ::TypeCheck(fse); @@ -7403,282 +7407,183 @@ lPrintOverloadCandidates(SourcePos pos, const std::vector &funcs, } -/** Helper function used for function overload resolution: returns zero - cost if the call argument's type exactly matches the function argument - type (modulo a conversion to a const type if needed), otherwise reports - failure. 
- */ -static int -lExactMatch(const Type *callType, const Type *funcArgType) { - if (dynamic_cast(callType) == NULL) - callType = callType->GetAsNonConstType(); - if (dynamic_cast(funcArgType) != NULL && - dynamic_cast(callType) == NULL) - callType = new ReferenceType(callType); - - return Type::Equal(callType, funcArgType) ? 0 : -1; -} - - -/** Helper function used for function overload resolution: returns a cost - of 1 if the call argument type and the function argument type match, - modulo conversion to a reference type if needed. - */ -static int -lMatchIgnoringReferences(const Type *callType, const Type *funcArgType) { - int prev = lExactMatch(callType, funcArgType); - if (prev != -1) - return prev; - - callType = callType->GetReferenceTarget(); - if (funcArgType->IsConstType()) - callType = callType->GetAsConstType(); - - return Type::Equal(callType, - funcArgType->GetReferenceTarget()) ? 1 : -1; -} - -/** Helper function used for function overload resolution: returns a cost - of 1 if converting the argument to the call type only requires a type - conversion that won't lose information. Otherwise reports failure. -*/ -static int -lMatchWithTypeWidening(const Type *callType, const Type *funcArgType) { - int prev = lMatchIgnoringReferences(callType, funcArgType); - if (prev != -1) - return prev; - +/** Helper function used for function overload resolution: returns true if + converting the argument to the call type only requires a type + conversion that won't lose information. Otherwise return false. 
+ */ +static bool +lIsMatchWithTypeWidening(const Type *callType, const Type *funcArgType) { const AtomicType *callAt = dynamic_cast(callType); const AtomicType *funcAt = dynamic_cast(funcArgType); if (callAt == NULL || funcAt == NULL) - return -1; + return false; if (callAt->IsUniformType() != funcAt->IsUniformType()) - return -1; + return false; switch (callAt->basicType) { case AtomicType::TYPE_BOOL: - return 1; + return true; case AtomicType::TYPE_INT8: case AtomicType::TYPE_UINT8: - return (funcAt->basicType != AtomicType::TYPE_BOOL) ? 1 : -1; + return (funcAt->basicType != AtomicType::TYPE_BOOL); case AtomicType::TYPE_INT16: case AtomicType::TYPE_UINT16: return (funcAt->basicType != AtomicType::TYPE_BOOL && funcAt->basicType != AtomicType::TYPE_INT8 && - funcAt->basicType != AtomicType::TYPE_UINT8) ? 1 : -1; + funcAt->basicType != AtomicType::TYPE_UINT8); case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: return (funcAt->basicType == AtomicType::TYPE_INT32 || funcAt->basicType == AtomicType::TYPE_UINT32 || funcAt->basicType == AtomicType::TYPE_INT64 || - funcAt->basicType == AtomicType::TYPE_UINT64) ? 1 : -1; + funcAt->basicType == AtomicType::TYPE_UINT64); case AtomicType::TYPE_FLOAT: - return (funcAt->basicType == AtomicType::TYPE_DOUBLE) ? 1 : -1; + return (funcAt->basicType == AtomicType::TYPE_DOUBLE); case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: return (funcAt->basicType == AtomicType::TYPE_INT64 || - funcAt->basicType == AtomicType::TYPE_UINT64) ? 1 : -1; + funcAt->basicType == AtomicType::TYPE_UINT64); case AtomicType::TYPE_DOUBLE: - return -1; + return false; default: FATAL("Unhandled atomic type"); - return -1; + return false; } } -/** Helper function used for function overload resolution: returns a cost - of 1 if the call argument type and the function argument type match if - we only do a uniform -> varying type conversion but otherwise have - exactly the same type. 
+/** Helper function used for function overload resolution: returns true if + the call argument type and the function argument type match if we only + do a uniform -> varying type conversion but otherwise have exactly the + same type. */ -static int -lMatchIgnoringUniform(const Type *callType, const Type *funcArgType) { - int prev = lMatchWithTypeWidening(callType, funcArgType); - if (prev != -1) - return prev; - - if (dynamic_cast(callType) == NULL) - callType = callType->GetAsNonConstType(); - +static bool +lIsMatchWithUniformToVarying(const Type *callType, const Type *funcArgType) { return (callType->IsUniformType() && funcArgType->IsVaryingType() && - Type::Equal(callType->GetAsVaryingType(), funcArgType)) ? 1 : -1; + Type::EqualIgnoringConst(callType->GetAsVaryingType(), funcArgType)); } -/** Helper function used for function overload resolution: returns a cost - of 1 if we can type convert from the call argument type to the function +/** Helper function used for function overload resolution: returns true if + we can type convert from the call argument type to the function argument type, but without doing a uniform -> varying conversion. */ -static int -lMatchWithTypeConvSameVariability(const Type *callType, - const Type *funcArgType) { - int prev = lMatchIgnoringUniform(callType, funcArgType); - if (prev != -1) - return prev; - - if (CanConvertTypes(callType, funcArgType) && - (callType->IsUniformType() == funcArgType->IsUniformType())) - return 1; - else - return -1; +static bool +lIsMatchWithTypeConvSameVariability(const Type *callType, + const Type *funcArgType) { + return (CanConvertTypes(callType, funcArgType) && + (callType->GetVariability() == funcArgType->GetVariability())); } -/** Helper function used for function overload resolution: returns a cost - of 1 if there is any type conversion that gets us from the caller - argument type to the function argument type. 
+/* Returns the set of function overloads that are potential matches, given + argCount values being passed as arguments to the function call. */ -static int -lMatchWithTypeConv(const Type *callType, const Type *funcArgType) { - int prev = lMatchWithTypeConvSameVariability(callType, funcArgType); - if (prev != -1) - return prev; - - return CanConvertTypes(callType, funcArgType) ? 0 : -1; -} - - -/** Given a set of potential matching functions and their associated cost, - return the one with the lowest cost, if unique. Otherwise, if multiple - functions match with the same cost, return NULL. - */ -static Symbol * -lGetBestMatch(std::vector > &matches) { - Assert(matches.size() > 0); - int minCost = matches[0].first; - - for (unsigned int i = 1; i < matches.size(); ++i) - minCost = std::min(minCost, matches[i].first); - - Symbol *match = NULL; - for (unsigned int i = 0; i < matches.size(); ++i) { - if (matches[i].first == minCost) { - if (match != NULL) - // multiple things had the same cost - return NULL; - else - match = matches[i].second; - } - } - return match; -} - - -/** See if we can find a single function from the set of overload options - based on the predicate function passed in. Returns true if no more - tries should be made to find a match, either due to success from - finding a single overloaded function that matches or failure due to - finding multiple ambiguous matches. 
- */ -bool -FunctionSymbolExpr::tryResolve(int (*matchFunc)(const Type *, const Type *), - SourcePos argPos, - const std::vector &callTypes, - const std::vector *argCouldBeNULL) { - const char *funName = candidateFunctions.front()->name.c_str(); - - std::vector > matches; - std::vector::iterator iter; - for (iter = candidateFunctions.begin(); - iter != candidateFunctions.end(); ++iter) { - // Loop over the set of candidate functions and try each one - Symbol *candidateFunction = *iter; +std::vector +FunctionSymbolExpr::getCandidateFunctions(int argCount) const { + std::vector ret; + for (int i = 0; i < (int)candidateFunctions.size(); ++i) { const FunctionType *ft = - dynamic_cast(candidateFunction->type); + dynamic_cast(candidateFunctions[i]->type); Assert(ft != NULL); // There's no way to match if the caller is passing more arguments // than this function instance takes. - if ((int)callTypes.size() > ft->GetNumParameters()) + if (argCount > ft->GetNumParameters()) continue; - int i; - // Note that we're looping over the caller arguments, not the - // function arguments; it may be ok to have more arguments to the - // function than are passed, if the function has default argument - // values. This case is handled below. - int cost = 0; - for (i = 0; i < (int)callTypes.size(); ++i) { - // This may happen if there's an error earlier in compilation. - // It's kind of a silly to redundantly discover this for each - // potential match versus detecting this earlier in the - // matching process and just giving up. 
- const Type *paramType = ft->GetParameterType(i); + // Not enough arguments, and no default argument value to save us + if (argCount < ft->GetNumParameters() && + ft->GetParameterDefault(argCount) == NULL) + continue; - if (callTypes[i] == NULL || paramType == NULL || - dynamic_cast(callTypes[i]) != NULL) - return false; + // Success + ret.push_back(candidateFunctions[i]); + } + return ret; +} - int argCost = matchFunc(callTypes[i], paramType); - if (argCost == -1) { - if (argCouldBeNULL != NULL && (*argCouldBeNULL)[i] == true && - dynamic_cast(paramType) != NULL) - // If the passed argument value is zero and this is a - // pointer type, then it can convert to a NULL value of - // that pointer type. - argCost = 0; - else - // If the predicate function returns -1, we have failed no - // matter what else happens, so we stop trying - break; - } - cost += argCost; - } - if (i == (int)callTypes.size()) { - // All of the arguments matched! - if (i == ft->GetNumParameters()) - // And we have exactly as many arguments as the function - // wants, so we're done. - matches.push_back(std::make_pair(cost, candidateFunction)); - else if (i < ft->GetNumParameters() && - ft->GetParameterDefault(i) != NULL) - // Otherwise we can still make it if there are default - // arguments for the rest of the arguments! Because in - // Module::AddFunction() we have verified that once the - // default arguments start, then all of the following ones - // have them as well. Therefore, we just need to check if - // the arg we stopped at has a default value and we're - // done. - matches.push_back(std::make_pair(cost, candidateFunction)); - // otherwise, we don't have a match + +/** This function computes the value of a cost function that represents the + cost of calling a function of the given type with arguments of the + given types. If it's not possible to call the function, regardless of + any type conversions applied, a cost of -1 is returned. 
+ */ +int +FunctionSymbolExpr::computeOverloadCost(const FunctionType *ftype, + const std::vector &argTypes, + const std::vector *argCouldBeNULL, + const std::vector *argIsConstant) { + int costSum = 0; + + // In computing the cost function, we only worry about the actual + // argument types--using function default parameter values is free for + // the purposes here... + for (int i = 0; i < (int)argTypes.size(); ++i) { + // The cost imposed by this argument will be a multiple of + // costScale, which has a value set so that for each of the cost + // buckets, even if all of the function arguments undergo the next + // lower-cost conversion, the sum of their costs will be less than + // a single instance of the next higher-cost conversion. + int costScale = argTypes.size() + 1; + + const Type *fargType = ftype->GetParameterType(i); + const Type *callType = argTypes[i]; + + // For convenience, normalize to non-const types (except for + // references, where const-ness matters). For all other types, + // we're passing by value anyway, so const doesn't matter. + if (dynamic_cast(callType) == NULL) + callType = callType->GetAsNonConstType(); + if (dynamic_cast(fargType) == NULL) + fargType = fargType->GetAsNonConstType(); + + if (Type::Equal(callType, fargType)) + // Perfect match: no cost + costSum += 0; + else if (argCouldBeNULL && (*argCouldBeNULL)[i] && + dynamic_cast(fargType) != NULL) + // Passing NULL to a pointer-typed parameter is also a no-cost + // operation + costSum += 0; + else { + // If the argument is a compile-time constant, we'd like to + // count the cost of various conversions as much lower than the + // cost if it wasn't--so scale up the cost when this isn't the + // case.. 
+ if (argIsConstant == NULL || (*argIsConstant)[i] == false) + costScale *= 32; + + if (Type::Equal(callType, fargType)) + // Exact match (after dealing with references, above) + costSum += 1 * costScale; + else if (lIsMatchWithTypeWidening(callType, fargType)) + costSum += 2 * costScale; + else if (lIsMatchWithUniformToVarying(callType, fargType)) + costSum += 4 * costScale; + else if (lIsMatchWithTypeConvSameVariability(callType, fargType)) + costSum += 8 * costScale; + else if (CanConvertTypes(callType, fargType)) + costSum += 16 * costScale; + else + // Failure--no type conversion possible... + return -1; } } - if (matches.size() == 0) - return false; - else if ((matchingFunc = lGetBestMatch(matches)) != NULL) - // We have a match! - return true; - else { - Error(pos, "Multiple overloaded instances of function \"%s\" matched.", - funName); - - // select the matches that have the lowest cost - std::vector bestMatches; - int minCost = matches[0].first; - for (unsigned int i = 1; i < matches.size(); ++i) - minCost = std::min(minCost, matches[i].first); - for (unsigned int i = 0; i < matches.size(); ++i) - if (matches[i].first == minCost) - bestMatches.push_back(matches[i].second); - - // And print a useful error message - lPrintOverloadCandidates(argPos, bestMatches, callTypes, argCouldBeNULL); - - // Stop trying to find more matches after an ambigious set of - // matches. - return true; - } + return costSum; } bool FunctionSymbolExpr::ResolveOverloads(SourcePos argPos, const std::vector &argTypes, - const std::vector *argCouldBeNULL) { + const std::vector *argCouldBeNULL, + const std::vector *argIsConstant) { + const char *funName = candidateFunctions.front()->name.c_str(); + triedToResolve = true; // Functions with names that start with "__" should only be various @@ -7689,45 +7594,67 @@ FunctionSymbolExpr::ResolveOverloads(SourcePos argPos, // called. 
bool exactMatchOnly = (name.substr(0,2) == "__"); - // Is there an exact match that doesn't require any argument type - // conversion (other than converting type -> reference type)? - if (tryResolve(lExactMatch, argPos, argTypes, argCouldBeNULL)) - return true; + // First, find the subset of overload candidates that take the same + // number of arguments as have parameters (including functions that + // take more arguments but have defaults starting no later than after + // our last parameter). + std::vector actualCandidates = + getCandidateFunctions(argTypes.size()); - if (exactMatchOnly == false) { - // Try to find a single match ignoring references - if (tryResolve(lMatchIgnoringReferences, argPos, argTypes, - argCouldBeNULL)) - return true; + int bestMatchCost = 1<<30; + std::vector matches; + std::vector candidateCosts; - // Try to find an exact match via type widening--i.e. int8 -> - // int16, etc.--things that don't lose data. - if (tryResolve(lMatchWithTypeWidening, argPos, argTypes, argCouldBeNULL)) - return true; + if (actualCandidates.size() == 0) + goto failure; - // Next try to see if there's a match via just uniform -> varying - // promotions. - if (tryResolve(lMatchIgnoringUniform, argPos, argTypes, argCouldBeNULL)) - return true; - - // Try to find a match via type conversion, but don't change - // unif->varying - if (tryResolve(lMatchWithTypeConvSameVariability, argPos, argTypes, - argCouldBeNULL)) - return true; - - // Last chance: try to find a match via arbitrary type conversion. 
- if (tryResolve(lMatchWithTypeConv, argPos, argTypes, argCouldBeNULL)) - return true; + // Compute the cost for calling each of the candidate functions + for (int i = 0; i < (int)actualCandidates.size(); ++i) { + const FunctionType *ft = + dynamic_cast(actualCandidates[i]->type); + Assert(ft != NULL); + candidateCosts.push_back(computeOverloadCost(ft, argTypes, + argCouldBeNULL, + argIsConstant)); } - // failure :-( - const char *funName = candidateFunctions.front()->name.c_str(); - Error(pos, "Unable to find matching overload for call to function \"%s\"%s.", - funName, exactMatchOnly ? " only considering exact matches" : ""); - lPrintOverloadCandidates(argPos, candidateFunctions, argTypes, - argCouldBeNULL); - return false; + // Find the best cost, and then the candidate or candidates that have + // that cost. + for (int i = 0; i < (int)candidateCosts.size(); ++i) { + if (candidateCosts[i] != -1 && candidateCosts[i] < bestMatchCost) + bestMatchCost = candidateCosts[i]; + } + // None of the candidates matched + if (bestMatchCost == (1<<30)) + goto failure; + for (int i = 0; i < (int)candidateCosts.size(); ++i) { + if (candidateCosts[i] == bestMatchCost) + matches.push_back(actualCandidates[i]); + } + + if (matches.size() == 1) { + // Only one match: success + matchingFunc = matches[0]; + return true; + } + else if (matches.size() > 1) { + // Multiple matches: ambiguous + Error(pos, "Multiple overloaded functions matched call to function " + "\"%s\"%s.", funName, + exactMatchOnly ? " only considering exact matches" : ""); + lPrintOverloadCandidates(argPos, matches, argTypes, argCouldBeNULL); + return false; + } + else { + // No matches at all + failure: + Error(pos, "Unable to find any matching overload for call to function " + "\"%s\"%s.", funName, + exactMatchOnly ? 
" only considering exact matches" : ""); + lPrintOverloadCandidates(argPos, candidateFunctions, argTypes, + argCouldBeNULL); + return false; + } } diff --git a/expr.h b/expr.h index 5c59ae83..e7461a1a 100644 --- a/expr.h +++ b/expr.h @@ -651,20 +651,26 @@ public: function overloading, this method resolves which actual function the arguments match best. If the argCouldBeNULL parameter is non-NULL, each element indicates whether the corresponding argument - is the number zero, indicating that it could be a NULL pointer. - This parameter may be NULL (for cases where overload resolution is - being done just given type information without the parameter - argument expressions being available. It returns true on success. + is the number zero, indicating that it could be a NULL pointer, and + if argIsConstant is non-NULL, each element indicates whether the + corresponding argument is a compile-time constant value. Both of + these parameters may be NULL (for cases where overload resolution + is being done just given type information without the parameter + argument expressions being available. This function returns true + on success. */ bool ResolveOverloads(SourcePos argPos, const std::vector &argTypes, - const std::vector *argCouldBeNULL = NULL); + const std::vector *argCouldBeNULL = NULL, + const std::vector *argIsConstant = NULL); Symbol *GetMatchingFunction(); private: - bool tryResolve(int (*matchFunc)(const Type *, const Type *), - SourcePos argPos, const std::vector &argTypes, - const std::vector *argCouldBeNULL); + std::vector getCandidateFunctions(int argCount) const; + static int computeOverloadCost(const FunctionType *ftype, + const std::vector &argTypes, + const std::vector *argCouldBeNULL, + const std::vector *argIsConstant); /** Name of the function that is being called. 
*/ std::string name; diff --git a/tests/func-overload-max.ispc b/tests/func-overload-max.ispc new file mode 100644 index 00000000..37360030 --- /dev/null +++ b/tests/func-overload-max.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = 1. / aFOO[programIndex]; + RET[programIndex] = max(0, a); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1. / (1+programIndex); +} diff --git a/tests_errors/func-param-mismatch-2.ispc b/tests_errors/func-param-mismatch-2.ispc index 09b27064..63c0239a 100644 --- a/tests_errors/func-param-mismatch-2.ispc +++ b/tests_errors/func-param-mismatch-2.ispc @@ -1,4 +1,4 @@ -// Unable to find matching overload for call to function +// Unable to find any matching overload for call to function void foo(int x); diff --git a/tests_errors/func-param-mismatch-3.ispc b/tests_errors/func-param-mismatch-3.ispc index 7e5f2b99..cb34c8a7 100644 --- a/tests_errors/func-param-mismatch-3.ispc +++ b/tests_errors/func-param-mismatch-3.ispc @@ -1,4 +1,4 @@ -// Unable to find matching overload for call to function +// Unable to find any matching overload for call to function void foo(int x); diff --git a/tests_errors/func-param-mismatch.ispc b/tests_errors/func-param-mismatch.ispc index c2bac94f..44a50903 100644 --- a/tests_errors/func-param-mismatch.ispc +++ b/tests_errors/func-param-mismatch.ispc @@ -1,4 +1,4 @@ -// Unable to find matching overload for call to function +// Unable to find any matching overload for call to function void foo(); From 4690a678c1a29e71504dcd48eecbffd9edf9d368 Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Wed, 28 Mar 2012 02:40:06 -0400 Subject: [PATCH 020/173] Added parentheses around a || b && c statement in TypeCastExpr to placate the compiler warning and make the code easier to understand. 
--- expr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expr.cpp b/expr.cpp index 907f1a84..78a89b6a 100644 --- a/expr.cpp +++ b/expr.cpp @@ -6614,7 +6614,7 @@ TypeCastExpr::TypeCheck() { return this; if (Type::Equal(fromType, AtomicType::Void) || - fromType->IsVaryingType() && toType->IsUniformType()) { + (fromType->IsVaryingType() && toType->IsUniformType())) { Error(pos, "Can't type cast from type \"%s\" to type \"%s\"", fromType->GetString().c_str(), toType->GetString().c_str()); return NULL; From 8368ba853945ab50fe84208d5b944317dc974b11 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 27 Mar 2012 17:01:31 -0700 Subject: [PATCH 021/173] Add missing checks for NULL current basic block in stmt code. Fixes crashes if, for example, these statement types appeared after early returns in the middle of functions. --- stmt.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/stmt.cpp b/stmt.cpp index 9aad4291..d7098ff0 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2228,6 +2228,9 @@ GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip) void GotoStmt::EmitCode(FunctionEmitContext *ctx) const { + if (!ctx->GetCurrentBasicBlock()) + return; + if (ctx->VaryingCFDepth() > 0) { Error(pos, "\"goto\" statements are only legal under \"uniform\" " "control flow."); @@ -2478,6 +2481,9 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) { */ void PrintStmt::EmitCode(FunctionEmitContext *ctx) const { + if (!ctx->GetCurrentBasicBlock()) + return; + ctx->SetDebugPos(pos); // __do_print takes 5 arguments; we'll get them stored in the args[] array @@ -2583,6 +2589,9 @@ AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p) void AssertStmt::EmitCode(FunctionEmitContext *ctx) const { + if (!ctx->GetCurrentBasicBlock()) + return; + if (expr == NULL) return; const Type *type = expr->GetType(); @@ -2658,6 +2667,9 @@ DeleteStmt::DeleteStmt(Expr *e, SourcePos p) void 
DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { + if (!ctx->GetCurrentBasicBlock()) + return; + const Type *exprType; if (expr == NULL || ((exprType = expr->GetType()) == NULL)) { Assert(m->errorCount > 0); From 013a3e756738e1ac1f3c1ba7785e480937cb061d Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 28 Mar 2012 08:52:09 -0700 Subject: [PATCH 022/173] Support concatenation of adjacent string literals in the parser. Fixes issue #208. --- parse.yy | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/parse.yy b/parse.yy index 1fa8336f..cffdc0c9 100644 --- a/parse.yy +++ b/parse.yy @@ -250,6 +250,12 @@ struct ForeachDimension { string_constant : TOKEN_STRING_LITERAL { $$ = new std::string(*yylval.stringVal); } + | string_constant TOKEN_STRING_LITERAL + { + std::string s = *((std::string *)$1); + s += *yylval.stringVal; + $$ = new std::string(s); + } ; primary_expression From 3270e2bf5aac2035019a18ba9dd79f2caeed78c6 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 28 Mar 2012 09:20:06 -0700 Subject: [PATCH 023/173] Call CPUID to more reliably detect level of SSE/AVX that the host supports. Fixes, I hope, issue #205. 
--- ispc.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/ispc.cpp b/ispc.cpp index 49623be4..050f8a11 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -70,6 +70,49 @@ Module *m; /////////////////////////////////////////////////////////////////////////// // Target +#ifndef ISPC_IS_WINDOWS +static void __cpuid(int info[4], int infoType) { + __asm__ __volatile__ ("cpuid" + : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3]) + : "0" (infoType)); +} + +/* Save %ebx in case it's the PIC register */ +static void __cpuidex(int info[4], int level, int count) { + __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t" + "cpuid\n\t" + "xchg{l}\t{%%}ebx, %1\n\t" + : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3]) + : "0" (level), "2" (count)); +} +#endif // ISPC_IS_WINDOWS + + +static const char * +lGetSystemISA() { + int info[4]; + __cpuid(info, 1); + + if ((info[2] & (1 << 28)) != 0) { + // AVX1 for sure. Do we have AVX2? + // Call cpuid with eax=7, ecx=0 + __cpuidex(info, 7, 0); + if ((info[1] & (1 << 5)) != 0) + return "avx2"; + else + return "avx"; + } + else if ((info[2] & (1 << 19)) != 0) + return "sse4"; + else if ((info[3] & (1 << 26)) != 0) + return "sse2"; + else { + fprintf(stderr, "Unable to detect supported SSE/AVX ISA. Exiting.\n"); + exit(1); + } +} + + bool Target::GetTarget(const char *arch, const char *cpu, const char *isa, bool pic, Target *t) { @@ -85,15 +128,9 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->cpu = cpu; if (isa == NULL) { - if (!strcasecmp(cpu, "atom")) - isa = "sse2"; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) - else if (!strcasecmp(cpu, "sandybridge") || - !strcasecmp(cpu, "corei7-avx")) - isa = "avx"; -#endif // LLVM_3_0 - else - isa = "sse4"; + isa = lGetSystemISA(); + fprintf(stderr, "Notice: no --target specified on command-line. 
Using " + "system ISA \"%s\".\n", isa); } if (arch == NULL) arch = "x86-64"; From d0d9aae96800844756a6784363ec510aebb69a70 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 28 Mar 2012 10:10:51 -0700 Subject: [PATCH 024/173] Fix parser so that spaces aren't needed around "..." in foreach statements. Issue #207. --- lex.ll | 121 ++++++++++++++++++++++++++++++++++--------------------- parse.yy | 32 +++++++++++++-- 2 files changed, 104 insertions(+), 49 deletions(-) diff --git a/lex.ll b/lex.ll index 4130372f..96c19d1d 100644 --- a/lex.ll +++ b/lex.ll @@ -43,6 +43,7 @@ #include static uint64_t lParseBinary(const char *ptr, SourcePos pos, char **endPtr); +static int lParseInteger(bool dotdotdot); static void lCComment(SourcePos *); static void lCppComment(SourcePos *); static void lHandleCppHash(SourcePos *); @@ -322,7 +323,8 @@ inline int ispcRand() { %option nounistd WHITESPACE [ \t\r]+ -INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[kMG]? +INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]* +INT_NUMBER_DOTDOTDOT (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*\.\.\. FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?) HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?) @@ -406,53 +408,14 @@ L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERA return TOKEN_IDENTIFIER; } -{INT_NUMBER}+(u|U|l|L)*? { +{INT_NUMBER} { RT; - int ls = 0, us = 0; + return lParseInteger(false); +} - char *endPtr = NULL; - if (yytext[0] == '0' && yytext[1] == 'b') - yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr); - else { -#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__) - yylval.intVal = _strtoui64(yytext, &endPtr, 0); -#else - // FIXME: should use strtouq and then issue an error if we can't - // fit into 64 bits... 
- yylval.intVal = strtoull(yytext, &endPtr, 0); -#endif - } - - bool kilo = false, mega = false, giga = false; - for (; *endPtr; endPtr++) { - if (*endPtr == 'k') - kilo = true; - else if (*endPtr == 'M') - mega = true; - else if (*endPtr == 'G') - giga = true; - else if (*endPtr == 'l' || *endPtr == 'L') - ls++; - else if (*endPtr == 'u' || *endPtr == 'U') - us++; - } - if (kilo) - yylval.intVal *= 1024; - if (mega) - yylval.intVal *= 1024*1024; - if (giga) - yylval.intVal *= 1024*1024*1024; - - if (ls >= 2) - return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT; - else if (ls == 1) - return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT; - - // See if we can fit this into a 32-bit integer... - if ((yylval.intVal & 0xffffffff) == yylval.intVal) - return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT; - else - return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT; +{INT_NUMBER_DOTDOTDOT} { + RT; + return lParseInteger(true); } @@ -562,6 +525,72 @@ lParseBinary(const char *ptr, SourcePos pos, char **endPtr) { } +static int +lParseInteger(bool dotdotdot) { + int ls = 0, us = 0; + + char *endPtr = NULL; + if (yytext[0] == '0' && yytext[1] == 'b') + yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr); + else { +#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__) + yylval.intVal = _strtoui64(yytext, &endPtr, 0); +#else + // FIXME: should use strtouq and then issue an error if we can't + // fit into 64 bits... 
+ yylval.intVal = strtoull(yytext, &endPtr, 0); +#endif + } + + bool kilo = false, mega = false, giga = false; + for (; *endPtr; endPtr++) { + if (*endPtr == 'k') + kilo = true; + else if (*endPtr == 'M') + mega = true; + else if (*endPtr == 'G') + giga = true; + else if (*endPtr == 'l' || *endPtr == 'L') + ls++; + else if (*endPtr == 'u' || *endPtr == 'U') + us++; + else + Assert(dotdotdot && *endPtr == '.'); + } + if (kilo) + yylval.intVal *= 1024; + if (mega) + yylval.intVal *= 1024*1024; + if (giga) + yylval.intVal *= 1024*1024*1024; + + if (dotdotdot) { + if (ls >= 2) + return us ? TOKEN_UINT64DOTDOTDOT_CONSTANT : TOKEN_INT64DOTDOTDOT_CONSTANT; + else if (ls == 1) + return us ? TOKEN_UINT32DOTDOTDOT_CONSTANT : TOKEN_INT32DOTDOTDOT_CONSTANT; + + // See if we can fit this into a 32-bit integer... + if ((yylval.intVal & 0xffffffff) == yylval.intVal) + return us ? TOKEN_UINT32DOTDOTDOT_CONSTANT : TOKEN_INT32DOTDOTDOT_CONSTANT; + else + return us ? TOKEN_UINT64DOTDOTDOT_CONSTANT : TOKEN_INT64DOTDOTDOT_CONSTANT; + } + else { + if (ls >= 2) + return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT; + else if (ls == 1) + return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT; + + // See if we can fit this into a 32-bit integer... + if ((yylval.intVal & 0xffffffff) == yylval.intVal) + return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT; + else + return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT; + } +} + + /** Handle a C-style comment in the source. 
*/ static void diff --git a/parse.yy b/parse.yy index cffdc0c9..978b11d7 100644 --- a/parse.yy +++ b/parse.yy @@ -173,8 +173,11 @@ struct ForeachDimension { } -%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT TOKEN_INT64_CONSTANT -%token TOKEN_UINT64_CONSTANT TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL +%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT +%token TOKEN_INT64_CONSTANT TOKEN_UINT64_CONSTANT +%token TOKEN_INT32DOTDOTDOT_CONSTANT TOKEN_UINT32DOTDOTDOT_CONSTANT +%token TOKEN_INT64DOTDOTDOT_CONSTANT TOKEN_UINT64DOTDOTDOT_CONSTANT +%token TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL %token TOKEN_IDENTIFIER TOKEN_STRING_LITERAL TOKEN_TYPE_NAME TOKEN_NULL %token TOKEN_PTR_OP TOKEN_INC_OP TOKEN_DEC_OP TOKEN_LEFT_OP TOKEN_RIGHT_OP %token TOKEN_LE_OP TOKEN_GE_OP TOKEN_EQ_OP TOKEN_NE_OP @@ -196,7 +199,7 @@ struct ForeachDimension { %token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK %token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT -%type primary_expression postfix_expression +%type primary_expression postfix_expression integer_dotdotdot %type unary_expression cast_expression funcall_expression launch_expression %type multiplicative_expression additive_expression shift_expression %type relational_expression equality_expression and_expression @@ -1620,11 +1623,34 @@ foreach_active_identifier } ; +integer_dotdotdot + : TOKEN_INT32DOTDOTDOT_CONSTANT { + $$ = new ConstExpr(AtomicType::UniformInt32->GetAsConstType(), + (int32_t)yylval.intVal, @1); + } + | TOKEN_UINT32DOTDOTDOT_CONSTANT { + $$ = new ConstExpr(AtomicType::UniformUInt32->GetAsConstType(), + (uint32_t)yylval.intVal, @1); + } + | TOKEN_INT64DOTDOTDOT_CONSTANT { + $$ = new ConstExpr(AtomicType::UniformInt64->GetAsConstType(), + (int64_t)yylval.intVal, @1); + } + | TOKEN_UINT64DOTDOTDOT_CONSTANT { + $$ = new ConstExpr(AtomicType::UniformUInt64->GetAsConstType(), + (uint64_t)yylval.intVal, @1); + } + ; + foreach_dimension_specifier : foreach_identifier '=' 
assignment_expression TOKEN_DOTDOTDOT assignment_expression { $$ = new ForeachDimension($1, $3, $5); } + | foreach_identifier '=' integer_dotdotdot assignment_expression + { + $$ = new ForeachDimension($1, $3, $4); + } ; foreach_dimension_list From b3c5043dccee58dfaed33d07a10db9b2158b59f8 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 28 Mar 2012 10:26:39 -0700 Subject: [PATCH 025/173] Don't enable llvm's UnsafeFPMath option when --opt=fast-math is supplied. This was causing functions like round() to fail on SSE2, since it has code that does: x += 0x1.0p23f; x -= 0x1.0p23f; which was in turn being undesirably optimized away. Fixes issue #211. --- ispc.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ispc.cpp b/ispc.cpp index 050f8a11..25bec209 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -355,8 +355,15 @@ Target::GetTargetMachine() const { #if defined(LLVM_3_1svn) std::string featuresString = attributes; llvm::TargetOptions options; +#if 0 + // This was breaking e.g. round() on SSE2, where the code we want to + // run wants to do: + // x += 0x1.0p23f; + // x -= 0x1.0p23f; + // But then LLVM was optimizing this away... if (g->opt.fastMath == true) options.UnsafeFPMath = 1; +#endif llvm::TargetMachine *targetMachine = target->createTargetMachine(triple, cpu, featuresString, options, relocModel); From 540fc6c2f3eadf427e817a80360886027cff9759 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 28 Mar 2012 11:51:56 -0700 Subject: [PATCH 026/173] Fix bugs with default parameter values for pointer-typed function parameters. In particular "void foo(int * ptr = NULL)" and the like work now. Issue #197. 
--- decl.cpp | 32 +++++++++++++++++--------------- module.cpp | 2 +- type.cpp | 6 +++--- type.h | 6 +++--- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/decl.cpp b/decl.cpp index 748cffb1..c54abdb9 100644 --- a/decl.cpp +++ b/decl.cpp @@ -424,7 +424,7 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { case DK_FUNCTION: { std::vector args; std::vector argNames; - std::vector argDefaults; + std::vector argDefaults; std::vector argPos; // Loop over the function arguments and store the names, types, @@ -482,23 +482,25 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { argNames.push_back(sym->name); argPos.push_back(sym->pos); - ConstExpr *init = NULL; + Expr *init = NULL; if (d->declarators.size()) { - // Try to find an initializer expression; if there is one, - // it lives down to the base declarator. + // Try to find an initializer expression. Declarator *decl = d->declarators[0]; while (decl->child != NULL) { - Assert(decl->initExpr == NULL); - decl = decl->child; - } - - if (decl->initExpr != NULL && - (decl->initExpr = TypeCheck(decl->initExpr)) != NULL && - (decl->initExpr = Optimize(decl->initExpr)) != NULL && - (init = dynamic_cast(decl->initExpr)) == NULL) { - Error(decl->initExpr->pos, "Default value for parameter " - "\"%s\" must be a compile-time constant.", - sym->name.c_str()); + if (decl->initExpr != NULL) { + decl->initExpr = TypeCheck(decl->initExpr); + decl->initExpr = Optimize(decl->initExpr); + if (decl->initExpr != NULL && + ((init = dynamic_cast(decl->initExpr)) == NULL) && + ((init = dynamic_cast(decl->initExpr)) == NULL)) { + Error(decl->initExpr->pos, "Default value for parameter " + "\"%s\" must be a compile-time constant.", + sym->name.c_str()); + } + break; + } + else + decl = decl->child; } } argDefaults.push_back(init); diff --git a/module.cpp b/module.cpp index 4555a3d5..3f184a42 100644 --- a/module.cpp +++ b/module.cpp @@ -547,7 +547,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool 
isInline) { for (int i = 0; i < nArgs; ++i) { const Type *argType = functionType->GetParameterType(i); const std::string &argName = functionType->GetParameterName(i); - ConstExpr *defaultValue = functionType->GetParameterDefault(i); + Expr *defaultValue = functionType->GetParameterDefault(i); const SourcePos &argPos = functionType->GetParameterSourcePos(i); // If the function is exported, make sure that the parameter diff --git a/type.cpp b/type.cpp index 06ab8bd2..80856d5e 100644 --- a/type.cpp +++ b/type.cpp @@ -2330,7 +2330,7 @@ FunctionType::FunctionType(const Type *r, const std::vector &a, SourcePos p) : isTask(false), isExported(false), isExternC(false), returnType(r), paramTypes(a), paramNames(std::vector(a.size(), "")), - paramDefaults(std::vector(a.size(), NULL)), + paramDefaults(std::vector(a.size(), NULL)), paramPositions(std::vector(a.size(), p)) { Assert(returnType != NULL); isSafe = false; @@ -2340,7 +2340,7 @@ FunctionType::FunctionType(const Type *r, const std::vector &a, FunctionType::FunctionType(const Type *r, const std::vector &a, const std::vector &an, - const std::vector &ad, + const std::vector &ad, const std::vector &ap, bool it, bool is, bool ec) : isTask(it), isExported(is), isExternC(ec), returnType(r), paramTypes(a), @@ -2614,7 +2614,7 @@ FunctionType::GetParameterType(int i) const { } -ConstExpr * +Expr * FunctionType::GetParameterDefault(int i) const { Assert(i < (int)paramDefaults.size()); return paramDefaults[i]; diff --git a/type.h b/type.h index 94c28f0b..54cea005 100644 --- a/type.h +++ b/type.h @@ -745,7 +745,7 @@ public: FunctionType(const Type *returnType, const std::vector &argTypes, const std::vector &argNames, - const std::vector &argDefaults, + const std::vector &argDefaults, const std::vector &argPos, bool isTask, bool isExported, bool isExternC); @@ -785,7 +785,7 @@ public: int GetNumParameters() const { return (int)paramTypes.size(); } const Type *GetParameterType(int i) const; - ConstExpr * GetParameterDefault(int i) 
const; + Expr * GetParameterDefault(int i) const; const SourcePos &GetParameterSourcePos(int i) const; const std::string &GetParameterName(int i) const; @@ -818,7 +818,7 @@ private: const std::vector paramNames; /** Default values of the function's arguments. For arguments without default values provided, NULL is stored. */ - mutable std::vector paramDefaults; + mutable std::vector paramDefaults; /** The names provided (if any) with the function arguments in the function's signature. These should only be used for error messages and the like and so not affect testing function types for equality, From 5b2d43f6651387a49e6459404c05828200c290e6 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 28 Mar 2012 14:15:49 -0700 Subject: [PATCH 027/173] Fix global variable code to correctly handle extern declarations. When we have an "extern" global, now we no longer inadvertently define storage for it. Further, we now successfully do define storage when we encounter a definition following one or more extern declarations. Issues #215 and #217. 
--- module.cpp | 115 ++++++++++++++++++++++--------- sym.h | 2 +- tests_errors/global-decl-1.ispc | 4 ++ tests_errors/global-decl-2.ispc | 4 ++ tests_errors/global-redef-1.ispc | 4 ++ tests_errors/global-redef.ispc | 4 ++ 6 files changed, 101 insertions(+), 32 deletions(-) create mode 100644 tests_errors/global-decl-1.ispc create mode 100644 tests_errors/global-decl-2.ispc create mode 100644 tests_errors/global-redef-1.ispc create mode 100644 tests_errors/global-redef.ispc diff --git a/module.cpp b/module.cpp index 3f184a42..87cb1a88 100644 --- a/module.cpp +++ b/module.cpp @@ -270,53 +270,106 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) { Error(sym->pos, "Initializer can't be provided with \"extern\" " "global variable \"%s\".", sym->name.c_str()); } - else if (initExpr != NULL) { - initExpr = TypeCheck(initExpr); + else { if (initExpr != NULL) { - // We need to make sure the initializer expression is - // the same type as the global. (But not if it's an - // ExprList; they don't have types per se / can't type - // convert themselves anyway.) - if (dynamic_cast(initExpr) == NULL) - initExpr = TypeConvertExpr(initExpr, sym->type, "initializer"); - + initExpr = TypeCheck(initExpr); if (initExpr != NULL) { - initExpr = Optimize(initExpr); - // Fingers crossed, now let's see if we've got a - // constant value.. - llvmInitializer = initExpr->GetConstant(sym->type); + // We need to make sure the initializer expression is + // the same type as the global. (But not if it's an + // ExprList; they don't have types per se / can't type + // convert themselves anyway.) + if (dynamic_cast(initExpr) == NULL) + initExpr = TypeConvertExpr(initExpr, sym->type, "initializer"); + + if (initExpr != NULL) { + initExpr = Optimize(initExpr); + // Fingers crossed, now let's see if we've got a + // constant value.. 
+ llvmInitializer = initExpr->GetConstant(sym->type); - if (llvmInitializer != NULL) { - if (sym->type->IsConstType()) - // Try to get a ConstExpr associated with - // the symbol. This dynamic_cast can - // validly fail, for example for types like - // StructTypes where a ConstExpr can't - // represent their values. - sym->constValue = - dynamic_cast(initExpr); + if (llvmInitializer != NULL) { + if (sym->type->IsConstType()) + // Try to get a ConstExpr associated with + // the symbol. This dynamic_cast can + // validly fail, for example for types like + // StructTypes where a ConstExpr can't + // represent their values. + sym->constValue = + dynamic_cast(initExpr); + } + else + Error(initExpr->pos, "Initializer for global variable \"%s\" " + "must be a constant.", sym->name.c_str()); } - else - Error(initExpr->pos, "Initializer for global variable \"%s\" " - "must be a constant.", sym->name.c_str()); } } + + // If no initializer was provided or if we couldn't get a value + // above, initialize it with zeros.. + if (llvmInitializer == NULL) + llvmInitializer = llvm::Constant::getNullValue(llvmType); } - // If no initializer was provided or if we couldn't get a value - // above, initialize it with zeros.. - if (llvmInitializer == NULL) - llvmInitializer = llvm::Constant::getNullValue(llvmType); + Symbol *stSym = symbolTable->LookupVariable(sym->name.c_str()); + llvm::GlobalVariable *oldGV = NULL; + if (stSym != NULL) { + // We've already seen either a declaration or a definition of this + // global. + + // If the type doesn't match with the previous one, issue an error. 
+ if (!Type::Equal(sym->type, stSym->type)) { + Error(sym->pos, "Definition of variable \"%s\" conflicts with " + "definition at %s:%d.", sym->name.c_str(), + stSym->pos.name, stSym->pos.first_line); + return; + } + + llvm::GlobalVariable *gv = + llvm::dyn_cast(stSym->storagePtr); + Assert(gv != NULL); + + // And issue an error if this is a redefinition of a variable + if (gv->hasInitializer() && + sym->storageClass != SC_EXTERN && sym->storageClass != SC_EXTERN_C) { + Error(sym->pos, "Redefinition of variable \"%s\" is illegal. " + "(Previous definition at %s:%d.)", sym->name.c_str(), + stSym->pos.name, stSym->pos.first_line); + return; + } + + // Now, we either have a redeclaration of a global, or a definition + // of a previously-declared global. First, save the pointer to the + // previous llvm::GlobalVariable + oldGV = gv; + + // Now copy over all of the members of the current Symbol to the + // symbol in the symbol table. + *stSym = *sym; + // And copy the pointer of the one in the symbol table to sym, so + // that the operations below update storagePtr for the Symbol + // already in the symbol table. + sym = stSym; + } + else + symbolTable->AddVariable(sym); llvm::GlobalValue::LinkageTypes linkage = (sym->storageClass == SC_STATIC) ? llvm::GlobalValue::InternalLinkage : llvm::GlobalValue::ExternalLinkage; + + // Note that the NULL llvmInitializer is what leads to "extern" + // declarations coming up extern and not defining storage (a bit + // subtle)... sym->storagePtr = new llvm::GlobalVariable(*module, llvmType, isConst, linkage, llvmInitializer, sym->name.c_str()); - symbolTable->AddVariable(sym); - if (diBuilder && (sym->storageClass != SC_EXTERN)) { + // Patch up any references to the previous GlobalVariable (e.g. from a + // declaration of a global that was later defined.) 
+ if (oldGV != NULL) + oldGV->replaceAllUsesWith(sym->storagePtr); + + if (diBuilder) { llvm::DIFile file = sym->pos.GetDIFile(); diBuilder->createGlobalVariable(sym->name, file, diff --git a/sym.h b/sym.h index fa452326..8e14495a 100644 --- a/sym.h +++ b/sym.h @@ -75,7 +75,7 @@ public: std::string MangledName() const; SourcePos pos; /*!< Source file position where the symbol was defined */ - const std::string name; /*!< Symbol's name */ + std::string name; /*!< Symbol's name */ llvm::Value *storagePtr; /*!< For symbols with storage associated with them (i.e. variables but not functions), this member stores a pointer to its diff --git a/tests_errors/global-decl-1.ispc b/tests_errors/global-decl-1.ispc new file mode 100644 index 00000000..6f111bbf --- /dev/null +++ b/tests_errors/global-decl-1.ispc @@ -0,0 +1,4 @@ +// Definition of variable "foo" conflicts with definition at + +extern int foo; +float foo; diff --git a/tests_errors/global-decl-2.ispc b/tests_errors/global-decl-2.ispc new file mode 100644 index 00000000..66647ea7 --- /dev/null +++ b/tests_errors/global-decl-2.ispc @@ -0,0 +1,4 @@ +// Definition of variable "foo" conflicts with definition at + +extern int foo; +extern float foo; diff --git a/tests_errors/global-redef-1.ispc b/tests_errors/global-redef-1.ispc new file mode 100644 index 00000000..7ebb3da7 --- /dev/null +++ b/tests_errors/global-redef-1.ispc @@ -0,0 +1,4 @@ +// Definition of variable "foo" conflicts with definition at + +int foo; +float foo; diff --git a/tests_errors/global-redef.ispc b/tests_errors/global-redef.ispc new file mode 100644 index 00000000..9a2df32f --- /dev/null +++ b/tests_errors/global-redef.ispc @@ -0,0 +1,4 @@ +// Redefinition of variable "foo" is illegal + +int foo; +int foo; From d5e3416e8ef9c11dac11bfd4dc8dcab656782aed Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 28 Mar 2012 14:29:58 -0700 Subject: [PATCH 028/173] Fix bug in default argument handling introduced in 540fc6c2f3 --- decl.cpp | 17 ++++++++++------- 
1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/decl.cpp b/decl.cpp index c54abdb9..1a67e387 100644 --- a/decl.cpp +++ b/decl.cpp @@ -486,16 +486,18 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { if (d->declarators.size()) { // Try to find an initializer expression. Declarator *decl = d->declarators[0]; - while (decl->child != NULL) { + while (decl != NULL) { if (decl->initExpr != NULL) { decl->initExpr = TypeCheck(decl->initExpr); decl->initExpr = Optimize(decl->initExpr); - if (decl->initExpr != NULL && - ((init = dynamic_cast(decl->initExpr)) == NULL) && - ((init = dynamic_cast(decl->initExpr)) == NULL)) { - Error(decl->initExpr->pos, "Default value for parameter " - "\"%s\" must be a compile-time constant.", - sym->name.c_str()); + if (decl->initExpr != NULL) { + init = dynamic_cast(decl->initExpr); + if (init == NULL) + init = dynamic_cast(decl->initExpr); + if (init == NULL) + Error(decl->initExpr->pos, "Default value for parameter " + "\"%s\" must be a compile-time constant.", + sym->name.c_str()); } break; } @@ -511,6 +513,7 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { Error(pos, "No return type provided in function declaration."); return NULL; } + if (dynamic_cast(returnType) != NULL) { Error(pos, "Illegal to return function type from function."); return NULL; From 255791f18eed6477e66fcdc3de5be631e41c8b88 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 29 Mar 2012 11:50:15 -0700 Subject: [PATCH 029/173] Fix to get correct variable names for extern globals that are later defined. --- module.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index 87cb1a88..f9e38d87 100644 --- a/module.cpp +++ b/module.cpp @@ -366,8 +366,11 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) { // Patch up any references to the previous GlobalVariable (e.g. from a // declaration of a global that was later defined.) 
- if (oldGV != NULL) + if (oldGV != NULL) { oldGV->replaceAllUsesWith(sym->storagePtr); + oldGV->removeFromParent(); + sym->storagePtr->setName(sym->name.c_str()); + } if (diBuilder) { llvm::DIFile file = sym->pos.GetDIFile(); From 87c8a8934973aba5faecbef9833c0a4cf11a0b80 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 29 Mar 2012 13:16:55 -0700 Subject: [PATCH 030/173] Make 'export' a type qualifier, not a storage class. In particular, this makes it legal to do "extern export foo()", among other things. Partially addresses issue #216. --- decl.cpp | 20 ++++++++++++++------ decl.h | 2 +- module.cpp | 4 ++-- parse.yy | 9 ++++++--- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/decl.cpp b/decl.cpp index 1a67e387..5dab985e 100644 --- a/decl.cpp +++ b/decl.cpp @@ -56,6 +56,7 @@ lPrintTypeQualifiers(int typeQualifiers) { if (typeQualifiers & TYPEQUAL_TASK) printf("task "); if (typeQualifiers & TYPEQUAL_SIGNED) printf("signed "); if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned "); + if (typeQualifiers & TYPEQUAL_EXPORT) printf("export "); } @@ -189,7 +190,6 @@ lGetStorageClassName(StorageClass storageClass) { case SC_NONE: return ""; case SC_EXTERN: return "extern"; case SC_EXTERN_C: return "extern \"C\""; - case SC_EXPORT: return "export"; case SC_STATIC: return "static"; case SC_TYPEDEF: return "typedef"; default: FATAL("Unhandled storage class in lGetStorageClassName"); @@ -347,6 +347,7 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0); bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0); bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0); + bool isExported = ((typeQualifiers & TYPEQUAL_EXPORT) != 0); bool isConst = ((typeQualifiers & TYPEQUAL_CONST) != 0); if (hasUniformQual && hasVaryingQual) { @@ -355,6 +356,8 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { } if (kind != DK_FUNCTION && isTask) Error(pos, "\"task\" qualifier 
illegal in variable declaration."); + if (kind != DK_FUNCTION && isExported) + Error(pos, "\"export\" qualifier illegal in variable declaration."); Variability variability(Variability::Unbound); if (hasUniformQual) @@ -519,8 +522,8 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { return NULL; } - bool isExported = ds && (ds->storageClass == SC_EXPORT); bool isExternC = ds && (ds->storageClass == SC_EXTERN_C); + bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0); bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0); if (isExported && isTask) { @@ -731,10 +734,15 @@ GetStructTypesNamesPositions(const std::vector &sd, // FIXME: making this fake little DeclSpecs here is really // disgusting DeclSpecs ds(type); - if (type->IsUniformType()) - ds.typeQualifiers |= TYPEQUAL_UNIFORM; - else if (type->IsVaryingType()) - ds.typeQualifiers |= TYPEQUAL_VARYING; + if (Type::Equal(type, AtomicType::Void) == false) { + if (type->IsUniformType()) + ds.typeQualifiers |= TYPEQUAL_UNIFORM; + else if (type->IsVaryingType()) + ds.typeQualifiers |= TYPEQUAL_VARYING; + else if (type->GetSOAWidth() != 0) + ds.soaWidth = type->GetSOAWidth(); + // FIXME: ds.vectorSize? + } for (unsigned int j = 0; j < sd[i]->declarators->size(); ++j) { Declarator *d = (*sd[i]->declarators)[j]; diff --git a/decl.h b/decl.h index 0bae20b8..ff96e149 100644 --- a/decl.h +++ b/decl.h @@ -64,7 +64,6 @@ class Declarator; enum StorageClass { SC_NONE, SC_EXTERN, - SC_EXPORT, SC_STATIC, SC_TYPEDEF, SC_EXTERN_C @@ -82,6 +81,7 @@ enum StorageClass { #define TYPEQUAL_SIGNED (1<<4) #define TYPEQUAL_UNSIGNED (1<<5) #define TYPEQUAL_INLINE (1<<6) +#define TYPEQUAL_EXPORT (1<<7) /** @brief Representation of the declaration specifiers in a declaration. 
diff --git a/module.cpp b/module.cpp index f9e38d87..c7b2a424 100644 --- a/module.cpp +++ b/module.cpp @@ -584,7 +584,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { // Make sure that the return type isn't 'varying' if the function is // 'export'ed. - if (funSym->storageClass == SC_EXPORT && + if (functionType->isExported && lRecursiveCheckValidParamType(functionType->GetReturnType())) Error(funSym->pos, "Illegal to return a \"varying\" type from exported " "function \"%s\"", funSym->name.c_str()); @@ -608,7 +608,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { // If the function is exported, make sure that the parameter // doesn't have any varying stuff going on in it. - if (funSym->storageClass == SC_EXPORT) + if (functionType->isExported) lCheckForVaryingParameter(argType, argName, argPos); // ISPC assumes that no pointers alias. (It should be possible to diff --git a/parse.yy b/parse.yy index 978b11d7..8448a559 100644 --- a/parse.yy +++ b/parse.yy @@ -810,7 +810,6 @@ storage_class_specifier : TOKEN_TYPEDEF { $$ = SC_TYPEDEF; } | TOKEN_EXTERN { $$ = SC_EXTERN; } | TOKEN_EXTERN TOKEN_STRING_C_LITERAL { $$ = SC_EXTERN_C; } - | TOKEN_EXPORT { $$ = SC_EXPORT; } | TOKEN_STATIC { $$ = SC_STATIC; } ; @@ -985,6 +984,11 @@ specifier_qualifier_list "function declarations."); $$ = $2; } + else if ($1 == TYPEQUAL_EXPORT) { + Error(@1, "\"export\" qualifier is illegal outside of " + "function declarations."); + $$ = $2; + } else FATAL("Unhandled type qualifier in parser."); } @@ -1117,6 +1121,7 @@ type_qualifier | TOKEN_UNIFORM { $$ = TYPEQUAL_UNIFORM; } | TOKEN_VARYING { $$ = TYPEQUAL_VARYING; } | TOKEN_TASK { $$ = TYPEQUAL_TASK; } + | TOKEN_EXPORT { $$ = TYPEQUAL_EXPORT; } | TOKEN_INLINE { $$ = TYPEQUAL_INLINE; } | TOKEN_SIGNED { $$ = TYPEQUAL_SIGNED; } | TOKEN_UNSIGNED { $$ = TYPEQUAL_UNSIGNED; } @@ -2096,8 +2101,6 @@ lGetStorageClassString(StorageClass sc) { return ""; case SC_EXTERN: return "extern"; - case SC_EXPORT: - return 
"export"; case SC_STATIC: return "static"; case SC_TYPEDEF: From 560bf5ca0981144a249d9edd4755021780d9e53e Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 30 Mar 2012 16:35:55 -0700 Subject: [PATCH 031/173] Updated logic for selecting target ISA when not specified. Now, if the user specified a CPU then we base the ISA choice on that--only if no CPU and no target is specified do we use the CPUID-based check to pick a vector ISA. Improvement to fix to #205. --- ispc.cpp | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/ispc.cpp b/ispc.cpp index 25bec209..9eb808dc 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -116,6 +116,34 @@ lGetSystemISA() { bool Target::GetTarget(const char *arch, const char *cpu, const char *isa, bool pic, Target *t) { + if (isa == NULL) { + if (cpu != NULL) { + // If a CPU was specified explicitly, try to pick the best + // possible ISA based on that. +#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) + if (!strcasecmp(cpu, "sandybridge") || + !strcasecmp(cpu, "corei7-avx")) + isa = "avx"; + else +#endif + if (!strcasecmp(cpu, "corei7") || + !strcasecmp(cpu, "penryn")) + isa = "sse4"; + else + isa = "sse2"; + fprintf(stderr, "Notice: no --target specified on command-line. " + "Using ISA \"%s\" based on specified CPU \"%s\".\n", isa, + cpu); + } + else { + // No CPU and no ISA, so use CPUID to figure out what this CPU + // supports. + isa = lGetSystemISA(); + fprintf(stderr, "Notice: no --target specified on command-line. 
" + "Using system ISA \"%s\".\n", isa); + } + } + if (cpu == NULL) { std::string hostCPU = llvm::sys::getHostCPUName(); if (hostCPU.size() > 0) @@ -127,11 +155,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, } t->cpu = cpu; - if (isa == NULL) { - isa = lGetSystemISA(); - fprintf(stderr, "Notice: no --target specified on command-line. Using " - "system ISA \"%s\".\n", isa); - } if (arch == NULL) arch = "x86-64"; From e9626a1d10b902ad42320bcd1190f95e5dc3df74 Mon Sep 17 00:00:00 2001 From: Jean-Luc Duprat Date: Tue, 27 Mar 2012 12:46:59 -0700 Subject: [PATCH 032/173] Added macro PRId64 to opt.cpp for compilation on Windows --- opt.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/opt.cpp b/opt.cpp index af661d9a..1ebfd4a4 100644 --- a/opt.cpp +++ b/opt.cpp @@ -84,6 +84,13 @@ #endif #endif // ISPC_IS_WINDOWS +#ifndef PRId64 +#define PRId64 "lld" +#endif +#ifndef PRIu64 +#define PRIu64 "llu" +#endif + static llvm::Pass *CreateIntrinsicsOptPass(); static llvm::Pass *CreateVSelMovmskOptPass(); static llvm::Pass *CreateDetectGSBaseOffsetsPass(); From c1661eb06b6b66c71670506b8134c3de6e3ac7c0 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 30 Mar 2012 16:55:22 -0700 Subject: [PATCH 033/173] Allow calling GetAs{Non}ConstType() for FunctionTypes. It's just a no-op, though, rather than an assertion failure as before. 
--- type.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/type.cpp b/type.cpp index 80856d5e..3a276be4 100644 --- a/type.cpp +++ b/type.cpp @@ -2454,15 +2454,13 @@ FunctionType::ResolveUnboundVariability(Variability v) const { const Type * FunctionType::GetAsConstType() const { - FATAL("FunctionType::GetAsConstType shouldn't be called"); - return NULL; + return this; } const Type * FunctionType::GetAsNonConstType() const { - FATAL("FunctionType::GetAsNonConstType shouldn't be called"); - return NULL; + return this; } From 05d1b06eeb010611dffbc8c83cdadde20ba723fc Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 30 Mar 2012 16:55:36 -0700 Subject: [PATCH 034/173] Fixes to get the C++ backend more working again. --- cbackend.cpp | 116 ++++++++++++++++++++++++++------------------------- stmt.cpp | 2 +- 2 files changed, 61 insertions(+), 57 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index b1a0a907..e39f8461 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -2071,69 +2071,16 @@ bool CWriter::doInitialization(Module &M) { Out << "#include \"" << includeName << "\"\n"; - generateCompilerSpecificCode(Out, TD); - - // Function declarations - Out << "\n/* Function Declarations */\n"; + Out << "\n/* Basic Library Function Declarations */\n"; Out << "extern \"C\" {\n"; Out << "int puts(unsigned char *);\n"; Out << "unsigned int putchar(unsigned int);\n"; Out << "int fflush(void *);\n"; Out << "int printf(const unsigned char *, ...);\n"; Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n"; + Out << "}\n\n"; - // Store the intrinsics which will be declared/defined below. - SmallVector intrinsicsToDefine; - - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - // Don't print declarations for intrinsic functions. - // Store the used intrinsics, which need to be explicitly defined. 
- if (I->isIntrinsic()) { - switch (I->getIntrinsicID()) { - default: - break; - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - intrinsicsToDefine.push_back(I); - break; - } - continue; - } - - if (I->getName() == "setjmp" || I->getName() == "abort" || - I->getName() == "longjmp" || I->getName() == "_setjmp" || - I->getName() == "memset" || I->getName() == "memset_pattern16" || - I->getName() == "puts" || - I->getName() == "printf" || I->getName() == "putchar" || - I->getName() == "fflush" || I->getName() == "malloc" || - I->getName() == "free") - continue; - - // Don't redeclare ispc's own intrinsics - std::string name = I->getName(); - if (name.size() > 2 && name[0] == '_' && name[1] == '_') - continue; - - if (I->hasExternalWeakLinkage()) - Out << "extern "; - printFunctionSignature(I, true); - if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - if (StaticCtors.count(I)) - Out << " __ATTRIBUTE_CTOR__"; - if (StaticDtors.count(I)) - Out << " __ATTRIBUTE_DTOR__"; - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - - if (I->hasName() && I->getName()[0] == 1) - Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; - - Out << ";\n"; - } - Out << "}\n"; + generateCompilerSpecificCode(Out, TD); // Provide a definition for `bool' if not compiling with a C++ compiler. Out << "\n" @@ -2303,6 +2250,63 @@ bool CWriter::doInitialization(Module &M) { } } + // Function declarations + Out << "\n/* Function Declarations */\n"; + Out << "extern \"C\" {\n"; + + // Store the intrinsics which will be declared/defined below. + SmallVector intrinsicsToDefine; + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + // Don't print declarations for intrinsic functions. + // Store the used intrinsics, which need to be explicitly defined. 
+ if (I->isIntrinsic()) { + switch (I->getIntrinsicID()) { + default: + break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + intrinsicsToDefine.push_back(I); + break; + } + continue; + } + + if (I->getName() == "setjmp" || I->getName() == "abort" || + I->getName() == "longjmp" || I->getName() == "_setjmp" || + I->getName() == "memset" || I->getName() == "memset_pattern16" || + I->getName() == "puts" || + I->getName() == "printf" || I->getName() == "putchar" || + I->getName() == "fflush" || I->getName() == "malloc" || + I->getName() == "free") + continue; + + // Don't redeclare ispc's own intrinsics + std::string name = I->getName(); + if (name.size() > 2 && name[0] == '_' && name[1] == '_') + continue; + + if (I->hasExternalWeakLinkage()) + Out << "extern "; + printFunctionSignature(I, true); + if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + if (I->hasExternalWeakLinkage()) + Out << " __EXTERNAL_WEAK__"; + if (StaticCtors.count(I)) + Out << " __ATTRIBUTE_CTOR__"; + if (StaticDtors.count(I)) + Out << " __ATTRIBUTE_DTOR__"; + if (I->hasHiddenVisibility()) + Out << " __HIDDEN__"; + + if (I->hasName() && I->getName()[0] == 1) + Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; + + Out << ";\n"; + } + Out << "}\n\n"; + if (!M.empty()) Out << "\n\n/* Function Bodies */\n"; diff --git a/stmt.cpp b/stmt.cpp index d7098ff0..14032e7b 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2786,7 +2786,7 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) { Expr *maskVecExpr = new SymbolExpr(maskSym, pos); std::vector mmFuns; m->symbolTable->LookupFunction("__movmsk", &mmFuns); - Assert(mmFuns.size() == 2); + Assert(mmFuns.size() == (g->target.isa == Target::GENERIC ? 
1 : 2)); FunctionSymbolExpr *movmskFunc = new FunctionSymbolExpr("__movmsk", mmFuns, pos); ExprList *movmskArgs = new ExprList(maskVecExpr, pos); From dc09d46bf4d4ad823307505ff0e2e5de27654540 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 3 Apr 2012 05:36:21 -0700 Subject: [PATCH 035/173] Don't emit type declarations for extern'ed globals in generated header files. This actually wasn't a good idea, since we'd like ispc programs to be able to have varying globals that it uses internally among ispc code, without having errors about varying globals when generating headers. Issue #214. --- module.cpp | 39 +-------------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/module.cpp b/module.cpp index c7b2a424..c7725b2f 100644 --- a/module.cpp +++ b/module.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -1027,21 +1027,6 @@ lPrintFunctionDeclarations(FILE *file, const std::vector &funcs) { } -static void -lPrintExternGlobals(FILE *file, const std::vector &externGlobals) { - for (unsigned int i = 0; i < externGlobals.size(); ++i) { - Symbol *sym = externGlobals[i]; - if (lRecursiveCheckValidParamType(sym->type)) - Warning(sym->pos, "Not emitting declaration for symbol \"%s\" into " - "generated header file since it (or some of its members) " - "has types that are illegal in exported symbols.", - sym->name.c_str()); - else - fprintf(file, "extern %s;\n", sym->type->GetCDeclaration(sym->name).c_str()); - } -} - - static bool lIsExported(const Symbol *sym) { const FunctionType *ft = dynamic_cast(sym->type); @@ -1058,12 +1043,6 @@ lIsExternC(const Symbol *sym) { } -static bool -lIsExternGlobal(const Symbol *sym) { - return sym->storageClass == SC_EXTERN || sym->storageClass == SC_EXTERN_C; -} - - bool Module::writeHeader(const char *fn) { FILE *f = fopen(fn, "w"); @@ -1116,13 +1095,6 
@@ Module::writeHeader(const char *fn) { lGetExportedParamTypes(externCFuncs, &exportedStructTypes, &exportedEnumTypes, &exportedVectorTypes); - // And do the same for the 'extern' globals - std::vector externGlobals; - symbolTable->GetMatchingVariables(lIsExternGlobal, &externGlobals); - for (unsigned int i = 0; i < externGlobals.size(); ++i) - lGetExportedTypes(externGlobals[i]->type, &exportedStructTypes, - &exportedEnumTypes, &exportedVectorTypes); - // And print them lEmitVectorTypedefs(exportedVectorTypes, f); lEmitEnumDecls(exportedEnumTypes, f); @@ -1149,15 +1121,6 @@ Module::writeHeader(const char *fn) { // end namespace fprintf(f, "\n#ifdef __cplusplus\n}\n#endif // __cplusplus\n"); - // and only now emit externs for globals, outside of the ispc namespace - if (externGlobals.size() > 0) { - fprintf(f, "\n"); - fprintf(f, "///////////////////////////////////////////////////////////////////////////\n"); - fprintf(f, "// Globals declared \"extern\" from ispc code\n"); - fprintf(f, "///////////////////////////////////////////////////////////////////////////\n"); - lPrintExternGlobals(f, externGlobals); - } - // end guard fprintf(f, "\n#endif // %s\n", guard.c_str()); From 920cf63201c49b8d32fd5dbd2c686cca658e7dfd Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 3 Apr 2012 05:43:23 -0700 Subject: [PATCH 036/173] Improve error message about incompatible function types. When reporting that a function has illegally been overloaded only by return type, include "task", "export", and "extern "C"", as appropriate in the error message to make clear what the issue is. Finishes issue #216. 
--- module.cpp | 9 ++++++--- type.cpp | 13 +++++++++++++ type.h | 2 ++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/module.cpp b/module.cpp index c7725b2f..58782d0f 100644 --- a/module.cpp +++ b/module.cpp @@ -506,10 +506,13 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { break; } if (i == functionType->GetNumParameters()) { + std::string thisRetType = functionType->GetReturnTypeString(); + std::string otherRetType = ofType->GetReturnTypeString(); Error(funSym->pos, "Illegal to overload function by return " - "type only (previous declaration was at line %d of " - "file %s).", overloadFunc->pos.first_line, - overloadFunc->pos.name); + "type only. This function returns \"%s\" while " + "previous declaration at %s:%d returns \"%s\".", + thisRetType.c_str(), overloadFunc->pos.name, + overloadFunc->pos.first_line, otherRetType.c_str()); return; } } diff --git a/type.cpp b/type.cpp index 3a276be4..1aaeb2a6 100644 --- a/type.cpp +++ b/type.cpp @@ -2552,6 +2552,19 @@ FunctionType::GetDIType(llvm::DIDescriptor scope) const { } +const std::string +FunctionType::GetReturnTypeString() const { + std::string ret; + if (isTask) + ret += "task "; + if (isExported) + ret += "export "; + if (isExternC) + ret += "extern \"C\" "; + return ret + returnType->GetString(); +} + + LLVM_TYPE_CONST llvm::FunctionType * FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const { if (isTask == true) diff --git a/type.h b/type.h index 54cea005..5c690e60 100644 --- a/type.h +++ b/type.h @@ -776,6 +776,8 @@ public: const Type *GetReturnType() const { return returnType; } + const std::string GetReturnTypeString() const; + /** This method returns the LLVM FunctionType that corresponds to this function type. 
The \c includeMask parameter indicates whether the llvm::FunctionType should have a mask as the last argument in its From eb85da81e1c1ffcd3ccf557e0d8eff2e56476d85 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 3 Apr 2012 05:55:50 -0700 Subject: [PATCH 037/173] Further improvements to error reporting with function types. Issue #219. --- expr.cpp | 4 ++-- tests_errors/ptr-1.ispc | 2 +- type.cpp | 23 +++++++++++------------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/expr.cpp b/expr.cpp index 78a89b6a..ebfac14f 100644 --- a/expr.cpp +++ b/expr.cpp @@ -328,8 +328,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, !Type::Equal(fromPointerType->GetBaseType()->GetAsConstType(), toPointerType->GetBaseType())) { if (!failureOk) - Error(pos, "Can't convert between incompatible pointer types " - "\"%s\" and \"%s\" for %s.", + Error(pos, "Can't convert from pointer type \"%s\" to " + "incompatible pointer type \"%s\" for %s.", fromPointerType->GetString().c_str(), toPointerType->GetString().c_str(), errorMsgBase); return false; diff --git a/tests_errors/ptr-1.ispc b/tests_errors/ptr-1.ispc index 66a9bff4..97a88488 100644 --- a/tests_errors/ptr-1.ispc +++ b/tests_errors/ptr-1.ispc @@ -1,4 +1,4 @@ -// Can't convert between incompatible pointer types +// Can't convert from pointer type "void * varying" to incompatible pointer type "uniform int32 * varying" for return statement int *foo(void *p) { return p; diff --git a/type.cpp b/type.cpp index 1aaeb2a6..2fb0a678 100644 --- a/type.cpp +++ b/type.cpp @@ -2466,18 +2466,7 @@ FunctionType::GetAsNonConstType() const { std::string FunctionType::GetString() const { - std::string ret; - if (isTask) ret += "task "; - if (isSafe) ret += "/*safe*/ "; - if (costOverride > 0) { - char buf[32]; - sprintf(buf, "/*cost=%d*/ ", costOverride); - ret += buf; - } - if (returnType != NULL) - ret += returnType->GetString(); - else - ret += "/* ERROR */"; + std::string ret = GetReturnTypeString(); ret += 
"("; for (unsigned int i = 0; i < paramTypes.size(); ++i) { if (paramTypes[i] == NULL) @@ -2554,6 +2543,9 @@ FunctionType::GetDIType(llvm::DIDescriptor scope) const { const std::string FunctionType::GetReturnTypeString() const { + if (returnType == NULL) + return "/* ERROR */"; + std::string ret; if (isTask) ret += "task "; @@ -2561,6 +2553,13 @@ FunctionType::GetReturnTypeString() const { ret += "export "; if (isExternC) ret += "extern \"C\" "; + if (isSafe) + ret += "/*safe*/ "; + if (costOverride > 0) { + char buf[32]; + sprintf(buf, "/*cost=%d*/ ", costOverride); + ret += buf; + } return ret + returnType->GetString(); } From b813452d33792caea23a3d2c7c4a2ee213a0cedd Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 3 Apr 2012 06:13:28 -0700 Subject: [PATCH 038/173] Don't issue a slew of warnings if a bogus cpu type is specified. Issue #221. --- ispc.cpp | 51 +++++++++++++++++++++++++++++++++++++-------------- ispc.h | 2 +- main.cpp | 2 +- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/ispc.cpp b/ispc.cpp index 9eb808dc..4e39c0b2 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -113,6 +113,14 @@ lGetSystemISA() { } +static const char *supportedCPUs[] = { + "atom", "penryn", "core2", "corei7", +#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) + "corei7-avx" +#endif +}; + + bool Target::GetTarget(const char *arch, const char *cpu, const char *isa, bool pic, Target *t) { @@ -121,13 +129,13 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, // If a CPU was specified explicitly, try to pick the best // possible ISA based on that. 
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) - if (!strcasecmp(cpu, "sandybridge") || - !strcasecmp(cpu, "corei7-avx")) + if (!strcmp(cpu, "sandybridge") || + !strcmp(cpu, "corei7-avx")) isa = "avx"; else #endif - if (!strcasecmp(cpu, "corei7") || - !strcasecmp(cpu, "penryn")) + if (!strcmp(cpu, "corei7") || + !strcmp(cpu, "penryn")) isa = "sse4"; else isa = "sse2"; @@ -153,6 +161,22 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, cpu = "generic"; } } + else { + bool foundCPU = false; + for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0])); + ++i) { + if (!strcmp(cpu, supportedCPUs[i])) { + foundCPU = true; + break; + } + } + if (foundCPU == false) { + fprintf(stderr, "Error: CPU type \"%s\" unknown. Supported CPUs: " + "%s.\n", cpu, SupportedTargetCPUs().c_str()); + return false; + } + } + t->cpu = cpu; if (arch == NULL) @@ -309,17 +333,16 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, } -const char * +std::string Target::SupportedTargetCPUs() { - return "atom, barcelona, core2, corei7, " -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) - "corei7-avx, " -#endif - "istanbul, nocona, penryn, " -#ifdef LLVM_2_9 - "sandybridge, " -#endif - "westmere"; + std::string ret; + int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]); + for (int i = 0; i < count; ++i) { + ret += supportedCPUs[i]; + if (i != count - 1) + ret += ", "; + } + return ret; } diff --git a/ispc.h b/ispc.h index 360b7d99..fb334141 100644 --- a/ispc.h +++ b/ispc.h @@ -164,7 +164,7 @@ struct Target { /** Returns a comma-delimited string giving the names of the currently supported target CPUs. */ - static const char *SupportedTargetCPUs(); + static std::string SupportedTargetCPUs(); /** Returns a comma-delimited string giving the names of the currently supported target architectures. 
*/ diff --git a/main.cpp b/main.cpp index b29a9f0f..8c231b60 100644 --- a/main.cpp +++ b/main.cpp @@ -91,7 +91,7 @@ usage(int ret) { Target::SupportedTargetArchs()); printf(" [--c++-include-file=]\t\tSpecify name of file to emit in #include statement in generated C++ code.\n"); printf(" [--cpu=]\t\t\tSelect target CPU type\n"); - printf(" ={%s}\n", Target::SupportedTargetCPUs()); + printf(" ={%s}\n", Target::SupportedTargetCPUs().c_str()); printf(" [-D]\t\t\t\t#define given value when running preprocessor\n"); printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n"); #ifndef LLVM_2_9 From 4cd0cf1650317934bc8b3b93fed80c0b9020653f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 3 Apr 2012 10:09:07 -0700 Subject: [PATCH 039/173] Revamp handling of function types, conversion to function ptr types. Implicit conversion to function types is now a more standard part of the type conversion infrastructure, rather than special cases of things like FunctionSymbolExpr immediately returning a pointer type, etc. Improved AddressOfExpr::TypeCheck() to actually issue errors in cases where it's illegal to take the address of an expression. Added AddressOfExpr::GetConstant() implementation that handles taking the address of functions. Issue #223. 
--- expr.cpp | 165 +++++++++++++++++++++++++++-------- expr.h | 1 + tests/funcptr-null-1.ispc | 2 +- tests/funcptr-null-3.ispc | 2 +- tests/funcptr-null-6.ispc | 2 +- tests/funcptr-uniform-2.ispc | 2 +- tests_errors/addr-of-1.ispc | 5 ++ type.cpp | 11 +++ 8 files changed, 149 insertions(+), 41 deletions(-) create mode 100644 tests_errors/addr-of-1.ispc diff --git a/expr.cpp b/expr.cpp index ebfac14f..f46347ee 100644 --- a/expr.cpp +++ b/expr.cpp @@ -212,11 +212,27 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, } if (dynamic_cast(fromType)) { - if (!failureOk) - Error(pos, "Can't convert function type \"%s\" to \"%s\" for %s.", - fromType->GetString().c_str(), - toType->GetString().c_str(), errorMsgBase); - return false; + if (dynamic_cast(toType) != NULL) { + // Convert function type to pointer to function type + if (expr != NULL) { + Expr *aoe = new AddressOfExpr(*expr, (*expr)->pos); + if (lDoTypeConv(aoe->GetType(), toType, &aoe, failureOk, + errorMsgBase, pos)) { + *expr = aoe; + return true; + } + } + else + return lDoTypeConv(PointerType::GetUniform(fromType), toType, NULL, + failureOk, errorMsgBase, pos); + } + else { + if (!failureOk) + Error(pos, "Can't convert function type \"%s\" to \"%s\" for %s.", + fromType->GetString().c_str(), + toType->GetString().c_str(), errorMsgBase); + return false; + } } if (dynamic_cast(toType)) { if (!failureOk) @@ -3434,10 +3450,15 @@ FunctionCallExpr::TypeCheck() { if (func == NULL) return NULL; - const PointerType *pt = - dynamic_cast(func->GetType()); - const FunctionType *ft = (pt == NULL) ? NULL : - dynamic_cast(pt->GetBaseType()); + const FunctionType *ft = + dynamic_cast(func->GetType()); + if (ft == NULL) { + const PointerType *pt = + dynamic_cast(func->GetType()); + ft = (pt == NULL) ? 
NULL : + dynamic_cast(pt->GetBaseType()); + } + if (ft == NULL) { Error(pos, "Valid function name must be used for function call."); return NULL; @@ -6774,6 +6795,34 @@ TypeCastExpr::GetBaseSymbol() const { } +static +llvm::Constant * +lConvertPointerConstant(llvm::Constant *c, const Type *constType) { + if (c == NULL || constType->IsUniformType()) + return c; + + // Handle conversion to int and then to vector of int or array of int + // (for varying and soa types, respectively) + llvm::Constant *intPtr = + llvm::ConstantExpr::getPtrToInt(c, LLVMTypes::PointerIntType); + Assert(constType->IsVaryingType() || constType->IsSOAType()); + int count = constType->IsVaryingType() ? g->target.vectorWidth : + constType->GetSOAWidth(); + + std::vector smear; + for (int i = 0; i < count; ++i) + smear.push_back(intPtr); + + if (constType->IsVaryingType()) + return llvm::ConstantVector::get(smear); + else { + LLVM_TYPE_CONST llvm::ArrayType *at = + llvm::ArrayType::get(LLVMTypes::PointerIntType, count); + return llvm::ConstantArray::get(at, smear); + } +} + + llvm::Constant * TypeCastExpr::GetConstant(const Type *constType) const { // We don't need to worry about most the basic cases where the type @@ -6781,11 +6830,18 @@ TypeCastExpr::GetConstant(const Type *constType) const { // TypeCastExpr::Optimize() method generally ends up doing the type // conversion and returning a ConstExpr, which in turn will have its // GetConstant() method called. However, because ConstExpr currently - // can't represent pointer values, we have to handle two cases here: - // 1. Null pointers (NULL, 0) valued initializers, and - // 2. Converting a uniform function pointer to a varying function - // pointer of the same type. - return expr->GetConstant(constType); + // can't represent pointer values, we have to handle a few cases + // related to pointers here: + // + // 1. Null pointer (NULL, 0) valued initializers + // 2. Converting function types to pointer-to-function types + // 3. 
And converting these from uniform to the varying/soa equivalents. + // + if (dynamic_cast(constType) == NULL) + return NULL; + + llvm::Constant *c = expr->GetConstant(constType->GetAsUniformType()); + return lConvertPointerConstant(c, constType); } @@ -7078,7 +7134,8 @@ AddressOfExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; const Type *exprType = expr->GetType(); - if (dynamic_cast(exprType) != NULL) + if (dynamic_cast(exprType) != NULL || + dynamic_cast(exprType) != NULL) return expr->GetValue(ctx); else return expr->GetLValue(ctx); @@ -7093,8 +7150,18 @@ AddressOfExpr::GetType() const { const Type *exprType = expr->GetType(); if (dynamic_cast(exprType) != NULL) return PointerType::GetUniform(exprType->GetReferenceTarget()); - else - return expr->GetLValueType(); + + const Type *t = expr->GetLValueType(); + if (t != NULL) + return t; + else { + t = expr->GetType(); + if (t == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + return PointerType::GetUniform(t); + } } @@ -7118,7 +7185,22 @@ AddressOfExpr::Print() const { Expr * AddressOfExpr::TypeCheck() { - return this; + const Type *exprType; + if (expr == NULL || (exprType = expr->GetType()) == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + + if (dynamic_cast(exprType) != NULL|| + dynamic_cast(exprType) != NULL) { + return this; + } + + if (expr->GetLValueType() != NULL) + return this; + + Error(expr->pos, "Illegal to take address of non-lvalue or function."); + return NULL; } @@ -7134,6 +7216,29 @@ AddressOfExpr::EstimateCost() const { } +llvm::Constant * +AddressOfExpr::GetConstant(const Type *type) const { + const Type *exprType; + if (expr == NULL || (exprType = expr->GetType()) == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + + const PointerType *pt = dynamic_cast(type); + if (pt == NULL) + return NULL; + + const FunctionType *ft = + dynamic_cast(pt->GetBaseType()); + if (ft != NULL) { + llvm::Constant *c = expr->GetConstant(ft); + return 
lConvertPointerConstant(c, type); + } + else + return NULL; +} + + /////////////////////////////////////////////////////////////////////////// // SizeOfExpr @@ -7313,8 +7418,7 @@ FunctionSymbolExpr::GetType() const { return NULL; } - return matchingFunc ? - new PointerType(matchingFunc->type, Variability::Uniform, true) : NULL; + return matchingFunc ? matchingFunc->type : NULL; } @@ -7364,27 +7468,14 @@ FunctionSymbolExpr::GetConstant(const Type *type) const { if (matchingFunc == NULL || matchingFunc->function == NULL) return NULL; - const FunctionType *ft; - if (dynamic_cast(type) == NULL || - (ft = dynamic_cast(type->GetBaseType())) == NULL) + const FunctionType *ft = dynamic_cast(type); + if (ft == NULL) return NULL; - LLVM_TYPE_CONST llvm::Type *llvmUnifType = - type->GetAsUniformType()->LLVMType(g->ctx); - if (llvmUnifType != matchingFunc->function->getType()) + if (Type::Equal(type, matchingFunc->type) == false) return NULL; - if (type->IsUniformType()) - return matchingFunc->function; - else { - llvm::Constant *intPtr = - llvm::ConstantExpr::getPtrToInt(matchingFunc->function, - LLVMTypes::PointerIntType); - std::vector smear; - for (int i = 0; i < g->target.vectorWidth; ++i) - smear.push_back(intPtr); - return llvm::ConstantVector::get(smear); - } + return matchingFunc->function; } diff --git a/expr.h b/expr.h index e7461a1a..f7d112b9 100644 --- a/expr.h +++ b/expr.h @@ -584,6 +584,7 @@ public: Expr *TypeCheck(); Expr *Optimize(); int EstimateCost() const; + llvm::Constant *GetConstant(const Type *type) const; Expr *expr; }; diff --git a/tests/funcptr-null-1.ispc b/tests/funcptr-null-1.ispc index 05798918..784b5ada 100644 --- a/tests/funcptr-null-1.ispc +++ b/tests/funcptr-null-1.ispc @@ -15,7 +15,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; float b = aFOO[0]-1; uniform FuncType func = foo; - RET[programIndex] = (func ? func : bar)(a, b); + RET[programIndex] = (func ? 
func : &bar)(a, b); } export void result(uniform float RET[]) { diff --git a/tests/funcptr-null-3.ispc b/tests/funcptr-null-3.ispc index 8e228315..3fd74da0 100644 --- a/tests/funcptr-null-3.ispc +++ b/tests/funcptr-null-3.ispc @@ -14,7 +14,7 @@ static float bar(float a, float b) { export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; float b = aFOO[0]-1; - FuncType func = foo; + FuncType func = &foo; if (a == 2) func = NULL; if (func != NULL) diff --git a/tests/funcptr-null-6.ispc b/tests/funcptr-null-6.ispc index cf92c4a7..45bcfcdd 100644 --- a/tests/funcptr-null-6.ispc +++ b/tests/funcptr-null-6.ispc @@ -16,7 +16,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) { float b = aFOO[0]-1; FuncType func = NULL; if (a == 2) - func = foo; + func = &foo; if (!func) RET[programIndex] = -1; else diff --git a/tests/funcptr-uniform-2.ispc b/tests/funcptr-uniform-2.ispc index 849c9492..59d54b40 100644 --- a/tests/funcptr-uniform-2.ispc +++ b/tests/funcptr-uniform-2.ispc @@ -14,7 +14,7 @@ static float bar(float a, float b) { export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; float b = aFOO[0]-1; - uniform FuncType func = bar; + uniform FuncType func = &bar; if (aFOO[0] == 1) func = foo; RET[programIndex] = func(a, b); diff --git a/tests_errors/addr-of-1.ispc b/tests_errors/addr-of-1.ispc new file mode 100644 index 00000000..4d770f01 --- /dev/null +++ b/tests_errors/addr-of-1.ispc @@ -0,0 +1,5 @@ +// Illegal to take address of non-lvalue or function + +void foo() { + int *ptr = &(1+1); +} diff --git a/type.cpp b/type.cpp index 2fb0a678..8fffb682 100644 --- a/type.cpp +++ b/type.cpp @@ -2695,6 +2695,17 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char bool forceVarying, int vecSize) { Assert(reason != NULL); + // First, if one or both types are function types, convert them to + // pointer to function types and then try again. 
+ if (dynamic_cast(t0) || + dynamic_cast(t1)) { + if (dynamic_cast(t0)) + t0 = PointerType::GetUniform(t0); + if (dynamic_cast(t1)) + t1 = PointerType::GetUniform(t1); + return MoreGeneralType(t0, t1, pos, reason, forceVarying, vecSize); + } + // First, if we need to go varying, promote both of the types to be // varying. if (t0->IsVaryingType() || t1->IsVaryingType() || forceVarying) { From 391678a5b3cf31f986c7688e26001181c5e3afc9 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 3 Apr 2012 10:40:41 -0700 Subject: [PATCH 040/173] Update function overload resolution logic. Closer compatibility with C++: given a non-reference type, treat matching to a non-const reference of that type as a better match than a const reference of that type (rather than both being equal cost). Issue #224. --- expr.cpp | 69 +++++++++++++++++++++++++++-------- tests/func-overload-refs.ispc | 14 +++++++ 2 files changed, 67 insertions(+), 16 deletions(-) create mode 100644 tests/func-overload-refs.ispc diff --git a/expr.cpp b/expr.cpp index f46347ee..81f51151 100644 --- a/expr.cpp +++ b/expr.cpp @@ -7497,7 +7497,24 @@ lPrintOverloadCandidates(SourcePos pos, const std::vector &funcs, Error(pos, "%s", passedTypes.c_str()); } - + +static bool +lIsMatchToNonConstReference(const Type *callType, const Type *funcArgType) { + return (dynamic_cast(funcArgType) && + (funcArgType->IsConstType() == false) && + Type::Equal(callType, funcArgType->GetReferenceTarget())); +} + + +static bool +lIsMatchToNonConstReferenceUnifToVarying(const Type *callType, + const Type *funcArgType) { + return (dynamic_cast(funcArgType) && + (funcArgType->IsConstType() == false) && + Type::Equal(callType->GetAsVaryingType(), + funcArgType->GetReferenceTarget())); +} + /** Helper function used for function overload resolution: returns true if converting the argument to the call type only requires a type conversion that won't lose information. Otherwise return false. 
@@ -7597,6 +7614,20 @@ FunctionSymbolExpr::getCandidateFunctions(int argCount) const { } +static bool +lArgIsPointerType(const Type *type) { + if (dynamic_cast(type) != NULL) + return true; + + const ReferenceType *rt = dynamic_cast(type); + if (rt == NULL) + return false; + + const Type *t = rt->GetReferenceTarget(); + return (dynamic_cast(t) != NULL); +} + + /** This function computes the value of a cost function that represents the cost of calling a function of the given type with arguments of the given types. If it's not possible to call the function, regardless of @@ -7623,19 +7654,11 @@ FunctionSymbolExpr::computeOverloadCost(const FunctionType *ftype, const Type *fargType = ftype->GetParameterType(i); const Type *callType = argTypes[i]; - // For convenience, normalize to non-const types (except for - // references, where const-ness matters). For all other types, - // we're passing by value anyway, so const doesn't matter. - if (dynamic_cast(callType) == NULL) - callType = callType->GetAsNonConstType(); - if (dynamic_cast(fargType) == NULL) - fargType = fargType->GetAsNonConstType(); - if (Type::Equal(callType, fargType)) // Perfect match: no cost costSum += 0; else if (argCouldBeNULL && (*argCouldBeNULL)[i] && - dynamic_cast(fargType) != NULL) + lArgIsPointerType(fargType)) // Passing NULL to a pointer-typed parameter is also a no-cost // operation costSum += 0; @@ -7645,19 +7668,33 @@ FunctionSymbolExpr::computeOverloadCost(const FunctionType *ftype, // cost if it wasn't--so scale up the cost when this isn't the // case.. if (argIsConstant == NULL || (*argIsConstant)[i] == false) - costScale *= 32; + costScale *= 128; + + // For convenience, normalize to non-const types (except for + // references, where const-ness matters). For all other types, + // we're passing by value anyway, so const doesn't matter. 
+ const Type *callTypeNC = callType, *fargTypeNC = fargType; + if (dynamic_cast(callType) == NULL) + callTypeNC = callType->GetAsNonConstType(); + if (dynamic_cast(fargType) == NULL) + fargTypeNC = fargType->GetAsNonConstType(); - if (Type::Equal(callType, fargType)) + if (Type::Equal(callTypeNC, fargTypeNC)) // Exact match (after dealing with references, above) costSum += 1 * costScale; - else if (lIsMatchWithTypeWidening(callType, fargType)) + // note: orig fargType for the next two... + else if (lIsMatchToNonConstReference(callTypeNC, fargType)) costSum += 2 * costScale; - else if (lIsMatchWithUniformToVarying(callType, fargType)) + else if (lIsMatchToNonConstReferenceUnifToVarying(callTypeNC, fargType)) costSum += 4 * costScale; - else if (lIsMatchWithTypeConvSameVariability(callType, fargType)) + else if (lIsMatchWithTypeWidening(callTypeNC, fargTypeNC)) costSum += 8 * costScale; - else if (CanConvertTypes(callType, fargType)) + else if (lIsMatchWithUniformToVarying(callTypeNC, fargTypeNC)) costSum += 16 * costScale; + else if (lIsMatchWithTypeConvSameVariability(callTypeNC, fargTypeNC)) + costSum += 32 * costScale; + else if (CanConvertTypes(callTypeNC, fargTypeNC)) + costSum += 64 * costScale; else // Failure--no type conversion possible... return -1; diff --git a/tests/func-overload-refs.ispc b/tests/func-overload-refs.ispc new file mode 100644 index 00000000..89184812 --- /dev/null +++ b/tests/func-overload-refs.ispc @@ -0,0 +1,14 @@ + +export uniform int width() { return programCount; } + +float foo(float &a) { return 1; } +float foo(const float &a) { return 2; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float x = 0; + RET[programIndex] = foo(x); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1; +} From 637d076e99c174152da010cddaa6378484892d9f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 3 Apr 2012 12:18:38 -0700 Subject: [PATCH 041/173] Remove half/float conversion functions from AVX2 output. 
(We were leaving around unused/unnecessary __half_to_float_uniform and the like, which in turn called out to the corresponding instruction.) --- builtins.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/builtins.cpp b/builtins.cpp index 0e34596d..3738858f 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -411,12 +411,16 @@ lSetInternalFunctions(llvm::Module *module) { "__extract_int64", "__extract_int8", "__fastmath", + "__float_to_half_uniform", + "__float_to_half_varying", "__floatbits_uniform_int32", "__floatbits_varying_int32", "__floor_uniform_double", "__floor_uniform_float", "__floor_varying_double", "__floor_varying_float", + "__half_to_float_uniform", + "__half_to_float_varying", "__insert_int16", "__insert_int32", "__insert_int64", From c27418da776a719f76f46622cf49cd8ebfdeedd2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 4 Apr 2012 05:56:22 -0700 Subject: [PATCH 042/173] Add checks about references to non-lvalues. Both ReturnStmt and DeclStmt now check the values being associated with references to make sure that they are legal (e.g. it's illegal to assign a varying lvalue, or a compile-time constant to a reference type). Previously we didn't catch this and would end up hitting assertions in LLVM when code did this stuff. Mostly fixes issue #225 (except for adding a FAQ about what this error message means.) 
--- ast.cpp | 2 +- stmt.cpp | 57 ++++++++++++++++++++++++----- stmt.h | 6 +-- tests_errors/ref-initializer-1.ispc | 6 +++ tests_errors/ref-initializer-2.ispc | 6 +++ tests_errors/ref-initializer-3.ispc | 6 +++ tests_errors/return-ref-1.ispc | 5 +++ tests_errors/return-ref-2.ispc | 5 +++ type.h | 4 ++ 9 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 tests_errors/ref-initializer-1.ispc create mode 100644 tests_errors/ref-initializer-2.ispc create mode 100644 tests_errors/ref-initializer-3.ispc create mode 100644 tests_errors/return-ref-1.ispc create mode 100644 tests_errors/return-ref-2.ispc diff --git a/ast.cpp b/ast.cpp index 445ef4c7..0c71cdc8 100644 --- a/ast.cpp +++ b/ast.cpp @@ -153,7 +153,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, else if ((ls = dynamic_cast(node)) != NULL) ls->stmt = (Stmt *)WalkAST(ls->stmt, preFunc, postFunc, data); else if ((rs = dynamic_cast(node)) != NULL) - rs->val = (Expr *)WalkAST(rs->val, preFunc, postFunc, data); + rs->expr = (Expr *)WalkAST(rs->expr, preFunc, postFunc, data); else if ((sl = dynamic_cast(node)) != NULL) { std::vector &sls = sl->stmts; for (unsigned int i = 0; i < sls.size(); ++i) diff --git a/stmt.cpp b/stmt.cpp index 14032e7b..bf7fa661 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -40,6 +40,7 @@ #include "util.h" #include "expr.h" #include "type.h" +#include "func.h" #include "sym.h" #include "module.h" #include "llvmutil.h" @@ -167,11 +168,25 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { } // References must have initializer expressions as well. 
- if (dynamic_cast(sym->type) && initExpr == NULL) { - Error(sym->pos, - "Must provide initializer for reference-type variable \"%s\".", - sym->name.c_str()); - continue; + if (IsReferenceType(sym->type) == true) { + if (initExpr == NULL) { + Error(sym->pos, "Must provide initializer for reference-type " + "variable \"%s\".", sym->name.c_str()); + continue; + } + if (IsReferenceType(initExpr->GetType()) == false) { + const Type *initLVType = initExpr->GetLValueType(); + if (initLVType == NULL) { + Error(initExpr->pos, "Initializer for reference-type variable " + "\"%s\" must have an lvalue type.", sym->name.c_str()); + continue; + } + if (initLVType->IsUniformType() == false) { + Error(initExpr->pos, "Initializer for reference-type variable " + "\"%s\" must have a uniform lvalue type.", sym->name.c_str()); + continue; + } + } } LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx); @@ -2173,8 +2188,8 @@ SwitchStmt::EstimateCost() const { /////////////////////////////////////////////////////////////////////////// // ReturnStmt -ReturnStmt::ReturnStmt(Expr *v, bool cc, SourcePos p) - : Stmt(p), val(v), +ReturnStmt::ReturnStmt(Expr *e, bool cc, SourcePos p) + : Stmt(p), expr(e), doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) { } @@ -2189,8 +2204,29 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const { return; } + // Make sure we're not trying to return a reference to something where + // that doesn't make sense + const Function *func = ctx->GetFunction(); + const Type *returnType = func->GetReturnType(); + if (IsReferenceType(returnType) == true && + IsReferenceType(expr->GetType()) == false) { + const Type *lvType = expr->GetLValueType(); + if (lvType == NULL) { + Error(expr->pos, "Illegal to return non-lvalue from function " + "returning reference type \"%s\".", + returnType->GetString().c_str()); + return; + } + else if (lvType->IsUniformType() == false) { + Error(expr->pos, "Illegal to return varying lvalue type from " + "function 
returning a reference type \"%s\".", + returnType->GetString().c_str()); + return; + } + } + ctx->SetDebugPos(pos); - ctx->CurrentLanesReturned(val, doCoherenceCheck); + ctx->CurrentLanesReturned(expr, doCoherenceCheck); } @@ -2210,7 +2246,8 @@ void ReturnStmt::Print(int indent) const { printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : ""); pos.Print(); - if (val) val->Print(); + if (expr) + expr->Print(); else printf("(void)"); printf("\n"); } diff --git a/stmt.h b/stmt.h index da418ec7..88115ab2 100644 --- a/stmt.h +++ b/stmt.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -265,7 +265,7 @@ public: statement in the program. */ class ReturnStmt : public Stmt { public: - ReturnStmt(Expr *v, bool cc, SourcePos p); + ReturnStmt(Expr *e, bool cc, SourcePos p); void EmitCode(FunctionEmitContext *ctx) const; void Print(int indent) const; @@ -273,7 +273,7 @@ public: Stmt *TypeCheck(); int EstimateCost() const; - Expr *val; + Expr *expr; /** This indicates whether the generated code will check to see if no more program instances are currently running after the return, in which case the code can possibly jump to the end of the current diff --git a/tests_errors/ref-initializer-1.ispc b/tests_errors/ref-initializer-1.ispc new file mode 100644 index 00000000..c926c793 --- /dev/null +++ b/tests_errors/ref-initializer-1.ispc @@ -0,0 +1,6 @@ +// Initializer for reference-type variable "x" must have an lvalue type + +float &func(uniform float a[], int i, float f) { + float &x = 1.; // a[i]; +} + diff --git a/tests_errors/ref-initializer-2.ispc b/tests_errors/ref-initializer-2.ispc new file mode 100644 index 00000000..4612addf --- /dev/null +++ b/tests_errors/ref-initializer-2.ispc @@ -0,0 +1,6 @@ +// Initializer for reference-type variable "x" must have a uniform lvalue type + +float &func(uniform float 
a[], int i, float f) { + float &x = a[i]; +} + diff --git a/tests_errors/ref-initializer-3.ispc b/tests_errors/ref-initializer-3.ispc new file mode 100644 index 00000000..27833b54 --- /dev/null +++ b/tests_errors/ref-initializer-3.ispc @@ -0,0 +1,6 @@ +// Initializer for reference-type variable "x" must have a uniform lvalue type + +float &func(uniform int a[], int i, float f) { + float &x = a[i]; +} + diff --git a/tests_errors/return-ref-1.ispc b/tests_errors/return-ref-1.ispc new file mode 100644 index 00000000..fee20b18 --- /dev/null +++ b/tests_errors/return-ref-1.ispc @@ -0,0 +1,5 @@ +// Illegal to return non-lvalue from function returning reference type + +float &func(uniform float a[], int i, float f) { + return 1.f; +} diff --git a/tests_errors/return-ref-2.ispc b/tests_errors/return-ref-2.ispc new file mode 100644 index 00000000..6ed667c1 --- /dev/null +++ b/tests_errors/return-ref-2.ispc @@ -0,0 +1,5 @@ +// Illegal to return varying lvalue type from function returning a reference type + +float &func(uniform float a[], int i, float f) { + return a[i]; +} diff --git a/type.h b/type.h index 5c690e60..fdcc41e6 100644 --- a/type.h +++ b/type.h @@ -828,4 +828,8 @@ private: const std::vector paramPositions; }; +inline bool IsReferenceType(const Type *t) { + return dynamic_cast(t) != NULL; +} + #endif // ISPC_TYPE_H From 1dac05960a5372c7299ba2187dc230b2af090425 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 5 Apr 2012 08:17:56 -0700 Subject: [PATCH 043/173] Fix build with LLVM 3.1 ToT --- Makefile | 2 +- cbackend.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e39eb831..6cf14be1 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ ARCH_TYPE = $(shell arch) ifeq ($(shell llvm-config --version), 3.1svn) LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \ -lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \ - -lLLVMBitWriter -lLLVMTableGen -lLLVMCBackendInfo \ + 
-lLLVMBitWriter -lLLVMTableGen \ -lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG \ -lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info \ -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler -lLLVMMCParser \ diff --git a/cbackend.cpp b/cbackend.cpp index e39f8461..45d1a791 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -933,6 +933,20 @@ void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, } #endif // LLVM_3_1svn +#ifdef LLVM_3_1svn +static inline std::string ftostr(const APFloat& V) { + std::string Buf; + if (&V.getSemantics() == &APFloat::IEEEdouble) { + raw_string_ostream(Buf) << V.convertToDouble(); + return Buf; + } else if (&V.getSemantics() == &APFloat::IEEEsingle) { + raw_string_ostream(Buf) << (double)V.convertToFloat(); + return Buf; + } + return ""; // error +} +#endif // LLVM_3_1svn + // isFPCSafeToPrint - Returns true if we may assume that CFP may be written out // textually as a double (rather than as a reference to a stack-allocated // variable). We decide this by converting CFP to a string and back into a From 4c9ac7fcf120a9ddf0dd59f824be227972d84395 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 5 Apr 2012 08:22:40 -0700 Subject: [PATCH 044/173] Fix build with LLVM 2.9. 
--- llvmutil.cpp | 5 ++++- opt.cpp | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/llvmutil.cpp b/llvmutil.cpp index 2ba60dbb..9f4fc658 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -1443,7 +1443,10 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, llvm::Instruction *phiInsertPos = phi->getParent()->begin(); llvm::PHINode *scalarPhi = llvm::PHINode::Create(vt->getElementType(), - phi->getNumIncomingValues(), newName, +#ifndef LLVM_2_9 + phi->getNumIncomingValues(), +#endif // !LLVM_2_9 + newName, phiInsertPos); phiMap[phi] = scalarPhi; diff --git a/opt.cpp b/opt.cpp index 1ebfd4a4..9159c495 100644 --- a/opt.cpp +++ b/opt.cpp @@ -1130,7 +1130,11 @@ lExtractFromInserts(llvm::Value *v, unsigned int index) { return NULL; Assert(iv->hasIndices() && iv->getNumIndices() == 1); +#ifdef LLVM_2_9 + if (*(iv->idx_begin()) == index) +#else if (iv->getIndices()[0] == index) +#endif return iv->getInsertedValueOperand(); else return lExtractFromInserts(iv->getAggregateOperand(), index); @@ -1260,7 +1264,11 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets, llvm::ExtractValueInst *ev = llvm::dyn_cast(ptrs); if (ev != NULL) { Assert(ev->getNumIndices() == 1); +#ifdef LLVM_2_9 + int index = *(ev->idx_begin()); +#else int index = ev->getIndices()[0]; +#endif ptrs = lExtractFromInserts(ev->getAggregateOperand(), index); if (ptrs != NULL) return lGetBasePtrAndOffsets(ptrs, offsets, insertBefore); From 4f8cf019caa8a2421bd62bd0c6db83eb9a7e29f2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 5 Apr 2012 08:49:39 -0700 Subject: [PATCH 045/173] Add pass to verify module before starting optimizations. 
--- opt.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/opt.cpp b/opt.cpp index 9159c495..5bc3737e 100644 --- a/opt.cpp +++ b/opt.cpp @@ -286,6 +286,8 @@ Optimize(llvm::Module *module, int optLevel) { llvm::PassManager optPM; llvm::FunctionPassManager funcPM(module); + optPM.add(llvm::createVerifierPass()); + if (g->target.isa != Target::GENERIC) { llvm::TargetLibraryInfo *targetLibraryInfo = new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple())); From c7dc8862a55c612a0576c1451ccdd7208076ef34 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 5 Apr 2012 15:24:26 -0700 Subject: [PATCH 046/173] Add FAQs about various language details. One of these finishes off issue #225. --- docs/faq.rst | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index 3fc9f9e2..a3517bea 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -14,6 +14,12 @@ distribution. + `Why are there multiple versions of exported ispc functions in the assembly output?`_ + `How can I more easily see gathers and scatters in generated assembly?`_ +* Language Details + + + `What is the difference between "int *foo" and "int foo[]"?`_ + + `Why are pointed-to types "uniform" by default?`_ + + `What am I getting an error about assigning a varying lvalue to a reference type?`_ + * Interoperability + `How can I supply an initial execution mask in the call from the application?`_ @@ -214,6 +220,125 @@ easier to understand: jmp ___pseudo_scatter_base_offsets32_32 ## TAILCALL +Language Details +================ + +What is the difference between "int \*foo" and "int foo[]"? +----------------------------------------------------------- + +In C and C++, declaring a function to take a parameter ``int *foo`` and +``int foo[]`` results in the same type for the parameter. Both are +pointers to integers. In ``ispc``, these are different types. 
The first +one is a varying pointer to a uniform integer value in memory, while the +second results in a uniform pointer to the start of an array of varying +integer values in memory. + +To understand why the first is a varying pointer to a uniform integer, +first recall that types without explicit rate qualifiers (``uniform``, +``varying``, or ``soa<>``) are ``varying`` by default. Second, recall from +the `discussion of pointer types in the ispc User's Guide`_ that pointed-to +types without rate qualifiers are ``uniform`` by default. (This second +rule is discussed further below, in `Why are pointed-to types "uniform" by +default?`_.) The type of ``int *foo`` follows from these. + +.. _discussion of pointer types in the ispc User's Guide: ispc.html#pointer-types + +Conversely, in a function body, ``int foo[10]`` represents a declaration of +a 10-element array of varying ``int`` values. In that we'd certainly like +to be able to pass such an array to a function that takes a ``int []`` +parameter, the natural type for an ``int []`` parameter is a uniform +pointer to varying integer values. + +In terms of compatibility with C/C++, it's unfortunate that this +distinction exists, though any other set of rules seems to introduce more +awkwardness than this one. (Though we're interested to hear ideas to +improve these rules!). + +Why are pointed-to types "uniform" by default? +---------------------------------------------- + +In ``ispc``, types without rate qualifiers are "varying" by default, but +types pointed to by pointers without rate qualifiers are "uniform" by +default. Why this difference? + +:: + + int foo; // no rate qualifier, "varying int". + uniform int *foo; // pointer type has no rate qualifier, pointed-to does. + // "varying pointer to uniform int". + int *foo; // neither pointer type nor pointed-to type ("int") have + // rate qualifiers. Pointer type is varying by default, + // pointed-to is uniform. "varying pointer to uniform int". 
+ varying int *foo; // varying pointer to varying int + +The first rule, having types without rate qualifiers be varying by default, +is a default that keeps the number of "uniform" or "varying" qualifiers in +``ispc`` programs low. Most ``ispc`` programs use mostly "varying" +variables, so this rule allows most variables to be declared without also +requiring rate qualifiers. + +On a related note, this rule allows many C/C++ functions to be used to +define equivalent functions in the SPMD execution model that ``ispc`` +provides with little or no modification: + +:: + + // scalar add in C/C++, SPMD/vector add in ispc + int add(int a, int b) { return a + b; } + +This motivation also explains why ``uniform int *foo`` represents a varying +pointer; having pointers be varying by default if they don't have rate +qualifiers similarly helps with porting code from C/C++ to ``ispc``. + +The trickier issue is why pointed-to types are "uniform" by default. In our +experience, data in memory that is accessed via pointers is most often +uniform; this generally includes all data that has been allocated and +initialized by the C/C++ application code. In practice, "varying" types are +more generally (but not exclusively) used for local data in ``ispc`` +functions. Thus, making the pointed-to type uniform by default leads to +more concise code for the most common cases. + + +What am I getting an error about assigning a varying lvalue to a reference type? +-------------------------------------------------------------------------------- + +Given code like the following: + +:: + + uniform float a[...]; + int index = ...; + float &r = a[index]; + +``ispc`` issues the error "Initializer for reference-type variable "r" must +have a uniform lvalue type.". The underlying issue stems from how +references are represented in the code generated by ``ispc``.
Recall that +``ispc`` supports both uniform and varying pointer types--a uniform pointer +points to the same location in memory for all program instances in the +gang, while a varying pointer allows each program instance to have its own +pointer value. + +References are represented as a pointer in the code generated by ``ispc``, +though this is generally opaque to the user; in ``ispc``, they are +specifically uniform pointers. This design decision was made so that given +code like this: + +:: + + extern void func(float &val); + float foo = ...; + func(foo); + +Then the reference would be handled efficiently as a single pointer, rather +than unnecessarily being turned into a gang-size of pointers. + +However, an implication of this decision is that it's not possible for +references to refer to completely different things for each of the program +instances. (And hence the error that is issued). In cases where a unique +per-program-instance pointer is needed, a varying pointer should be used +instead of a reference. + + Interoperability ================ From 581472564dbfc39cf07c90353ba55d8074e46b5c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 5 Apr 2012 15:51:44 -0700 Subject: [PATCH 047/173] Print "friendly" ispc message when abort/seg fault signal is thrown. Make crashes that happen in LLVM less inscrutable. Issue #222.
--- main.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/main.cpp b/main.cpp index 8c231b60..cc6fd778 100644 --- a/main.cpp +++ b/main.cpp @@ -44,7 +44,6 @@ #ifdef ISPC_IS_WINDOWS #include #endif // ISPC_IS_WINDOWS -#include #include #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) #include @@ -202,17 +201,18 @@ static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) { } +static void +lSignal(void *) { + FATAL("Unhandled signal sent to process; terminating."); +} + + int main(int Argc, char *Argv[]) { int argc; char *argv[128]; lGetAllArgs(Argc, Argv, argc, argv); -#if 0 - // Use LLVM's little utility function to print out nice stack traces if - // we crash - llvm::sys::PrintStackTraceOnErrorSignal(); - llvm::PrettyStackTraceProgram X(argc, argv); -#endif + llvm::sys::AddSignalHandler(lSignal, NULL); // initialize available LLVM targets LLVMInitializeX86TargetInfo(); From 95556811fa4cf0428de9b38df011db18a56a888e Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 5 Apr 2012 20:39:39 -0700 Subject: [PATCH 048/173] Fix linux build --- cbackend.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cbackend.cpp b/cbackend.cpp index 45d1a791..c3f6d0f7 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -16,6 +16,8 @@ #warning "The C++ backend isn't supported when building with LLVM 2.9" #else +#include + #ifndef _MSC_VER #include #endif From 802add1f979d9cfd67c46ec8689b9aeaafa9eae0 Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Fri, 6 Apr 2012 17:54:55 -0400 Subject: [PATCH 049/173] Added to the Makefile the ability to point to a custom installation of llvm and clang. --- Makefile | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index e39eb831..3b55b25a 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,15 @@ # ispc Makefile # +# If you have your own special version of llvm and/or clang, change +# these variables to match. 
+LLVM_CONFIG=$(shell which llvm-config) +CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir) + +# Add llvm bin to the path so any scripts run will go to the right llvm-config +LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir) +export PATH:=$(LLVM_BIN):$(PATH) + ARCH_OS = $(shell uname) ifeq ($(ARCH_OS), Darwin) ARCH_OS2 = "OSX" @@ -10,7 +19,7 @@ else endif ARCH_TYPE = $(shell arch) -ifeq ($(shell llvm-config --version), 3.1svn) +ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn) LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \ -lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \ -lLLVMBitWriter -lLLVMTableGen -lLLVMCBackendInfo \ @@ -22,18 +31,18 @@ ifeq ($(shell llvm-config --version), 3.1svn) -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore \ -lLLVMSupport else - LLVM_LIBS=$(shell llvm-config --libs) + LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs) endif CLANG=clang CLANG_LIBS = -lclangFrontend -lclangDriver \ -lclangSerialization -lclangParse -lclangSema \ -lclangAnalysis -lclangAST -lclangLex -lclangBasic -ifeq ($(shell llvm-config --version), 3.1svn) +ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn) CLANG_LIBS += -lclangEdit endif -ISPC_LIBS=$(shell llvm-config --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \ +ISPC_LIBS=$(shell $(LLVM_CONFIG) --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \ -lpthread ifeq ($(ARCH_OS),Linux) @@ -44,8 +53,8 @@ ifeq ($(ARCH_OS2),Msys) ISPC_LIBS += -lshlwapi -limagehlp -lpsapi endif -LLVM_CXXFLAGS=$(shell llvm-config --cppflags) -LLVM_VERSION=LLVM_$(shell llvm-config --version | sed s/\\./_/) +LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags) +LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed s/\\./_/) LLVM_VERSION_DEF=-D$(LLVM_VERSION) BUILD_DATE=$(shell date +%Y%m%d) @@ -54,7 +63,8 @@ BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1) CXX=g++ CPP=cpp OPT=-g3 -CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -Wall $(LLVM_VERSION_DEF) \ +CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. 
-Iobjs/ -I$(CLANG_INCLUDE) \ + -Wall $(LLVM_VERSION_DEF) \ -DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\"" LDFLAGS= From c8feee238b2a9c7da1c1d6a0354e688422a3391c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 6 Apr 2012 15:30:54 -0700 Subject: [PATCH 050/173] Bump release number to 1.2.1 --- ispc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ispc.h b/ispc.h index fb334141..a349573f 100644 --- a/ispc.h +++ b/ispc.h @@ -38,7 +38,7 @@ #ifndef ISPC_H #define ISPC_H -#define ISPC_VERSION "1.2.1dev" +#define ISPC_VERSION "1.2.1" #if !defined(LLVM_2_9) && !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn) #error "Only LLVM 2.9, 3.0, and the 3.1 development branch are supported" From d0e583b29c754abc6d7adbb26591e5b80f90c553 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 6 Apr 2012 16:02:19 -0700 Subject: [PATCH 051/173] Release notes and doxygen version nubmer bump for 1.2.1 --- docs/ReleaseNotes.txt | 44 +++++++++++++++++++++++++++++++++++++++++++ docs/news.rst | 18 ++++++++++++++++++ doxygen.cfg | 2 +- 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/docs/ReleaseNotes.txt b/docs/ReleaseNotes.txt index 62f46289..f99066ac 100644 --- a/docs/ReleaseNotes.txt +++ b/docs/ReleaseNotes.txt @@ -1,3 +1,47 @@ +=== v1.2.1 === (6 April 2012) + +This release contains only minor new functionality and is mostly for many +small bugfixes and improvements to error handling and error reporting. +The new functionality that is present is: + +* Significantly more efficient versions of the float / half conversion + routines are now available in the standard library, thanks to Fabian + Giesen. + +* The last member of a struct can now be a zero-length array; this allows + the trick of dynamically allocating enough storage for the struct and + some number of array elements at the end of it. 
+ +Significant bugs fixed include: + +* Issue #205: When a target ISA isn't specified, use the host system's + capabilities to choose a target for which it will be able to run the + generated code. + +* Issues #215 and #217: Don't allocate storage for global variables that + are declared "extern". + +* Issue #197: Allow NULL as a default argument value in a function + declaration. + +* Issue #223: Fix bugs where taking the address of a function wouldn't work + as expected. + +* Issue #224: When there are overloaded variants of a function that take + both reference and const reference parameters, give the non-const + reference preference when matching values of that underlying type. + +* Issue #225: An error is issued when a varying lvalue is assigned to a + reference type (rather than crashing). + +* Issue #193: Permit conversions from array types to void *, not just the + pointer type of the underlying array element. + +* Issue #199: Still evaluate expressions that are cast to (void). + +The documentation has also been improved, with FAQs added to clarify some +aspects of the ispc pointer model. + === v1.2.0 === (20 March 2012) This is a major new release of ispc, with a number of significant diff --git a/docs/news.rst b/docs/news.rst index e875b077..ad6c4bd5 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -2,6 +2,24 @@ ispc News ========= +ispc 1.2.1 is Released +---------------------- + +This is a bugfix release, fixing approximately 20 bugs in the system and +improving error handling and error reporting. New functionality includes +very efficient float/half conversion routines thanks to Fabian +Giesen. See the `1.2.1 release notes`_ for details. + +.. _1.2.1 release notes: https://github.com/ispc/ispc/tree/master/docs/ReleaseNotes.txt + +ispc 1.2.0 is Released +----------------------- + +A new major release was posted on March 20, 2012.
This release includes +significant new functionality for cleanly handling "structure of arrays" +(SoA) data layout and a new model for how uniform and varying are handled +with structure types. + Paper on ispc To Appear in InPar 2012 ------------------------------------- diff --git a/doxygen.cfg b/doxygen.cfg index f8637ddf..1659fbdd 100644 --- a/doxygen.cfg +++ b/doxygen.cfg @@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 1.2.0 +PROJECT_NUMBER = 1.2.1 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. From 8475dc082a78b8c41f92d2cdcbbb290f3ec1a16c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 6 Apr 2012 16:16:50 -0700 Subject: [PATCH 052/173] Bump version number to 1.2.2dev --- ispc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ispc.h b/ispc.h index a349573f..9e25baa0 100644 --- a/ispc.h +++ b/ispc.h @@ -38,7 +38,7 @@ #ifndef ISPC_H #define ISPC_H -#define ISPC_VERSION "1.2.1" +#define ISPC_VERSION "1.2.2dev" #if !defined(LLVM_2_9) && !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn) #error "Only LLVM 2.9, 3.0, and the 3.1 development branch are supported" From 972043c146bc17a2f7059108200f6223c4208b76 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 9 Apr 2012 14:23:08 -0700 Subject: [PATCH 053/173] Fix serious bug in handling constant-valued initializers. In InitSymbol(), we try to be smart and emit a memcpy when there are a number of values to store (e.g. for arrays, structs, etc.) Unfortunately, this wasn't working as desired for bools (i.e. i1 types), since the SizeOf() call that tried to figure out how many bytes to copy would return 0 bytes, due to dividing the number of bits to copy by 8. Fixes issue #234. 
--- expr.cpp | 18 +++++++++++------- ispc.cpp | 4 +++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/expr.cpp b/expr.cpp index 81f51151..2ad7a495 100644 --- a/expr.cpp +++ b/expr.cpp @@ -643,13 +643,17 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, return; } - llvm::Value *constPtr = - new llvm::GlobalVariable(*m->module, llvmType, true /* const */, - llvm::GlobalValue::InternalLinkage, - constValue, "const_initializer"); - llvm::Value *size = g->target.SizeOf(llvmType, - ctx->GetCurrentBasicBlock()); - ctx->MemcpyInst(ptr, constPtr, size); + if (Type::IsBasicType(symType)) + ctx->StoreInst(constValue, ptr); + else { + llvm::Value *constPtr = + new llvm::GlobalVariable(*m->module, llvmType, true /* const */, + llvm::GlobalValue::InternalLinkage, + constValue, "const_initializer"); + llvm::Value *size = g->target.SizeOf(llvmType, + ctx->GetCurrentBasicBlock()); + ctx->MemcpyInst(ptr, constPtr, size); + } return; } diff --git a/ispc.cpp b/ispc.cpp index 4e39c0b2..e9357832 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -518,7 +518,9 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type, const llvm::TargetData *td = GetTargetMachine()->getTargetData(); Assert(td != NULL); - uint64_t byteSize = td->getTypeSizeInBits(type) / 8; + uint64_t bitSize = td->getTypeSizeInBits(type); + Assert((bitSize % 8) == 0); + uint64_t byteSize = bitSize / 8; if (is32Bit || g->opt.force32BitAddressing) return LLVMInt32((int32_t)byteSize); else From 7aaeb27e0f97a10ae7a18db34a609d4af645f719 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 9 Apr 2012 14:23:17 -0700 Subject: [PATCH 054/173] Remove duplicate test. 
--- tests/short-circuit-13.ispc | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 tests/short-circuit-13.ispc diff --git a/tests/short-circuit-13.ispc b/tests/short-circuit-13.ispc deleted file mode 100644 index fb0a94a2..00000000 --- a/tests/short-circuit-13.ispc +++ /dev/null @@ -1,25 +0,0 @@ - -export uniform int width() { return programCount; } - -uniform int * uniform ptr; - -float foo(uniform float a[]) { - int index = (programIndex & 1) * 10000; - if (a[programIndex] < 10000 && a[index] == 1) - return 1; - else - return 1234; -} - -export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - float a = aFOO[programIndex]; - float a0 = aFOO[0], a1 = aFOO[1]; - if ((programIndex & 1) == 0) - RET[programIndex] = foo(aFOO); - else - RET[programIndex] = 2; -} - -export void result(uniform float RET[]) { - RET[programIndex] = (programIndex & 1) ? 2 : 1; -} From 8a1a214ca9a1b9c353831d7a806609a5f8594e59 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 9 Apr 2012 14:36:39 -0700 Subject: [PATCH 055/173] Provide required alignment when generating debug info for pointer types. --- type.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/type.cpp b/type.cpp index 8fffb682..31fd63a9 100644 --- a/type.cpp +++ b/type.cpp @@ -1157,13 +1157,15 @@ PointerType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIType diTargetType = baseType->GetDIType(scope); int bitsSize = g->target.is32Bit ? 
32 : 64; + int ptrAlignBits = bitsSize; switch (variability.type) { case Variability::Uniform: - return m->diBuilder->createPointerType(diTargetType, bitsSize); + return m->diBuilder->createPointerType(diTargetType, bitsSize, + ptrAlignBits); case Variability::Varying: { // emit them as an array of pointers llvm::DIType eltType = m->diBuilder->createPointerType(diTargetType, - bitsSize); + bitsSize, ptrAlignBits); return lCreateDIArray(eltType, g->target.vectorWidth); } case Variability::SOA: { @@ -1963,6 +1965,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIType eltType = GetElementType(i)->GetDIType(scope); uint64_t eltAlign = eltType.getAlignInBits(); uint64_t eltSize = eltType.getSizeInBits(); + Assert(eltAlign != 0); // The alignment for the entire structure is the maximum of the // required alignments of its elements From 08696653caa560a57642abad0faf0b5c5d8442dc Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 10 Apr 2012 19:27:31 -0700 Subject: [PATCH 056/173] Don't include struct member types in mangled string. Not only was this quite verbose, it was unnecessary since we do type equality by name. This also needed to be fixed before we could handle structs declared like "struct Foo;", when we then e.g. have other structs with Foo * members. --- type.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/type.cpp b/type.cpp index 31fd63a9..3795a3dc 100644 --- a/type.cpp +++ b/type.cpp @@ -1912,10 +1912,7 @@ StructType::Mangle() const { ret += "_c_"; ret += variability.MangleString(); - ret += name + std::string("]<"); - for (unsigned int i = 0; i < elementTypes.size(); ++i) - ret += GetElementType(i)->Mangle(); - ret += ">"; + ret += name + std::string("]"); return ret; } From acfbe77ffc781e03af1cb217495e6b443484ad17 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 10 Apr 2012 19:27:37 -0700 Subject: [PATCH 057/173] Fix typo. 
--- ctx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctx.h b/ctx.h index 0b1ccffa..8cb24a06 100644 --- a/ctx.h +++ b/ctx.h @@ -632,7 +632,7 @@ private: std::vector controlFlowInfo; /** DIFile object corresponding to the source file where the current - function was defined (used for debugging info0. */ + function was defined (used for debugging info). */ llvm::DIFile diFile; /** DISubprogram corresponding to this function (used for debugging From 2aa61007c64e79e1e0e8cf1b19994f83bbacf3cc Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 10 Apr 2012 19:37:03 -0700 Subject: [PATCH 058/173] Remove memory_barrier() calls from atomics. This was unnecessary overhead to impose on all callers; the user should handle these as needed on their own. Also added some explanatory text to the documentation that highlights that memory_barrier() is only needed across HW threads/cores, not across program instances in a gang. --- docs/ispc.rst | 5 +++++ stdlib.ispc | 29 ++--------------------------- 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 6fb60816..ffc39490 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3880,6 +3880,11 @@ code. void memory_barrier(); +Note that this barrier is *not* needed for coordinating reads and writes +among the program instances in a gang; it's only needed for coordinating +between multiple hardware threads running on different cores. See the +section `Data Races Within a Gang`_ for the guarantees provided about +memory read/write ordering across a gang. Prefetches ---------- diff --git a/stdlib.ispc b/stdlib.ispc index 14d6f8cf..cebed66f 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1,6 +1,6 @@ // -*- mode: c++ -*- /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -1588,22 +1588,17 @@ static inline void memory_barrier() { #define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \ static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \ - memory_barrier(); \ TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \ - memory_barrier(); \ return ret; \ } \ static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \ uniform TA value) { \ - memory_barrier(); \ uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \ - memory_barrier(); \ return ret; \ } \ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \ uniform TA * uniform ptrArray[programCount]; \ ptrArray[programIndex] = ptr; \ - memory_barrier(); \ TA ret; \ __foreach_active (i) { \ uniform TA * uniform p = ptrArray[i]; \ @@ -1611,13 +1606,11 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \ uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \ ret = insert(ret, i, r); \ } \ - memory_barrier(); \ return ret; \ } \ #define DEFINE_ATOMIC_SWAP(TA,TB) \ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \ - memory_barrier(); \ uniform int i = 0; \ TA ret[programCount]; \ TA memVal; \ @@ -1647,20 +1640,16 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \ /* And the last instance that wanted to swap gets the value we \ originally got back from memory... 
*/ \ ret[lastSwap] = memVal; \ - memory_barrier(); \ return ret[programIndex]; \ } \ static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \ uniform TA value) { \ - memory_barrier(); \ uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \ - memory_barrier(); \ return ret; \ } \ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \ uniform TA * uniform ptrArray[programCount]; \ ptrArray[programIndex] = ptr; \ - memory_barrier(); \ TA ret; \ __foreach_active (i) { \ uniform TA * uniform p = ptrArray[i]; \ @@ -1668,7 +1657,6 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \ uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \ ret = insert(ret, i, r); \ } \ - memory_barrier(); \ return ret; \ } \ @@ -1676,25 +1664,19 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \ static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \ uniform TA oneval = reduce_##OPA(value); \ TA ret; \ - if (lanemask() != 0) { \ - memory_barrier(); \ + if (lanemask() != 0) \ ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \ - memory_barrier(); \ - } \ return ret; \ } \ static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \ uniform TA value) { \ - memory_barrier(); \ uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \ - memory_barrier(); \ return ret; \ } \ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \ TA value) { \ uniform TA * uniform ptrArray[programCount]; \ ptrArray[programIndex] = ptr; \ - memory_barrier(); \ TA ret; \ __foreach_active (i) { \ uniform TA * uniform p = ptrArray[i]; \ @@ -1702,7 +1684,6 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \ uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \ ret = insert(ret, i, r); \ } \ - memory_barrier(); \ return ret; \ } @@ -1757,25 +1738,20 @@ DEFINE_ATOMIC_SWAP(double,double) #define 
ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \ static inline uniform TA atomic_compare_exchange_global( \ uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \ - memory_barrier(); \ uniform TA ret = \ __atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \ - memory_barrier(); \ return ret; \ } \ static inline TA atomic_compare_exchange_global( \ uniform TA * uniform ptr, TA oldval, TA newval) { \ - memory_barrier(); \ TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \ (MASKTYPE)__mask); \ - memory_barrier(); \ return ret; \ } \ static inline TA atomic_compare_exchange_global( \ uniform TA * varying ptr, TA oldval, TA newval) { \ uniform TA * uniform ptrArray[programCount]; \ ptrArray[programIndex] = ptr; \ - memory_barrier(); \ TA ret; \ __foreach_active (i) { \ uniform TA r = \ @@ -1784,7 +1760,6 @@ static inline TA atomic_compare_exchange_global( \ extract(newval, i)); \ ret = insert(ret, i, r); \ } \ - memory_barrier(); \ return ret; \ } From 70a5348f430a08b5131c7ce45ab2d065c9953da8 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 11 Apr 2012 05:32:53 -0700 Subject: [PATCH 059/173] Add size_t, ptrdiff_t, and [u]intptr_t types. --- docs/ispc.rst | 13 +++++++++++-- module.cpp | 20 ++++++++++++++++++++ tests/intptr.ispc | 19 +++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 tests/intptr.ispc diff --git a/docs/ispc.rst b/docs/ispc.rst index ffc39490..7668e5e9 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -1394,8 +1394,8 @@ Types Basic Types and Type Qualifiers ------------------------------- -``ispc`` is a statically-typed language. It supports a variety of basic -types. +``ispc`` is a statically-typed language. It supports a variety of core +basic types: * ``void``: "empty" type representing no value. * ``bool``: boolean value; may be assigned ``true``, ``false``, or the @@ -1412,6 +1412,15 @@ types. * ``unsigned int64``: 64-bit unsigned integer. 
* ``double``: 64-bit double-precision floating point value. +There are also a few built-in types related to pointers and memory: + +* ``size_t``: the maximum size of any object (structure or array) +* ``ptrdiff_t``: an integer type large enough to represent the difference + between two pointers +* ``intptr_t``: signed integer type that is large enough to represent + a pointer value +* ``uintptr_t``: unsigned integer type large enough to represent a pointer + Implicit type conversion between values of different types is done automatically by the ``ispc`` compiler. Thus, a value of ``float`` type can be assigned to a variable of ``int`` type directly. In binary diff --git a/module.cpp b/module.cpp index 58782d0f..914a9bd2 100644 --- a/module.cpp +++ b/module.cpp @@ -88,6 +88,24 @@ #include #include +static void +lDeclareSizeAndPtrIntTypes(SymbolTable *symbolTable) { + const Type *ptrIntType = (g->target.is32Bit) ? AtomicType::VaryingInt32 : + AtomicType::VaryingInt64; + ptrIntType = ptrIntType->GetAsUnboundVariabilityType(); + + symbolTable->AddType("intptr_t", ptrIntType, SourcePos()); + symbolTable->AddType("uintptr_t", ptrIntType->GetAsUnsignedType(), + SourcePos()); + symbolTable->AddType("ptrdiff_t", ptrIntType, SourcePos()); + + const Type *sizeType = (g->target.is32Bit || g->opt.force32BitAddressing) ? + AtomicType::VaryingInt32 : AtomicType::VaryingInt64; + sizeType = sizeType->GetAsUnboundVariabilityType(); + symbolTable->AddType("size_t", sizeType, SourcePos()); +} + + /////////////////////////////////////////////////////////////////////////// // Module @@ -103,6 +121,8 @@ Module::Module(const char *fn) { symbolTable = new SymbolTable; ast = new AST; + lDeclareSizeAndPtrIntTypes(symbolTable); + module = new llvm::Module(filename ? 
filename : "", *g->ctx); module->setTargetTriple(g->target.GetTripleString()); diff --git a/tests/intptr.ispc b/tests/intptr.ispc new file mode 100644 index 00000000..7eb9eef7 --- /dev/null +++ b/tests/intptr.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + +export void f_v(uniform float RET[]) { + RET[programIndex] = sizeof(uniform intptr_t); +} + +export void result(uniform float RET[]) { + RET[programIndex] = +#if (ISPC_POINTER_SIZE==32) + 4 +#elif (ISPC_POINTER_SIZE==64) + 8 +#else +#error Unknown pointer size +#endif + ; +} From 66765dc123698d96b055239bbaa345688b5a145b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 11 Apr 2012 06:11:52 -0700 Subject: [PATCH 060/173] Fix printing of function overload candidates in error message. --- expr.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/expr.cpp b/expr.cpp index 2ad7a495..dab2b416 100644 --- a/expr.cpp +++ b/expr.cpp @@ -7487,8 +7487,12 @@ static void lPrintOverloadCandidates(SourcePos pos, const std::vector &funcs, const std::vector &argTypes, const std::vector *argCouldBeNULL) { - for (unsigned int i = 0; i < funcs.size(); ++i) - Error(funcs[i]->pos, "Candidate function:"); + for (unsigned int i = 0; i < funcs.size(); ++i) { + const FunctionType *ft = + dynamic_cast(funcs[i]->type); + Assert(ft != NULL); + Error(funcs[i]->pos, "Candidate function: %s.", ft->GetString().c_str()); + } std::string passedTypes = "Passed types: ("; for (unsigned int i = 0; i < argTypes.size(); ++i) { From 491fa239bde017413d0e1b762736ba2b9a5a812a Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 11 Apr 2012 06:12:31 -0700 Subject: [PATCH 061/173] Add atomic swap and cmpxchg for void * as well. Issue #232. 
--- docs/ispc.rst | 22 ++++++++++++- stdlib.ispc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 7668e5e9..9edd7325 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3797,6 +3797,13 @@ For global atomics, only atomic swap is available for these types: float atomic_swap_global(uniform float * uniform ptr, float value) double atomic_swap_global(uniform double * uniform ptr, double value) +Finally, "swap" (but none of these other atomics) is available for pointer +types: + +:: + + void *atomic_swap_{local,global}(void * * uniform ptr, void * value) + There are also variants of the atomic that take ``uniform`` values for the operand and return a ``uniform`` result. These correspond to a single atomic operation being performed for the entire gang of program instances, @@ -3821,6 +3828,13 @@ rather than one per program instance. uniform int32 atomic_swap_{local,global}(uniform int32 * uniform ptr, uniform int32 newval) +And similarly for pointers: + +:: + + uniform void *atomic_swap_{local,global}(void * * uniform ptr, + void *newval) + Be careful that you use the atomic function that you mean to; consider the following code: @@ -3862,12 +3876,18 @@ the same location in memory!) int32 atomic_xor_{local,global}(uniform int32 * varying ptr, int32 value) int32 atomic_swap_{local,global}(uniform int32 * varying ptr, int32 value) +And: + +:: + + void *atomic_swap_{local,global}(void * * ptr, void *value) + There are also atomic "compare and exchange" functions. Compare and exchange atomically compares the value in "val" to "compare"--if they match, it assigns "newval" to "val". In either case, the old value of "val" is returned. (As with the other atomic operations, there are also ``unsigned`` and 64-bit variants of this function. Furthermore, there are -``float`` and ``double`` variants as well.) +``float``, ``double``, and ``void *`` variants as well.) 
:: diff --git a/stdlib.ispc b/stdlib.ispc index cebed66f..fd0df7ce 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1772,6 +1772,49 @@ ATOMIC_DECL_CMPXCHG(double, double, IntMaskType) #undef ATOMIC_DECL_CMPXCHG +// void * variants of swap and compare exchange + +static inline void *atomic_swap_global(void ** uniform ptr, + void * value) { + return (void *)atomic_swap_global((intptr_t * uniform)ptr, + (intptr_t)value); +} + +static inline void * uniform atomic_swap_global(void ** uniform ptr, + void * uniform value) { + return (void * uniform)atomic_swap_global((intptr_t * uniform)ptr, + (uniform intptr_t)value); +} + +static inline void *atomic_swap_global(void ** ptr, void * value) { + return (void *)atomic_swap_global((intptr_t *)ptr, + (intptr_t)value); +} + +static inline void * +atomic_compare_exchange_global(void ** uniform ptr, + void * oldval, void * newval) { + return (void *)atomic_compare_exchange_global((intptr_t * uniform)ptr, + (intptr_t)oldval, + (intptr_t)newval); +} + +static inline void * uniform +atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval, + void * uniform newval) { + return (void * uniform)atomic_compare_exchange_global((intptr_t * uniform)ptr, + (uniform intptr_t)oldval, + (uniform intptr_t)newval); +} + +static inline void * +atomic_compare_exchange_global(void ** ptr, void * oldval, + void * newval) { + return (void *)atomic_compare_exchange_global((intptr_t *)ptr, + (intptr_t)oldval, + (intptr_t)newval); +} + /////////////////////////////////////////////////////////////////////////// // local atomics @@ -1943,6 +1986,49 @@ LOCAL_CMPXCHG(double) #undef LOCAL_ATOMIC #undef LOCAL_CMPXCHG +// void * variants of swap and compare exchange + +static inline void *atomic_swap_local(void ** uniform ptr, + void * value) { + return (void *)atomic_swap_local((intptr_t * uniform)ptr, + (intptr_t)value); +} + +static inline void * uniform atomic_swap_local(void ** uniform ptr, + void * uniform value) { + return (void * 
uniform)atomic_swap_local((intptr_t * uniform)ptr, + (uniform intptr_t)value); +} + +static inline void *atomic_swap_local(void ** ptr, void * value) { + return (void *)atomic_swap_local((intptr_t *)ptr, + (intptr_t)value); +} + +static inline void * +atomic_compare_exchange_local(void ** uniform ptr, + void * oldval, void * newval) { + return (void *)atomic_compare_exchange_local((intptr_t * uniform)ptr, + (intptr_t)oldval, + (intptr_t)newval); +} + +static inline void * uniform +atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval, + void * uniform newval) { + return (void * uniform)atomic_compare_exchange_local((intptr_t * uniform)ptr, + (uniform intptr_t)oldval, + (uniform intptr_t)newval); +} + +static inline void * +atomic_compare_exchange_local(void ** ptr, void * oldval, + void * newval) { + return (void *)atomic_compare_exchange_local((intptr_t *)ptr, + (intptr_t)oldval, + (intptr_t)newval); +} + /////////////////////////////////////////////////////////////////////////// // Transcendentals (float precision) From ca7cc4744eaa14c6221bc57f5548c59616343788 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 12 Apr 2012 06:08:19 -0700 Subject: [PATCH 062/173] Fix bug with taking references of temporaries. Previously, the compiler would crash if e.g. the program passed a temporary value to a function taking a const reference. This change fixes ReferenceExpr::GetValue() to handle this case and allocate temporary storage for the temporary so that the pointer to that storage can be used for the reference value. 
--- expr.cpp | 29 ++++++++++++++++++++++++++++- tests/ref-as-temporary.ispc | 14 ++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tests/ref-as-temporary.ispc diff --git a/expr.cpp b/expr.cpp index dab2b416..cbef1622 100644 --- a/expr.cpp +++ b/expr.cpp @@ -6861,7 +6861,34 @@ ReferenceExpr::ReferenceExpr(Expr *e, SourcePos p) llvm::Value * ReferenceExpr::GetValue(FunctionEmitContext *ctx) const { ctx->SetDebugPos(pos); - return expr ? expr->GetLValue(ctx) : NULL; + if (expr == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + + llvm::Value *value = expr->GetLValue(ctx); + if (value != NULL) + return value; + + // value is NULL if the expression is a temporary; in this case, we'll + // allocate storage for it so that we can return the pointer to that... + const Type *type; + LLVM_TYPE_CONST llvm::Type *llvmType; + if ((type = expr->GetType()) == NULL || + (llvmType = type->LLVMType(g->ctx)) == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + + value = expr->GetValue(ctx); + if (value == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + + llvm::Value *ptr = ctx->AllocaInst(llvmType); + ctx->StoreInst(value, ptr); + return ptr; } diff --git a/tests/ref-as-temporary.ispc b/tests/ref-as-temporary.ispc new file mode 100644 index 00000000..1b167da6 --- /dev/null +++ b/tests/ref-as-temporary.ispc @@ -0,0 +1,14 @@ + +export uniform int width() { return programCount; } + + +int func(const int &a) { return a+1; } +int bar() { return 0; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + RET[programIndex] = func(bar()); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1; +} From fd846fbe77243e2fdb6691c1bb1d7542a919f26d Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 12 Apr 2012 10:28:15 -0700 Subject: [PATCH 063/173] Fix bug in __gather_base_offsets_32. In short, we weren't correctly zeroing the compile-time constant portion of the offsets for lanes that aren't executing. (!) 
Fixes issue #235. --- builtins/util.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtins/util.m4 b/builtins/util.m4 index 26cbfafb..501f2e47 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -2880,11 +2880,11 @@ define <$1 x $2> @__gather_base_offsets32_$2(i8 * %ptr, <$1 x i32> %offsets, i32 %newDelta = load <$1 x i32> * %deltaPtr %ret0 = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %newOffsets, - i32 %offset_scale, <$1 x i32> %offset_delta, + i32 %offset_scale, <$1 x i32> %newDelta, <$1 x $2> undef, i32 0) forloop(lane, 1, eval($1-1), `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, - <$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %offset_delta, + <$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %newDelta, <$1 x $2> %retPREV, i32 LANE) ', `LANE', lane), `PREV', eval(lane-1))') ret <$1 x $2> %ret`'eval($1-1) From 2a18efef823edb858129e64cfdf6e165ffab71c5 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 12 Apr 2012 11:23:02 -0700 Subject: [PATCH 064/173] Use type conversion machinery when processing expr lists for initializers. Once we're down to something that's not another nested expr list, use TypeConvertExpr() to convert the expression to the type we need. This should allow simplifying a number of the GetConstant() implementations, to remove partial reimplementation of type conversion there. For now, this change finishes off issue #220. 
--- expr.cpp | 20 ++++++++++++++++++-- tests/func-ptr-initializer.ispc | 28 ++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 tests/func-ptr-initializer.ispc diff --git a/expr.cpp b/expr.cpp index cbef1622..a804d733 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3662,7 +3662,19 @@ ExprList::GetConstant(const Type *type) const { if (exprs[i] == NULL) return NULL; const Type *elementType = collectionType->GetElementType(i); - llvm::Constant *c = exprs[i]->GetConstant(elementType); + + Expr *expr = exprs[i]; + if (dynamic_cast(expr) == NULL) { + // If there's a simple type conversion from the type of this + // expression to the type we need, then let the regular type + // conversion machinery handle it. + expr = TypeConvertExpr(exprs[i], elementType, "initializer list"); + Assert(expr != NULL); + // Re-establish const-ness if possible + expr = ::Optimize(expr); + } + + llvm::Constant *c = expr->GetConstant(elementType); if (c == NULL) // If this list element couldn't convert to the right constant // type for the corresponding collection member, then give up. 
@@ -7503,8 +7515,12 @@ FunctionSymbolExpr::GetConstant(const Type *type) const { if (ft == NULL) return NULL; - if (Type::Equal(type, matchingFunc->type) == false) + if (Type::Equal(type, matchingFunc->type) == false) { + Error(pos, "Type of function symbol \"%s\" doesn't match expected type " + "\"%s\".", matchingFunc->type->GetString().c_str(), + type->GetString().c_str()); return NULL; + } return matchingFunc->function; } diff --git a/tests/func-ptr-initializer.ispc b/tests/func-ptr-initializer.ispc new file mode 100644 index 00000000..96537391 --- /dev/null +++ b/tests/func-ptr-initializer.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + + +typedef float (*func)(); + +float foo(); +float bar(); + +struct X { func f, g; }; + +static uniform X x = { foo, &bar }; + +export void f_f(uniform float RET[], uniform float aFOO[]) { + RET[programIndex] = x.f() + x.g(); +} + +export void result(uniform float RET[]) { + RET[programIndex] = programIndex; +} + +float foo() { + return 2 * programIndex; +} + +float bar() { + return -programIndex; +} From d88dbf3612f6241318d38d29ba6b68077ce50d30 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 12 Apr 2012 11:40:28 -0700 Subject: [PATCH 065/173] Fix two bugs with resolving unbound variability. We still need to call ResolveUnboundVariability even if the type returns false from HasUnboundVariability; we may have, for example, a pointer type where the pointer is resolved, but the pointed-to type is unresolved. Fixes issue #228. 
--- expr.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/expr.cpp b/expr.cpp index a804d733..b6955bc3 100644 --- a/expr.cpp +++ b/expr.cpp @@ -7292,8 +7292,7 @@ SizeOfExpr::SizeOfExpr(Expr *e, SourcePos p) SizeOfExpr::SizeOfExpr(const Type *t, SourcePos p) : Expr(p), expr(NULL), type(t) { - if (type->HasUnboundVariability()) - type = type->ResolveUnboundVariability(Variability::Varying); + type = type->ResolveUnboundVariability(Variability::Varying); } @@ -7970,7 +7969,7 @@ NewExpr::NewExpr(int typeQual, const Type *t, Expr *init, Expr *count, // varying new. isVarying = (typeQual == 0) || (typeQual & TYPEQUAL_VARYING); - if (allocType != NULL && allocType->HasUnboundVariability()) + if (allocType != NULL) allocType = allocType->ResolveUnboundVariability(Variability::Uniform); } From 5ece6fec04a3e39e55fdc6b133270bccb7b3ee3f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 12 Apr 2012 17:28:30 -0700 Subject: [PATCH 066/173] Substantial rewrite (again) of decl handling. The decl.* code now no longer interacts with Symbols, but just returns names, types, initializer expressions, etc., as needed. This makes the code a bit more understandable. Fixes issues #171 and #130. 
--- ast.cpp | 4 +- ast.h | 5 +- decl.cpp | 409 ++++++++++++++-------------------- decl.h | 45 ++-- func.cpp | 30 ++- func.h | 4 +- ispc.h | 9 + module.cpp | 172 +++++++------- module.h | 17 +- parse.yy | 64 +++--- sym.cpp | 6 - sym.h | 7 - tests/func-anon-param.ispc | 15 ++ tests/global-decl-define.ispc | 14 ++ tests/ptr-cast-complex.ispc | 18 ++ 15 files changed, 396 insertions(+), 423 deletions(-) create mode 100644 tests/func-anon-param.ispc create mode 100644 tests/global-decl-define.ispc create mode 100644 tests/ptr-cast-complex.ispc diff --git a/ast.cpp b/ast.cpp index 0c71cdc8..752585f1 100644 --- a/ast.cpp +++ b/ast.cpp @@ -55,10 +55,10 @@ ASTNode::~ASTNode() { // AST void -AST::AddFunction(Symbol *sym, const std::vector &args, Stmt *code) { +AST::AddFunction(Symbol *sym, Stmt *code) { if (sym == NULL) return; - functions.push_back(new Function(sym, args, code)); + functions.push_back(new Function(sym, code)); } diff --git a/ast.h b/ast.h index 0f73677b..f03d7343 100644 --- a/ast.h +++ b/ast.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011, Intel Corporation + Copyright (c) 2011-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -84,8 +84,7 @@ class AST { public: /** Add the AST for a function described by the given declaration information and source code. */ - void AddFunction(Symbol *sym, const std::vector &args, - Stmt *code); + void AddFunction(Symbol *sym, Stmt *code); /** Generate LLVM IR for all of the functions into the current module. */ diff --git a/decl.cpp b/decl.cpp index 5dab985e..942df0ce 100644 --- a/decl.cpp +++ b/decl.cpp @@ -33,7 +33,7 @@ /** @file decl.cpp @brief Implementations of classes related to turning declarations into - symbols and types. + symbol names and types. 
*/ #include "decl.h" @@ -218,50 +218,44 @@ Declarator::Declarator(DeclaratorKind dk, SourcePos p) : pos(p), kind(dk) { child = NULL; typeQualifiers = 0; + storageClass = SC_NONE; arraySize = -1; - sym = NULL; + type = NULL; initExpr = NULL; } void Declarator::InitFromDeclSpecs(DeclSpecs *ds) { - const Type *t = GetType(ds); - if (t == NULL) { + const Type *baseType = ds->GetBaseType(pos); + InitFromType(baseType, ds); + + if (type == NULL) { Assert(m->errorCount > 0); return; } - Symbol *sym = GetSymbol(); - if (sym != NULL) { - sym->type = t; - sym->storageClass = ds->storageClass; + storageClass = ds->storageClass; + + if (ds->declSpecList.size() > 0 && + dynamic_cast(type) == NULL) { + Error(pos, "__declspec specifiers for non-function type \"%s\" are " + "not used.", type->GetString().c_str()); } } -Symbol * -Declarator::GetSymbol() const { - // The symbol lives at the last child in the chain, so walk down there - // and return the one there. - const Declarator *d = this; - while (d->child != NULL) - d = d->child; - return d->sym; -} - - void Declarator::Print(int indent) const { printf("%*cdeclarator: [", indent, ' '); pos.Print(); lPrintTypeQualifiers(typeQualifiers); - Symbol *sym = GetSymbol(); - if (sym != NULL) - printf("%s", sym->name.c_str()); + printf("%s ", lGetStorageClassName(storageClass)); + if (name.size() > 0) + printf("%s", name.c_str()); else - printf("(null symbol)"); + printf("(unnamed)"); printf(", array size = %d", arraySize); @@ -295,55 +289,8 @@ Declarator::Print(int indent) const { } -Symbol * -Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector *funArgs) { - const FunctionType *type = - dynamic_cast(GetType(ds)); - if (type == NULL) - return NULL; - - Symbol *declSym = GetSymbol(); - Assert(declSym != NULL); - - // Get the symbol for the function from the symbol table. (It should - // already have been added to the symbol table by AddGlobal() by the - // time we get here.) 
- Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type); - if (funSym == NULL) - // May be NULL due to error earlier in compilation - Assert(m->errorCount > 0); - else - funSym->pos = pos; - - // Walk down to the declarator for the function. (We have to get past - // the stuff that specifies the function's return type before we get to - // the function's declarator.) - Declarator *d = this; - while (d != NULL && d->kind != DK_FUNCTION) - d = d->child; - Assert(d != NULL); - - for (unsigned int i = 0; i < d->functionParams.size(); ++i) { - Symbol *sym = d->GetSymbolForFunctionParameter(i); - if (sym->type == NULL) { - Assert(m->errorCount > 0); - continue; - } - else - sym->type = sym->type->ResolveUnboundVariability(Variability::Varying); - - funArgs->push_back(sym); - } - - if (funSym != NULL) - funSym->type = funSym->type->ResolveUnboundVariability(Variability::Varying); - - return funSym; -} - - -const Type * -Declarator::GetType(const Type *base, DeclSpecs *ds) const { +void +Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0); bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0); bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0); @@ -352,12 +299,16 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { if (hasUniformQual && hasVaryingQual) { Error(pos, "Can't provide both \"uniform\" and \"varying\" qualifiers."); - return NULL; + return; } - if (kind != DK_FUNCTION && isTask) + if (kind != DK_FUNCTION && isTask) { Error(pos, "\"task\" qualifier illegal in variable declaration."); - if (kind != DK_FUNCTION && isExported) + return; + } + if (kind != DK_FUNCTION && isExported) { Error(pos, "\"export\" qualifier illegal in variable declaration."); + return; + } Variability variability(Variability::Unbound); if (hasUniformQual) @@ -365,66 +316,76 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { else if (hasVaryingQual) 
variability = Variability::Varying; - const Type *type = base; - switch (kind) { - case DK_BASE: + if (kind == DK_BASE) { // All of the type qualifiers should be in the DeclSpecs for the // base declarator Assert(typeQualifiers == 0); Assert(child == NULL); - return type; - - case DK_POINTER: + type = baseType; + } + else if (kind == DK_POINTER) { /* For now, any pointer to an SOA type gets the slice property; if we add the capability to declare pointers as slices or not, we'll want to set this based on a type qualifier here. */ - type = new PointerType(type, variability, isConst, type->IsSOAType()); - if (child != NULL) - return child->GetType(type, ds); + const Type *ptrType = new PointerType(baseType, variability, isConst, + baseType->IsSOAType()); + if (child != NULL) { + child->InitFromType(ptrType, ds); + type = child->type; + name = child->name; + } else - return type; - break; - - case DK_REFERENCE: - if (hasUniformQual) + type = ptrType; + } + else if (kind == DK_REFERENCE) { + if (hasUniformQual) { Error(pos, "\"uniform\" qualifier is illegal to apply to references."); - if (hasVaryingQual) + return; + } + if (hasVaryingQual) { Error(pos, "\"varying\" qualifier is illegal to apply to references."); - if (isConst) + return; + } + if (isConst) { Error(pos, "\"const\" qualifier is to illegal apply to references."); - + return; + } // The parser should disallow this already, but double check. 
- if (dynamic_cast(type) != NULL) { + if (dynamic_cast(baseType) != NULL) { Error(pos, "References to references are illegal."); - return NULL; + return; } - type = new ReferenceType(type); - if (child != NULL) - return child->GetType(type, ds); + const Type *refType = new ReferenceType(baseType); + if (child != NULL) { + child->InitFromType(refType, ds); + type = child->type; + name = child->name; + } else - return type; - break; - - case DK_ARRAY: - if (Type::Equal(type, AtomicType::Void)) { + type = refType; + } + else if (kind == DK_ARRAY) { + if (Type::Equal(baseType, AtomicType::Void)) { Error(pos, "Arrays of \"void\" type are illegal."); - return NULL; + return; } - if (dynamic_cast(type)) { + if (dynamic_cast(baseType)) { Error(pos, "Arrays of references (type \"%s\") are illegal.", - type->GetString().c_str()); - return NULL; + baseType->GetString().c_str()); + return; } - type = new ArrayType(type, arraySize); - if (child) - return child->GetType(type, ds); + const Type *arrayType = new ArrayType(baseType, arraySize); + if (child != NULL) { + child->InitFromType(arrayType, ds); + type = child->type; + name = child->name; + } else - return type; - break; - - case DK_FUNCTION: { + type = arrayType; + } + else if (kind == DK_FUNCTION) { std::vector args; std::vector argNames; std::vector argDefaults; @@ -436,20 +397,40 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { for (unsigned int i = 0; i < functionParams.size(); ++i) { Declaration *d = functionParams[i]; - Symbol *sym = GetSymbolForFunctionParameter(i); - - if (d->declSpecs->storageClass != SC_NONE) - Error(sym->pos, "Storage class \"%s\" is illegal in " - "function parameter declaration for parameter \"%s\".", - lGetStorageClassName(d->declSpecs->storageClass), - sym->name.c_str()); - if (Type::Equal(sym->type, AtomicType::Void)) { - Error(sym->pos, "Parameter with type \"void\" illegal in function " - "parameter list."); - sym->type = NULL; + if (d == NULL) { + Assert(m->errorCount > 
0); + continue; + } + if (d->declarators.size() == 0) { + // function declaration like foo(float), w/o a name for the + // parameter; wire up a placeholder Declarator for it + d->declarators.push_back(new Declarator(DK_BASE, pos)); + d->declarators[0]->InitFromDeclSpecs(d->declSpecs); } - const ArrayType *at = dynamic_cast(sym->type); + Assert(d->declarators.size() == 1); + Declarator *decl = d->declarators[0]; + + if (decl->name == "") { + // Give a name to any anonymous parameter declarations + char buf[32]; + sprintf(buf, "__anon_parameter_%d", i); + decl->name = buf; + } + decl->type = decl->type->ResolveUnboundVariability(Variability::Varying); + + if (d->declSpecs->storageClass != SC_NONE) + Error(decl->pos, "Storage class \"%s\" is illegal in " + "function parameter declaration for parameter \"%s\".", + lGetStorageClassName(d->declSpecs->storageClass), + decl->name.c_str()); + if (Type::Equal(decl->type, AtomicType::Void)) { + Error(decl->pos, "Parameter with type \"void\" illegal in function " + "parameter list."); + decl->type = NULL; + } + + const ArrayType *at = dynamic_cast(decl->type); if (at != NULL) { // As in C, arrays are passed to functions as pointers to // their element type. We'll just immediately make this @@ -459,69 +440,66 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { // report this differently than it was originally declared // in the function, but it's not clear that this is a // significant problem.) - if (at->GetElementType() == NULL) { + const Type *targetType = at->GetElementType(); + if (targetType == NULL) { Assert(m->errorCount > 0); - return NULL; + return; } - const Type *targetType = at->GetElementType(); - targetType = - targetType->ResolveUnboundVariability(Variability::Varying); - sym->type = PointerType::GetUniform(targetType); + decl->type = PointerType::GetUniform(targetType); // Make sure there are no unsized arrays (other than the // first dimension) in function parameter lists. 
- at = dynamic_cast(at->GetElementType()); + at = dynamic_cast(targetType); while (at != NULL) { if (at->GetElementCount() == 0) - Error(sym->pos, "Arrays with unsized dimensions in " + Error(decl->pos, "Arrays with unsized dimensions in " "dimensions after the first one are illegal in " "function parameter lists."); at = dynamic_cast(at->GetElementType()); } } - args.push_back(sym->type); - argNames.push_back(sym->name); - argPos.push_back(sym->pos); + args.push_back(decl->type); + argNames.push_back(decl->name); + argPos.push_back(decl->pos); Expr *init = NULL; - if (d->declarators.size()) { - // Try to find an initializer expression. - Declarator *decl = d->declarators[0]; - while (decl != NULL) { + // Try to find an initializer expression. + while (decl != NULL) { + if (decl->initExpr != NULL) { + decl->initExpr = TypeCheck(decl->initExpr); + decl->initExpr = Optimize(decl->initExpr); if (decl->initExpr != NULL) { - decl->initExpr = TypeCheck(decl->initExpr); - decl->initExpr = Optimize(decl->initExpr); - if (decl->initExpr != NULL) { - init = dynamic_cast(decl->initExpr); - if (init == NULL) - init = dynamic_cast(decl->initExpr); - if (init == NULL) - Error(decl->initExpr->pos, "Default value for parameter " - "\"%s\" must be a compile-time constant.", - sym->name.c_str()); - } - break; + init = dynamic_cast(decl->initExpr); + if (init == NULL) + init = dynamic_cast(decl->initExpr); + if (init == NULL) + Error(decl->initExpr->pos, "Default value for parameter " + "\"%s\" must be a compile-time constant.", + decl->name.c_str()); } - else - decl = decl->child; + break; } + else + decl = decl->child; } argDefaults.push_back(init); } - const Type *returnType = type; + const Type *returnType = baseType; if (returnType == NULL) { Error(pos, "No return type provided in function declaration."); - return NULL; + return; } if (dynamic_cast(returnType) != NULL) { Error(pos, "Illegal to return function type from function."); - return NULL; + return; } + returnType = 
returnType->ResolveUnboundVariability(Variability::Varying); + bool isExternC = ds && (ds->storageClass == SC_EXTERN_C); bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0); bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0); @@ -529,28 +507,27 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { if (isExported && isTask) { Error(pos, "Function can't have both \"task\" and \"export\" " "qualifiers"); - return NULL; + return; } if (isExternC && isTask) { Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" " "qualifiers"); - return NULL; + return; } if (isExternC && isExported) { Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" " "qualifiers"); - return NULL; + return; } if (child == NULL) { Assert(m->errorCount > 0); - return NULL; + return; } const FunctionType *functionType = new FunctionType(returnType, args, argNames, argDefaults, argPos, isTask, isExported, isExternC); - functionType = functionType->ResolveUnboundVariability(Variability::Varying); // handle any explicit __declspecs on the function if (ds != NULL) { @@ -572,60 +549,12 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { } } - return child->GetType(functionType, ds); - } - default: - FATAL("Unexpected decl kind"); - return NULL; + child->InitFromType(functionType, ds); + type = child->type; + name = child->name; } } - -const Type * -Declarator::GetType(DeclSpecs *ds) const { - const Type *baseType = ds->GetBaseType(pos); - const Type *type = GetType(baseType, ds); - - if (ds->declSpecList.size() > 0 && - type != NULL && - dynamic_cast(type) == NULL) { - Error(pos, "__declspec specifiers for non-function type \"%s\" are " - "not used.", type->GetString().c_str()); - } - - return type; -} - - -Symbol * -Declarator::GetSymbolForFunctionParameter(int paramNum) const { - Assert(paramNum < (int)functionParams.size()); - Declaration *d = functionParams[paramNum]; - - char buf[32]; - Symbol *sym; - if 
(d->declarators.size() == 0) { - // function declaration like foo(float), w/o a name for - // the parameter - sprintf(buf, "__anon_parameter_%d", paramNum); - sym = new Symbol(buf, pos); - sym->type = d->declSpecs->GetBaseType(pos); - } - else { - Assert(d->declarators.size() == 1); - sym = d->declarators[0]->GetSymbol(); - if (sym == NULL) { - // Handle more complex anonymous declarations like - // float (float **). - sprintf(buf, "__anon_parameter_%d", paramNum); - sym = new Symbol(buf, d->declarators[0]->pos); - sym->type = d->declarators[0]->GetType(d->declSpecs); - } - } - return sym; -} - - /////////////////////////////////////////////////////////////////////////// // Declaration @@ -655,27 +584,23 @@ Declaration::GetVariableDeclarations() const { for (unsigned int i = 0; i < declarators.size(); ++i) { Declarator *decl = declarators[i]; - if (decl == NULL) { + if (decl == NULL || decl->type == NULL) { // Ignore earlier errors Assert(m->errorCount > 0); continue; } - Symbol *sym = decl->GetSymbol(); - if (sym == NULL || sym->type == NULL) { - // Ignore errors - Assert(m->errorCount > 0); - continue; - } - sym->type = sym->type->ResolveUnboundVariability(Variability::Varying); - - if (Type::Equal(sym->type, AtomicType::Void)) - Error(sym->pos, "\"void\" type variable illegal in declaration."); - else if (dynamic_cast(sym->type) == NULL) { + if (Type::Equal(decl->type, AtomicType::Void)) + Error(decl->pos, "\"void\" type variable illegal in declaration."); + else if (dynamic_cast(decl->type) == NULL) { + decl->type = decl->type->ResolveUnboundVariability(Variability::Varying); + Symbol *sym = new Symbol(decl->name, decl->pos, decl->type, + decl->storageClass); m->symbolTable->AddVariable(sym); vars.push_back(VariableDeclaration(sym, decl->initExpr)); } } + return vars; } @@ -686,25 +611,20 @@ Declaration::DeclareFunctions() { for (unsigned int i = 0; i < declarators.size(); ++i) { Declarator *decl = declarators[i]; - if (decl == NULL) { + if (decl == NULL || 
decl->type == NULL) { // Ignore earlier errors Assert(m->errorCount > 0); continue; } - Symbol *sym = decl->GetSymbol(); - if (sym == NULL || sym->type == NULL) { - // Ignore errors - Assert(m->errorCount > 0); - continue; - } - sym->type = sym->type->ResolveUnboundVariability(Variability::Varying); - - if (dynamic_cast(sym->type) == NULL) + const FunctionType *ftype = + dynamic_cast(decl->type); + if (ftype == NULL) continue; bool isInline = (declSpecs->typeQualifiers & TYPEQUAL_INLINE); - m->AddFunctionDeclaration(sym, isInline); + m->AddFunctionDeclaration(decl->name, ftype, decl->storageClass, + isInline, decl->pos); } } @@ -718,6 +638,7 @@ Declaration::Print(int indent) const { declarators[i]->Print(indent+4); } + /////////////////////////////////////////////////////////////////////////// void @@ -748,21 +669,19 @@ GetStructTypesNamesPositions(const std::vector &sd, Declarator *d = (*sd[i]->declarators)[j]; d->InitFromDeclSpecs(&ds); - Symbol *sym = d->GetSymbol(); - - if (Type::Equal(sym->type, AtomicType::Void)) + if (Type::Equal(d->type, AtomicType::Void)) Error(d->pos, "\"void\" type illegal for struct member."); - elementTypes->push_back(sym->type); + elementTypes->push_back(d->type); - if (seenNames.find(sym->name) != seenNames.end()) + if (seenNames.find(d->name) != seenNames.end()) Error(d->pos, "Struct member \"%s\" has same name as a " - "previously-declared member.", sym->name.c_str()); + "previously-declared member.", d->name.c_str()); else - seenNames.insert(sym->name); + seenNames.insert(d->name); - elementNames->push_back(sym->name); - elementPositions->push_back(sym->pos); + elementNames->push_back(d->name); + elementPositions->push_back(d->pos); } } diff --git a/decl.h b/decl.h index ff96e149..ea2cb0fd 100644 --- a/decl.h +++ b/decl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -47,8 +47,8 @@ variables--here, that the declaration has the 'static' and 'uniform' qualifiers, and that it's basic type is 'int'. Then for each variable declaration, the Declaraiton class holds an instance of a Declarator, - which in turn records the per-variable information like the symbol - name, array size (if any), initializer expression, etc. + which in turn records the per-variable information like the name, array + size (if any), initializer expression, etc. */ #ifndef ISPC_DECL_H @@ -61,15 +61,6 @@ struct VariableDeclaration; class Declaration; class Declarator; -enum StorageClass { - SC_NONE, - SC_EXTERN, - SC_STATIC, - SC_TYPEDEF, - SC_EXTERN_C -}; - - /* Multiple qualifiers can be provided with types in declarations; therefore, they are set up so that they can be ANDed together into an int. */ @@ -141,25 +132,11 @@ public: Declarator(DeclaratorKind dk, SourcePos p); /** Once a DeclSpecs instance is available, this method completes the - initialization of the Symbol, setting its Type accordingly. + initialization of the type member. */ void InitFromDeclSpecs(DeclSpecs *ds); - /** Get the actual type of the combination of Declarator and the given - DeclSpecs. If an explicit base type is provided, the declarator is - applied to that type; otherwise the base type from the DeclSpecs is - used. */ - const Type *GetType(DeclSpecs *ds) const; - const Type *GetType(const Type *base, DeclSpecs *ds) const; - - /** Returns the symbol corresponding to the function declared by this - declarator and symbols for its arguments in *args. */ - Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector *args); - - Symbol *GetSymbolForFunctionParameter(int paramNum) const; - - /** Returns the symbol associated with the declarator. 
*/ - Symbol *GetSymbol() const; + void InitFromType(const Type *base, DeclSpecs *ds); void Print(int indent) const; @@ -180,18 +157,24 @@ public: /** Type qualifiers provided with the declarator. */ int typeQualifiers; + StorageClass storageClass; + /** For array declarators, this gives the declared size of the array. Unsized arrays have arraySize == 0. */ int arraySize; - /** Symbol associated with the declarator. */ - Symbol *sym; + /** Name associated with the declarator. */ + std::string name; /** Initialization expression for the variable. May be NULL. */ Expr *initExpr; + /** Type of the declarator. This is NULL until InitFromDeclSpecs() or + InitFromType() is called. */ + const Type *type; + /** For function declarations, this holds the Declaration *s for the - funciton's parameters. */ + function's parameters. */ std::vector functionParams; }; diff --git a/func.cpp b/func.cpp index c1ca7ee6..d6e24d72 100644 --- a/func.cpp +++ b/func.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2011, Intel Corporation + Copyright (c) 2011-2012, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -66,9 +66,8 @@ #include #include -Function::Function(Symbol *s, const std::vector &a, Stmt *c) { +Function::Function(Symbol *s, Stmt *c) { sym = s; - args = a; code = c; maskSymbol = m->symbolTable->LookupVariable("__mask"); @@ -104,9 +103,17 @@ Function::Function(Symbol *s, const std::vector &a, Stmt *c) { const FunctionType *type = dynamic_cast(sym->type); Assert(type != NULL); - for (unsigned int i = 0; i < args.size(); ++i) - if (dynamic_cast(args[i]->type) == NULL) - args[i]->parentFunction = this; + for (int i = 0; i < type->GetNumParameters(); ++i) { + const char *paramName = type->GetParameterName(i).c_str(); + Symbol *sym = m->symbolTable->LookupVariable(paramName); + if (sym == NULL) + Assert(strncmp(paramName, "__anon_parameter_", 17) == 0); + args.push_back(sym); + + const Type *t = type->GetParameterType(i); + if (sym != NULL && dynamic_cast(t) == NULL) + sym->parentFunction = this; + } if (type->isTask) { threadIndexSym = m->symbolTable->LookupVariable("threadIndex"); @@ -145,7 +152,8 @@ Function::GetType() const { 'mem2reg' pass will in turn promote to SSA registers.. */ static void -lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector &args, +lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const + std::vector &args, FunctionEmitContext *ctx) { // We expect the argument structure to come in as a poitner to a // structure. Confirm and figure out its type here. 
@@ -160,6 +168,10 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vectorgetElementType(i); Symbol *sym = args[i]; + if (sym == NULL) + // anonymous parameter, so don't worry about it + return; + // allocate space to copy the parameter in to sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str()); @@ -240,6 +252,10 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Function::arg_iterator argIter = function->arg_begin(); for (unsigned int i = 0; i < args.size(); ++i, ++argIter) { Symbol *sym = args[i]; + if (sym == NULL) + // anonymous function parameter + continue; + argIter->setName(sym->name.c_str()); // Allocate stack storage for the parameter and emit code diff --git a/func.h b/func.h index d0bf0731..6d0527fc 100644 --- a/func.h +++ b/func.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011, Intel Corporation + Copyright (c) 2011-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ class Function { public: - Function(Symbol *sym, const std::vector &args, Stmt *code); + Function(Symbol *sym, Stmt *code); const Type *GetReturnType() const; const FunctionType *GetType() const; diff --git a/ispc.h b/ispc.h index 9e25baa0..b33cde02 100644 --- a/ispc.h +++ b/ispc.h @@ -116,6 +116,15 @@ class SymbolTable; class Type; struct VariableDeclaration; +enum StorageClass { + SC_NONE, + SC_EXTERN, + SC_STATIC, + SC_TYPEDEF, + SC_EXTERN_C +}; + + /** @brief Representation of a range of positions in a source file. This class represents a range of characters in a source file diff --git a/module.cpp b/module.cpp index 914a9bd2..d2d3afdd 100644 --- a/module.cpp +++ b/module.cpp @@ -231,64 +231,65 @@ Module::CompileFile() { void -Module::AddTypeDef(Symbol *sym) { +Module::AddTypeDef(const std::string &name, const Type *type, + SourcePos pos) { // Typedefs are easy; just add the mapping between the given name and // the given type. 
- symbolTable->AddType(sym->name.c_str(), sym->type, sym->pos); + symbolTable->AddType(name.c_str(), type, pos); } void -Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) { +Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initExpr, + bool isConst, StorageClass storageClass, SourcePos pos) { // These may be NULL due to errors in parsing; just gracefully return // here if so. - if (sym == NULL || sym->type == NULL) { - // But if these are NULL and there haven't been any previous - // errors, something surprising is going on + if (name == "" || type == NULL) { Assert(errorCount > 0); return; } - if (symbolTable->LookupFunction(sym->name.c_str())) { - Error(sym->pos, "Global variable \"%s\" shadows previously-declared " - "function.", sym->name.c_str()); + if (symbolTable->LookupFunction(name.c_str())) { + Error(pos, "Global variable \"%s\" shadows previously-declared " + "function.", name.c_str()); return; } - if (sym->storageClass == SC_EXTERN_C) { - Error(sym->pos, "extern \"C\" qualifier can only be used for " + if (storageClass == SC_EXTERN_C) { + Error(pos, "extern \"C\" qualifier can only be used for " "functions."); return; } - if (Type::Equal(sym->type, AtomicType::Void)) { - Error(sym->pos, "\"void\" type global variable is illegal."); + if (Type::Equal(type, AtomicType::Void)) { + Error(pos, "\"void\" type global variable is illegal."); return; } - sym->type = ArrayType::SizeUnsizedArrays(sym->type, initExpr); - if (sym->type == NULL) + type = ArrayType::SizeUnsizedArrays(type, initExpr); + if (type == NULL) return; - const ArrayType *at = dynamic_cast(sym->type); + const ArrayType *at = dynamic_cast(type); if (at != NULL && at->TotalElementCount() == 0) { - Error(sym->pos, "Illegal to declare a global variable with unsized " + Error(pos, "Illegal to declare a global variable with unsized " "array dimensions that aren't set with an initializer " "expression."); return; } - LLVM_TYPE_CONST llvm::Type *llvmType = 
sym->type->LLVMType(g->ctx); + LLVM_TYPE_CONST llvm::Type *llvmType = type->LLVMType(g->ctx); if (llvmType == NULL) return; // See if we have an initializer expression for the global. If so, // make sure it's a compile-time constant! llvm::Constant *llvmInitializer = NULL; - if (sym->storageClass == SC_EXTERN || sym->storageClass == SC_EXTERN_C) { + ConstExpr *constValue = NULL; + if (storageClass == SC_EXTERN || storageClass == SC_EXTERN_C) { if (initExpr != NULL) - Error(sym->pos, "Initializer can't be provided with \"extern\" " - "global variable \"%s\".", sym->name.c_str()); + Error(pos, "Initializer can't be provided with \"extern\" " + "global variable \"%s\".", name.c_str()); } else { if (initExpr != NULL) { @@ -299,27 +300,26 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) { // ExprList; they don't have types per se / can't type // convert themselves anyway.) if (dynamic_cast(initExpr) == NULL) - initExpr = TypeConvertExpr(initExpr, sym->type, "initializer"); + initExpr = TypeConvertExpr(initExpr, type, "initializer"); if (initExpr != NULL) { initExpr = Optimize(initExpr); // Fingers crossed, now let's see if we've got a // constant value.. - llvmInitializer = initExpr->GetConstant(sym->type); + llvmInitializer = initExpr->GetConstant(type); if (llvmInitializer != NULL) { - if (sym->type->IsConstType()) + if (type->IsConstType()) // Try to get a ConstExpr associated with // the symbol. This dynamic_cast can // validly fail, for example for types like // StructTypes where a ConstExpr can't // represent their values. 
- sym->constValue = - dynamic_cast(initExpr); + constValue = dynamic_cast(initExpr); } else Error(initExpr->pos, "Initializer for global variable \"%s\" " - "must be a constant.", sym->name.c_str()); + "must be a constant.", name.c_str()); } } } @@ -330,30 +330,33 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) { llvmInitializer = llvm::Constant::getNullValue(llvmType); } - Symbol *stSym = symbolTable->LookupVariable(sym->name.c_str()); + Symbol *sym = symbolTable->LookupVariable(name.c_str()); llvm::GlobalVariable *oldGV = NULL; - if (stSym != NULL) { + if (sym != NULL) { // We've already seen either a declaration or a definition of this // global. // If the type doesn't match with the previous one, issue an error. - if (!Type::Equal(sym->type, stSym->type)) { - Error(sym->pos, "Definition of variable \"%s\" conflicts with " - "definition at %s:%d.", sym->name.c_str(), - stSym->pos.name, stSym->pos.first_line); + if (!Type::Equal(sym->type, type) || + (sym->storageClass != SC_EXTERN && + sym->storageClass != SC_EXTERN_C && + sym->storageClass != storageClass)) { + Error(pos, "Definition of variable \"%s\" conflicts with " + "definition at %s:%d.", name.c_str(), + sym->pos.name, sym->pos.first_line); return; } llvm::GlobalVariable *gv = - llvm::dyn_cast(stSym->storagePtr); + llvm::dyn_cast(sym->storagePtr); Assert(gv != NULL); // And issue an error if this is a redefinition of a variable if (gv->hasInitializer() && sym->storageClass != SC_EXTERN && sym->storageClass != SC_EXTERN_C) { - Error(sym->pos, "Redefinition of variable \"%s\" is illegal. " + Error(pos, "Redefinition of variable \"%s\" is illegal. " "(Previous definition at %s:%d.)", sym->name.c_str(), - stSym->pos.name, stSym->pos.first_line); + sym->pos.name, sym->pos.first_line); return; } @@ -361,17 +364,12 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) { // of a previously-declared global. 
First, save the pointer to the // previous llvm::GlobalVariable oldGV = gv; - - // Now copy over all of the members of the current Symbol to the - // symbol in the symbol table. - *stSym = *sym; - // And copy the pointer of the one in the symbol table to sym, so - // that the operations below update storagePtr for the Symbol - // already in the symbol table. - sym = stSym; } - else + else { + sym = new Symbol(name, pos, type, storageClass); symbolTable->AddVariable(sym); + } + sym->constValue = constValue; llvm::GlobalValue::LinkageTypes linkage = (sym->storageClass == SC_STATIC) ? llvm::GlobalValue::InternalLinkage : @@ -393,10 +391,10 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) { } if (diBuilder) { - llvm::DIFile file = sym->pos.GetDIFile(); - diBuilder->createGlobalVariable(sym->name, + llvm::DIFile file = pos.GetDIFile(); + diBuilder->createGlobalVariable(name, file, - sym->pos.first_line, + pos.first_line, sym->type->GetDIType(file), (sym->storageClass == SC_STATIC), sym->storagePtr); @@ -487,22 +485,23 @@ lCheckForStructParameters(const FunctionType *ftype, SourcePos pos) { false if any errors were encountered. */ void -Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { - const FunctionType *functionType = - dynamic_cast(funSym->type); +Module::AddFunctionDeclaration(const std::string &name, + const FunctionType *functionType, + StorageClass storageClass, bool isInline, + SourcePos pos) { Assert(functionType != NULL); // If a global variable with the same name has already been declared // issue an error. - if (symbolTable->LookupVariable(funSym->name.c_str()) != NULL) { - Error(funSym->pos, "Function \"%s\" shadows previously-declared global variable. " + if (symbolTable->LookupVariable(name.c_str()) != NULL) { + Error(pos, "Function \"%s\" shadows previously-declared global variable. 
" "Ignoring this definition.", - funSym->name.c_str()); + name.c_str()); return; } std::vector overloadFuncs; - symbolTable->LookupFunction(funSym->name.c_str(), &overloadFuncs); + symbolTable->LookupFunction(name.c_str(), &overloadFuncs); if (overloadFuncs.size() > 0) { for (unsigned int i = 0; i < overloadFuncs.size(); ++i) { Symbol *overloadFunc = overloadFuncs[i]; @@ -528,7 +527,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { if (i == functionType->GetNumParameters()) { std::string thisRetType = functionType->GetReturnTypeString(); std::string otherRetType = ofType->GetReturnTypeString(); - Error(funSym->pos, "Illegal to overload function by return " + Error(pos, "Illegal to overload function by return " "type only. This function returns \"%s\" while " "previous declaration at %s:%d returns \"%s\".", thisRetType.c_str(), overloadFunc->pos.name, @@ -539,55 +538,54 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { } } - if (funSym->storageClass == SC_EXTERN_C) { + if (storageClass == SC_EXTERN_C) { // Make sure the user hasn't supplied both an 'extern "C"' and a // 'task' qualifier with the function if (functionType->isTask) { - Error(funSym->pos, "\"task\" qualifier is illegal with C-linkage extern " - "function \"%s\". Ignoring this function.", funSym->name.c_str()); + Error(pos, "\"task\" qualifier is illegal with C-linkage extern " + "function \"%s\". 
Ignoring this function.", name.c_str()); return; } std::vector funcs; - symbolTable->LookupFunction(funSym->name.c_str(), &funcs); + symbolTable->LookupFunction(name.c_str(), &funcs); if (funcs.size() > 0) { if (funcs.size() > 1) { // Multiple functions with this name have already been declared; // can't overload here - Error(funSym->pos, "Can't overload extern \"C\" function \"%s\"; " + Error(pos, "Can't overload extern \"C\" function \"%s\"; " "%d functions with the same name have already been declared.", - funSym->name.c_str(), (int)funcs.size()); + name.c_str(), (int)funcs.size()); return; } // One function with the same name has been declared; see if it // has the same type as this one, in which case it's ok. - if (Type::Equal(funcs[0]->type, funSym->type)) + if (Type::Equal(funcs[0]->type, functionType)) return; else { - Error(funSym->pos, "Can't overload extern \"C\" function \"%s\".", - funSym->name.c_str()); + Error(pos, "Can't overload extern \"C\" function \"%s\".", + name.c_str()); return; } } } // Get the LLVM FunctionType - bool includeMask = (funSym->storageClass != SC_EXTERN_C); + bool includeMask = (storageClass != SC_EXTERN_C); LLVM_TYPE_CONST llvm::FunctionType *llvmFunctionType = functionType->LLVMFunctionType(g->ctx, includeMask); if (llvmFunctionType == NULL) return; // And create the llvm::Function - llvm::GlobalValue::LinkageTypes linkage = (funSym->storageClass == SC_STATIC || + llvm::GlobalValue::LinkageTypes linkage = (storageClass == SC_STATIC || isInline) ? 
llvm::GlobalValue::InternalLinkage : llvm::GlobalValue::ExternalLinkage; - std::string functionName; - if (funSym->storageClass == SC_EXTERN_C) - functionName = funSym->name; - else { - functionName = funSym->MangledName(); + + std::string functionName = name; + if (storageClass != SC_EXTERN_C) { + functionName += functionType->Mangle(); if (g->mangleFunctionsWithTarget) functionName += g->target.GetISAString(); } @@ -597,7 +595,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { // Set function attributes: we never throw exceptions function->setDoesNotThrow(true); - if (!(funSym->storageClass == SC_EXTERN_C) && + if (storageClass != SC_EXTERN_C && !g->generateDebuggingSymbols && isInline) function->addFnAttr(llvm::Attribute::AlwaysInline); @@ -609,15 +607,15 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { // 'export'ed. if (functionType->isExported && lRecursiveCheckValidParamType(functionType->GetReturnType())) - Error(funSym->pos, "Illegal to return a \"varying\" type from exported " - "function \"%s\"", funSym->name.c_str()); + Error(pos, "Illegal to return a \"varying\" type from exported " + "function \"%s\"", name.c_str()); if (functionType->isTask && Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false) - Error(funSym->pos, "Task-qualified functions must have void return type."); + Error(pos, "Task-qualified functions must have void return type."); if (functionType->isExported || functionType->isExternC) - lCheckForStructParameters(functionType, funSym->pos); + lCheckForStructParameters(functionType, pos); // Loop over all of the arguments; process default values if present // and do other checks and parameter attribute setting. 
@@ -675,19 +673,33 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) { function->eraseFromParent(); function = module->getFunction(functionName); } - funSym->function = function; // Finally, we know all is good and we can add the function to the // symbol table + Symbol *funSym = new Symbol(name, pos, functionType, storageClass); + funSym->function = function; bool ok = symbolTable->AddFunction(funSym); Assert(ok); } void -Module::AddFunctionDefinition(Symbol *sym, const std::vector &args, +Module::AddFunctionDefinition(const std::string &name, const FunctionType *type, Stmt *code) { - ast->AddFunction(sym, args, code); + Symbol *sym = symbolTable->LookupFunction(name.c_str(), type); + if (sym == NULL) { + Assert(m->errorCount > 0); + return; + } + + // FIXME: because we encode the parameter names in the function type, + // we need to override the function type here in case the function had + // earlier been declared with anonymous parameter names but is now + // defined with actual names. This is yet another reason we shouldn't + // include the names in FunctionType... + sym->type = type; + + ast->AddFunction(sym, code); } diff --git a/module.h b/module.h index 9032548f..96231a5f 100644 --- a/module.h +++ b/module.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -59,21 +59,26 @@ public: int CompileFile(); /** Add a named type definition to the module. */ - void AddTypeDef(Symbol *sym); + void AddTypeDef(const std::string &name, const Type *type, + SourcePos pos); /** Add a new global variable corresponding to the given Symbol to the module. If non-NULL, initExpr gives the initiailizer expression for the global's inital value. 
*/ - void AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst); + void AddGlobalVariable(const std::string &name, const Type *type, + Expr *initExpr, bool isConst, + StorageClass storageClass, SourcePos pos); /** Add a declaration of the function defined by the given function symbol to the module. */ - void AddFunctionDeclaration(Symbol *funSym, bool isInline); + void AddFunctionDeclaration(const std::string &name, + const FunctionType *ftype, + StorageClass sc, bool isInline, SourcePos pos); /** Adds the function described by the declaration information and the provided statements to the module. */ - void AddFunctionDefinition(Symbol *sym, const std::vector &args, - Stmt *code); + void AddFunctionDefinition(const std::string &name, + const FunctionType *ftype, Stmt *code); /** After a source file has been compiled, output can be generated in a number of different formats. */ diff --git a/parse.yy b/parse.yy index 8448a559..8a1e02ee 100644 --- a/parse.yy +++ b/parse.yy @@ -631,7 +631,9 @@ declaration_statement if ($1->declarators[i] == NULL) Assert(m->errorCount > 0); else - m->AddTypeDef($1->declarators[i]->GetSymbol()); + m->AddTypeDef($1->declarators[i]->name, + $1->declarators[i]->type, + $1->declarators[i]->pos); } $$ = NULL; } @@ -1174,7 +1176,7 @@ direct_declarator : TOKEN_IDENTIFIER { Declarator *d = new Declarator(DK_BASE, @1); - d->sym = new Symbol(yytext, @1); + d->name = yytext; $$ = d; } | '(' declarator ')' @@ -1349,8 +1351,10 @@ type_name { if ($1 == NULL || $2 == NULL) $$ = NULL; - else - $$ = $2->GetType($1, NULL); + else { + $2->InitFromType($1, NULL); + $$ = $2->type; + } } ; @@ -1854,11 +1858,14 @@ function_definition } compound_statement { - std::vector args; if ($2 != NULL) { - Symbol *sym = $2->GetFunctionInfo($1, &args); - if (sym != NULL) - m->AddFunctionDefinition(sym, args, $4); + $2->InitFromDeclSpecs($1); + const FunctionType *funcType = + dynamic_cast($2->type); + if (funcType == NULL) + Assert(m->errorCount > 0); + else + 
m->AddFunctionDefinition($2->name, funcType, $4); } m->symbolTable->PopScope(); // push in lAddFunctionParams(); } @@ -1968,35 +1975,27 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) { // Error happened earlier during parsing return; + decl->InitFromDeclSpecs(ds); if (ds->storageClass == SC_TYPEDEF) - m->AddTypeDef(decl->GetSymbol()); + m->AddTypeDef(decl->name, decl->type, decl->pos); else { - const Type *t = decl->GetType(ds); - if (t == NULL) { + if (decl->type == NULL) { Assert(m->errorCount > 0); return; } - Symbol *sym = decl->GetSymbol(); - if (sym == NULL) { - Assert(m->errorCount > 0); - return; - } - - const FunctionType *ft = dynamic_cast(t); + decl->type = decl->type->ResolveUnboundVariability(Variability::Varying); + + const FunctionType *ft = dynamic_cast(decl->type); if (ft != NULL) { - sym->type = ft; - sym->storageClass = ds->storageClass; bool isInline = (ds->typeQualifiers & TYPEQUAL_INLINE); - m->AddFunctionDeclaration(sym, isInline); + m->AddFunctionDeclaration(decl->name, ft, ds->storageClass, + isInline, decl->pos); } else { - if (sym->type == NULL) - Assert(m->errorCount > 0); - else - sym->type = sym->type->ResolveUnboundVariability(Variability::Varying); bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0; - m->AddGlobalVariable(sym, decl->initExpr, isConst); + m->AddGlobalVariable(decl->name, decl->type, decl->initExpr, + isConst, decl->storageClass, decl->pos); } } } @@ -2025,16 +2024,13 @@ lAddFunctionParams(Declarator *decl) { // now loop over its parameters and add them to the symbol table for (unsigned int i = 0; i < decl->functionParams.size(); ++i) { Declaration *pdecl = decl->functionParams[i]; - if (pdecl == NULL || pdecl->declarators.size() == 0) - // zero size declarators array corresponds to an anonymous - // parameter - continue; - Assert(pdecl->declarators.size() == 1); - Symbol *sym = pdecl->declarators[0]->GetSymbol(); - if (sym == NULL || sym->type == NULL) + Assert(pdecl != NULL && pdecl->declarators.size() == 
1); + Declarator *declarator = pdecl->declarators[0]; + if (declarator == NULL) Assert(m->errorCount > 0); else { - sym->type = sym->type->ResolveUnboundVariability(Variability::Varying); + Symbol *sym = new Symbol(declarator->name, declarator->pos, + declarator->type, declarator->storageClass); #ifndef NDEBUG bool ok = m->symbolTable->AddVariable(sym); if (ok == false) diff --git a/sym.cpp b/sym.cpp index f60dc1aa..1a503c91 100644 --- a/sym.cpp +++ b/sym.cpp @@ -56,12 +56,6 @@ Symbol::Symbol(const std::string &n, SourcePos p, const Type *t, } -std::string -Symbol::MangledName() const { - return name + type->Mangle(); -} - - /////////////////////////////////////////////////////////////////////////// // SymbolTable diff --git a/sym.h b/sym.h index 8e14495a..24eb810f 100644 --- a/sym.h +++ b/sym.h @@ -67,13 +67,6 @@ public: Symbol(const std::string &name, SourcePos pos, const Type *t = NULL, StorageClass sc = SC_NONE); - /** This method should only be called for function symbols; for them, - it returns a mangled version of the function name with the argument - types encoded into the returned name. This is used to generate - unique symbols in object files for overloaded functions. 
- */ - std::string MangledName() const; - SourcePos pos; /*!< Source file position where the symbol was defined */ std::string name; /*!< Symbol's name */ llvm::Value *storagePtr; /*!< For symbols with storage associated with diff --git a/tests/func-anon-param.ispc b/tests/func-anon-param.ispc new file mode 100644 index 00000000..8bf97065 --- /dev/null +++ b/tests/func-anon-param.ispc @@ -0,0 +1,15 @@ + + +export uniform int width() { return programCount; } + +float foo(float &) { return 1; } +float bar(uniform float []) { return 2; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float x = 0; + RET[programIndex] = foo(x) + bar(aFOO); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 3; +} diff --git a/tests/global-decl-define.ispc b/tests/global-decl-define.ispc new file mode 100644 index 00000000..44fb92a7 --- /dev/null +++ b/tests/global-decl-define.ispc @@ -0,0 +1,14 @@ + + +export uniform int width() { return programCount; } + +extern int foo; +int foo = 1; + +export void f_f(uniform float RET[], uniform float aFOO[]) { + RET[programIndex] = foo; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1; +} diff --git a/tests/ptr-cast-complex.ispc b/tests/ptr-cast-complex.ispc new file mode 100644 index 00000000..afdbf5e7 --- /dev/null +++ b/tests/ptr-cast-complex.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform int x[2][10]; + for (uniform int i = 0; i < 2; ++i) { + for (uniform int j = 0; j < 10; ++j) { + x[i][j] = 10*i+j; + } + } + + uniform int (* varying y)[10] = x; + RET[programIndex] = y[1][programIndex % 5]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 10+ (programIndex % 5); +} From f0d013ee76132e306cd352a18ccc81227797fcc7 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 12 Apr 2012 20:19:41 -0700 Subject: [PATCH 067/173] Fix incorrect assert. 
Issue #241 --- stmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stmt.cpp b/stmt.cpp index bf7fa661..53a451ae 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2823,7 +2823,7 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) { Expr *maskVecExpr = new SymbolExpr(maskSym, pos); std::vector mmFuns; m->symbolTable->LookupFunction("__movmsk", &mmFuns); - Assert(mmFuns.size() == (g->target.isa == Target::GENERIC ? 1 : 2)); + Assert(mmFuns.size() == (g->target.maskBitCount == 32 ? 2 : 1)); FunctionSymbolExpr *movmskFunc = new FunctionSymbolExpr("__movmsk", mmFuns, pos); ExprList *movmskArgs = new ExprList(maskVecExpr, pos); From f4a2ef28e3c6f6db77da20ae760c3cedf4c52ee2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 13 Apr 2012 19:42:07 -0700 Subject: [PATCH 068/173] Fix crashes from malformed programs. --- decl.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/decl.cpp b/decl.cpp index 942df0ce..581cca3c 100644 --- a/decl.cpp +++ b/decl.cpp @@ -410,6 +410,10 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { Assert(d->declarators.size() == 1); Declarator *decl = d->declarators[0]; + if (decl == NULL || decl->type == NULL) { + Assert(m->errorCount > 0); + continue; + } if (decl->name == "") { // Give a name to any anonymous parameter declarations From 17b7148300a9043e9511b32fcf1f4f9f4488fbca Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 13 Apr 2012 19:50:45 -0700 Subject: [PATCH 069/173] Initial implementation of FunctionType::GetDIType --- type.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/type.cpp b/type.cpp index 3795a3dc..e1372902 100644 --- a/type.cpp +++ b/type.cpp @@ -2535,9 +2535,22 @@ FunctionType::LLVMType(llvm::LLVMContext *ctx) const { llvm::DIType FunctionType::GetDIType(llvm::DIDescriptor scope) const { - // @todo need to implement FunctionType::GetDIType() - FATAL("need to implement FunctionType::GetDIType()"); - return llvm::DIType(); + 
std::vector retArgTypes; + + retArgTypes.push_back(returnType->GetDIType(scope)); + for (int i = 0; i < GetNumParameters(); ++i) { + const Type *t = GetParameterType(i); + if (t == NULL) + return llvm::DIType(); + retArgTypes.push_back(t->GetDIType(scope)); + } + + llvm::DIArray retArgTypesArray = + m->diBuilder->getOrCreateArray(llvm::ArrayRef(retArgTypes)); + llvm::DIType diType = + // FIXME: DIFile + m->diBuilder->createSubroutineType(llvm::DIFile(), retArgTypesArray); + return diType; } From 098c4910ded1e1fda240d1d9b772b6d46da12dfa Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sun, 15 Apr 2012 20:08:51 -0700 Subject: [PATCH 070/173] Remove support for building with LLVM 2.9. A forthcoming change uses some features of LLVM 3.0's new type system, and it's not worth back-porting this to also all work with LLVM 2.9. --- builtins.cpp | 2 -- cbackend.cpp | 6 ---- ctx.cpp | 34 +------------------- expr.cpp | 4 --- ispc.h | 4 +-- llvmutil.cpp | 9 +----- llvmutil.h | 4 --- main.cpp | 20 ++---------- module.cpp | 27 ---------------- module.h | 2 -- opt.cpp | 88 ++-------------------------------------------------- type.cpp | 37 ---------------------- 12 files changed, 9 insertions(+), 228 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index 3738858f..ac763f7b 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -620,9 +620,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length, std::string(linkError); if (llvm::Linker::LinkModules(module, bcModule, -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::Linker::DestroySource, -#endif // LLVM_3_0 &linkError)) Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str()); lSetInternalFunctions(module); diff --git a/cbackend.cpp b/cbackend.cpp index c3f6d0f7..71ce7de6 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -12,10 +12,6 @@ // //===----------------------------------------------------------------------===// -#ifdef LLVM_2_9 -#warning "The C++ backend isn't supported when 
building with LLVM 2.9" -#else - #include #ifndef _MSC_VER @@ -4462,5 +4458,3 @@ WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth, return true; } - -#endif // LLVM_2_9 diff --git a/ctx.cpp b/ctx.cpp index 5f5258e8..00ef1f7a 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1359,13 +1359,8 @@ lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) { llvm::GlobalValue::InternalLinkage, sConstant, s); llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&indices[0], &indices[2]); return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock); -#else - return llvm::GetElementPtrInst::Create(sPtr, &indices[0], &indices[2], - "sptr", bblock); -#endif } @@ -2067,16 +2062,10 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, // The easy case: both the base pointer and the indices are // uniform, so just emit the regular LLVM GEP instruction llvm::Value *ind[1] = { index }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&ind[0], &ind[1]); llvm::Instruction *inst = llvm::GetElementPtrInst::Create(basePtr, arrayRef, name ? name : "gep", bblock); -#else - llvm::Instruction *inst = - llvm::GetElementPtrInst::Create(basePtr, &ind[0], &ind[1], - name ? name : "gep", bblock); -#endif AddDebugPos(inst); return inst; } @@ -2133,16 +2122,10 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 // The easy case: both the base pointer and the indices are // uniform, so just emit the regular LLVM GEP instruction llvm::Value *indices[2] = { index0, index1 }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&indices[0], &indices[2]); llvm::Instruction *inst = llvm::GetElementPtrInst::Create(basePtr, arrayRef, name ? 
name : "gep", bblock); -#else - llvm::Instruction *inst = - llvm::GetElementPtrInst::Create(basePtr, &indices[0], &indices[2], - name ? name : "gep", bblock); -#endif AddDebugPos(inst); return inst; } @@ -2222,16 +2205,10 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, if (ptrType == NULL || ptrType->IsUniformType()) { // If the pointer is uniform, we can use the regular LLVM GEP. llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&offsets[0], &offsets[2]); resultPtr = llvm::GetElementPtrInst::Create(basePtr, arrayRef, name ? name : "struct_offset", bblock); -#else - resultPtr = - llvm::GetElementPtrInst::Create(basePtr, &offsets[0], &offsets[2], - name ? name : "struct_offset", bblock); -#endif } else { // Otherwise do the math to find the offset and add it to the given @@ -3014,10 +2991,7 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, llvm::PHINode * FunctionEmitContext::PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count, const char *name) { - llvm::PHINode *pn = llvm::PHINode::Create(type, -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) - count, -#endif // LLVM_3_0 + llvm::PHINode *pn = llvm::PHINode::Create(type, count, name ? name : "phi", bblock); AddDebugPos(pn); return pn; @@ -3086,14 +3060,8 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, if (llvm::isa(func->getType()) == false) { // Regular 'uniform' function call--just one function or function // pointer, so just emit the IR directly. -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::Instruction *ci = llvm::CallInst::Create(func, argVals, name ? name : "", bblock); -#else - llvm::Instruction *ci = - llvm::CallInst::Create(func, argVals.begin(), argVals.end(), - name ? 
name : "", bblock); -#endif AddDebugPos(ci); return ci; } diff --git a/expr.cpp b/expr.cpp index b6955bc3..e22217a0 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3701,14 +3701,10 @@ ExprList::GetConstant(const Type *type) const { } if (dynamic_cast(type) != NULL) { -#if defined(LLVM_2_9) - return llvm::ConstantStruct::get(*g->ctx, cv, false); -#else LLVM_TYPE_CONST llvm::StructType *llvmStructType = llvm::dyn_cast(collectionType->LLVMType(g->ctx)); Assert(llvmStructType != NULL); return llvm::ConstantStruct::get(llvmStructType, cv); -#endif } else { LLVM_TYPE_CONST llvm::Type *lt = type->LLVMType(g->ctx); diff --git a/ispc.h b/ispc.h index b33cde02..480c4bb1 100644 --- a/ispc.h +++ b/ispc.h @@ -40,8 +40,8 @@ #define ISPC_VERSION "1.2.2dev" -#if !defined(LLVM_2_9) && !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn) -#error "Only LLVM 2.9, 3.0, and the 3.1 development branch are supported" +#if !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn) +#error "Only LLVM 3.0, and the 3.1 development branch are supported" #endif #if defined(_WIN32) || defined(_WIN64) diff --git a/llvmutil.cpp b/llvmutil.cpp index 9f4fc658..6012d4eb 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -1443,11 +1443,8 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, llvm::Instruction *phiInsertPos = phi->getParent()->begin(); llvm::PHINode *scalarPhi = llvm::PHINode::Create(vt->getElementType(), -#ifndef LLVM_2_9 phi->getNumIncomingValues(), -#endif // !LLVM_2_9 - newName, - phiInsertPos); + newName, phiInsertPos); phiMap[phi] = scalarPhi; for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) { @@ -1521,12 +1518,8 @@ LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[], shufVec.push_back(LLVMInt32(shuf[i])); } -#ifndef LLVM_2_9 llvm::ArrayRef aref(&shufVec[0], &shufVec[shufSize]); llvm::Value *vec = llvm::ConstantVector::get(aref); -#else // LLVM_2_9 - llvm::Value *vec = llvm::ConstantVector::get(shufVec); -#endif 
return new llvm::ShuffleVectorInst(v1, v2, vec, "shuffle", insertBefore); } diff --git a/llvmutil.h b/llvmutil.h index 96cdf079..0025bf8e 100644 --- a/llvmutil.h +++ b/llvmutil.h @@ -49,11 +49,7 @@ namespace llvm { } // llvm::Type *s are no longer const in llvm 3.0 -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) #define LLVM_TYPE_CONST -#else -#define LLVM_TYPE_CONST const -#endif /** This structure holds pointers to a variety of LLVM types; code diff --git a/main.cpp b/main.cpp index cc6fd778..a98a35bb 100644 --- a/main.cpp +++ b/main.cpp @@ -45,14 +45,8 @@ #include #endif // ISPC_IS_WINDOWS #include -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) - #include - #include -#else - #include - #include - #include -#endif +#include +#include #ifdef ISPC_IS_WINDOWS #define strcasecmp stricmp @@ -66,9 +60,7 @@ static void lPrintVersion() { printf("Intel(r) SPMD Program Compiler (ispc), %s (build %s @ %s, LLVM %s)\n", ISPC_VERSION, BUILD_VERSION, BUILD_DATE, -#ifdef LLVM_2_9 - "2.9" -#elif defined(LLVM_3_0) || defined(LLVM_3_0svn) +#if defined(LLVM_3_0) "3.0" #elif defined(LLVM_3_1) || defined(LLVM_3_1svn) "3.1" @@ -93,9 +85,7 @@ usage(int ret) { printf(" ={%s}\n", Target::SupportedTargetCPUs().c_str()); printf(" [-D]\t\t\t\t#define given value when running preprocessor\n"); printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n"); -#ifndef LLVM_2_9 printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n"); -#endif // !LLVM_2_9 printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n"); printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n"); printf(" [-g]\t\t\t\tGenerate debugging information\n"); @@ -220,9 +210,7 @@ int main(int Argc, char *Argv[]) { LLVMInitializeX86AsmPrinter(); LLVMInitializeX86AsmParser(); LLVMInitializeX86Disassembler(); -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) LLVMInitializeX86TargetMC(); -#endif char *file = NULL; const char 
*headerFileName = NULL; @@ -279,10 +267,8 @@ int main(int Argc, char *Argv[]) { } else if (!strcmp(argv[i], "--emit-asm")) ot = Module::Asm; -#ifndef LLVM_2_9 else if (!strcmp(argv[i], "--emit-c++")) ot = Module::CXX; -#endif // !LLVM_2_9 else if (!strcmp(argv[i], "--emit-llvm")) ot = Module::Bitcode; else if (!strcmp(argv[i], "--emit-obj")) diff --git a/module.cpp b/module.cpp index d2d3afdd..7ccc6db6 100644 --- a/module.cpp +++ b/module.cpp @@ -706,10 +706,8 @@ Module::AddFunctionDefinition(const std::string &name, const FunctionType *type, bool Module::writeOutput(OutputType outputType, const char *outFileName, const char *includeFileName) { -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) if (diBuilder != NULL && outputType != Header) diBuilder->finalize(); -#endif // LLVM_3_0 // First, issue a warning if the output file suffix and the type of // file being created seem to mismatch. This can help catch missing @@ -731,14 +729,12 @@ Module::writeOutput(OutputType outputType, const char *outFileName, if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj")) fileType = "object"; break; -#ifndef LLVM_2_9 case CXX: if (strcasecmp(suffix, "c") && strcasecmp(suffix, "cc") && strcasecmp(suffix, "c++") && strcasecmp(suffix, "cxx") && strcasecmp(suffix, "cpp")) fileType = "c++"; break; -#endif // !LLVM_2_9 case Header: if (strcasecmp(suffix, "h") && strcasecmp(suffix, "hh") && strcasecmp(suffix, "hpp")) @@ -754,14 +750,12 @@ Module::writeOutput(OutputType outputType, const char *outFileName, return writeHeader(outFileName); else if (outputType == Bitcode) return writeBitcode(module, outFileName); -#ifndef LLVM_2_9 else if (outputType == CXX) { extern bool WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth, const char *includeName); return WriteCXXFile(module, outFileName, g->target.vectorWidth, includeFileName); } -#endif // !LLVM_2_9 else return writeObjectFileOrAssembly(outputType, outFileName); } @@ -1172,18 +1166,12 @@ 
Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre llvm::raw_fd_ostream stderrRaw(2, false); -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) clang::TextDiagnosticPrinter *diagPrinter = new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions()); llvm::IntrusiveRefCntPtr diagIDs(new clang::DiagnosticIDs); clang::DiagnosticsEngine *diagEngine = new clang::DiagnosticsEngine(diagIDs, diagPrinter); inst.setDiagnostics(diagEngine); -#else - clang::TextDiagnosticPrinter *diagPrinter = - new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions()); - inst.createDiagnostics(0, NULL, diagPrinter); -#endif clang::TargetOptions &options = inst.getTargetOpts(); llvm::Triple triple(module->getTargetTriple()); @@ -1209,9 +1197,7 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre clang::HeaderSearchOptions &headerOpts = inst.getHeaderSearchOpts(); headerOpts.UseBuiltinIncludes = 0; -#ifndef LLVM_2_9 headerOpts.UseStandardSystemIncludes = 0; -#endif // !LLVM_2_9 headerOpts.UseStandardCXXIncludes = 0; if (g->debugPrint) headerOpts.Verbose = 1; @@ -1549,24 +1535,13 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc, for (; argIter != dispatchFunc->arg_end(); ++argIter) args.push_back(argIter); if (voidReturn) { -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::CallInst::Create(targetFuncs[i], args, "", callBBlock); -#else - llvm::CallInst::Create(targetFuncs[i], args.begin(), args.end(), - "", callBBlock); -#endif llvm::ReturnInst::Create(*g->ctx, callBBlock); } else { -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::Value *retValue = llvm::CallInst::Create(targetFuncs[i], args, "ret_value", callBBlock); -#else - llvm::Value *retValue = - llvm::CallInst::Create(targetFuncs[i], args.begin(), args.end(), - "ret_value", callBBlock); -#endif llvm::ReturnInst::Create(*g->ctx, retValue, callBBlock); } @@ 
-1664,13 +1639,11 @@ Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu, return errorCount > 0; } else { -#ifndef LLVM_2_9 if (outputType == CXX) { Error(SourcePos(), "Illegal to specify more then one target when " "compiling C++ output."); return 1; } -#endif // !LLVM_2_9 // The user supplied multiple targets std::vector targets = lExtractTargets(target); diff --git a/module.h b/module.h index 96231a5f..d62728c8 100644 --- a/module.h +++ b/module.h @@ -85,9 +85,7 @@ public: enum OutputType { Asm, /** Generate text assembly language output */ Bitcode, /** Generate LLVM IR bitcode output */ Object, /** Generate a native object file */ -#ifndef LLVM_2_9 CXX, /** Generate a C++ file */ -#endif // !LLVM_2_9 Header /** Generate a C/C++ header file with declarations of 'export'ed functions, global variables, and the types used by them. */ diff --git a/opt.cpp b/opt.cpp index 5bc3737e..a16d4bad 100644 --- a/opt.cpp +++ b/opt.cpp @@ -59,9 +59,6 @@ #include #include #include -#ifdef LLVM_2_9 - #include -#endif // LLVM_2_9 #include #include #include @@ -188,13 +185,8 @@ static llvm::Instruction * lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, const char *name, llvm::Instruction *insertBefore = NULL) { llvm::Value *args[2] = { arg0, arg1 }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef newArgArray(&args[0], &args[2]); return llvm::CallInst::Create(func, newArgArray, name, insertBefore); -#else - return llvm::CallInst::Create(func, &args[0], &args[2], - name, insertBefore); -#endif } @@ -203,13 +195,8 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, const char *name, llvm::Instruction *insertBefore = NULL) { llvm::Value *args[3] = { arg0, arg1, arg2 }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef newArgArray(&args[0], &args[3]); return llvm::CallInst::Create(func, newArgArray, name, insertBefore); -#else 
- return llvm::CallInst::Create(func, &args[0], &args[3], - name, insertBefore); -#endif } @@ -219,13 +206,8 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, const char *name, llvm::Instruction *insertBefore = NULL) { llvm::Value *args[4] = { arg0, arg1, arg2, arg3 }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef newArgArray(&args[0], &args[4]); return llvm::CallInst::Create(func, newArgArray, name, insertBefore); -#else - return llvm::CallInst::Create(func, &args[0], &args[4], - name, insertBefore); -#endif } #endif @@ -234,28 +216,19 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4, const char *name, llvm::Instruction *insertBefore = NULL) { llvm::Value *args[5] = { arg0, arg1, arg2, arg3, arg4 }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef newArgArray(&args[0], &args[5]); return llvm::CallInst::Create(func, newArgArray, name, insertBefore); -#else - return llvm::CallInst::Create(func, &args[0], &args[5], - name, insertBefore); -#endif } + static llvm::Instruction * lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4, llvm::Value *arg5, const char *name, llvm::Instruction *insertBefore = NULL) { llvm::Value *args[6] = { arg0, arg1, arg2, arg3, arg4, arg5 }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef newArgArray(&args[0], &args[6]); return llvm::CallInst::Create(func, newArgArray, name, insertBefore); -#else - return llvm::CallInst::Create(func, &args[0], &args[6], - name, insertBefore); -#endif } @@ -263,14 +236,9 @@ static llvm::Instruction * lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name, llvm::Instruction *insertBefore) { llvm::Value *index[1] = { offset }; -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || 
defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&index[0], &index[1]); return llvm::GetElementPtrInst::Create(ptr, arrayRef, name, insertBefore); -#else - return llvm::GetElementPtrInst::Create(ptr, &index[0], &index[1], - name, insertBefore); -#endif } @@ -295,9 +263,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(new llvm::TargetData(module)); } -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) optPM.add(llvm::createIndVarSimplifyPass()); -#endif if (optLevel == 0) { // This is more or less the minimum set of optimizations that we @@ -421,32 +387,6 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateVSelMovmskOptPass()); -#if defined(LLVM_2_9) - llvm::createStandardModulePasses(&optPM, 3, - false /* opt size */, - true /* unit at a time */, - g->opt.unrollLoops, - true /* simplify lib calls */, - false /* may have exceptions */, - llvm::createFunctionInliningPass()); - llvm::createStandardLTOPasses(&optPM, true /* internalize pass */, - true /* inline once again */, - false /* verify after each pass */); - llvm::createStandardFunctionPasses(&optPM, 3); - - optPM.add(CreateIsCompileTimeConstantPass(true)); - optPM.add(CreateIntrinsicsOptPass()); - optPM.add(CreateVSelMovmskOptPass()); - - llvm::createStandardModulePasses(&optPM, 3, - false /* opt size */, - true /* unit at a time */, - g->opt.unrollLoops, - true /* simplify lib calls */, - false /* may have exceptions */, - llvm::createFunctionInliningPass()); - -#else funcPM.add(llvm::createTypeBasedAliasAnalysisPass()); funcPM.add(llvm::createBasicAliasAnalysisPass()); funcPM.add(llvm::createCFGSimplificationPass()); @@ -542,7 +482,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createStripDeadPrototypesPass()); optPM.add(llvm::createGlobalDCEPass()); optPM.add(llvm::createConstantMergePass()); -#endif + optPM.add(CreateMakeInternalFuncsStaticPass()); optPM.add(llvm::createGlobalDCEPass()); } @@ -633,22 
+573,18 @@ IntrinsicsOpt::IntrinsicsOpt() llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps); maskInstructions.push_back(sseMovmsk); maskInstructions.push_back(m->module->getFunction("__movmsk")); -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::Function *avxMovmsk = llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_movmsk_ps_256); Assert(avxMovmsk != NULL); maskInstructions.push_back(avxMovmsk); -#endif // And all of the blend instructions blendInstructions.push_back(BlendInstruction( llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse41_blendvps), 0xf, 0, 1, 2)); -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) blendInstructions.push_back(BlendInstruction( llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_blendv_ps_256), 0xff, 0, 1, 2)); -#endif } @@ -746,7 +682,6 @@ lIsUndef(llvm::Value *value) { bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::Function *avxMaskedLoad32 = llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskload_ps_256); llvm::Function *avxMaskedLoad64 = @@ -757,7 +692,6 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskstore_pd_256); Assert(avxMaskedLoad32 != NULL && avxMaskedStore32 != NULL); Assert(avxMaskedLoad64 != NULL && avxMaskedStore64 != NULL); -#endif bool modifiedAny = false; restart: @@ -829,7 +763,6 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } } -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) else if (callInst->getCalledFunction() == avxMaskedLoad32 || callInst->getCalledFunction() == avxMaskedLoad64) { llvm::Value *factor = callInst->getArgOperand(1); @@ -896,7 +829,6 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } } -#endif } return 
modifiedAny; } @@ -1132,11 +1064,7 @@ lExtractFromInserts(llvm::Value *v, unsigned int index) { return NULL; Assert(iv->hasIndices() && iv->getNumIndices() == 1); -#ifdef LLVM_2_9 - if (*(iv->idx_begin()) == index) -#else if (iv->getIndices()[0] == index) -#endif return iv->getInsertedValueOperand(); else return lExtractFromInserts(iv->getAggregateOperand(), index); @@ -1253,24 +1181,16 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets, } Assert(base != NULL); -#ifdef LLVM_2_9 - *offsets = llvm::ConstantVector::get(delta); -#else llvm::ArrayRef deltas(&delta[0], &delta[elements.size()]); *offsets = llvm::ConstantVector::get(deltas); -#endif return base; } llvm::ExtractValueInst *ev = llvm::dyn_cast(ptrs); if (ev != NULL) { Assert(ev->getNumIndices() == 1); -#ifdef LLVM_2_9 - int index = *(ev->idx_begin()); -#else int index = ev->getIndices()[0]; -#endif ptrs = lExtractFromInserts(ev->getAggregateOperand(), index); if (ptrs != NULL) return lGetBasePtrAndOffsets(ptrs, offsets, insertBefore); @@ -3415,13 +3335,9 @@ lCoalesceGathers(const std::vector &coalesceGroup) { memory. */ static bool lInstructionMayWriteToMemory(llvm::Instruction *inst) { -#ifdef LLVM_2_9 - if (llvm::isa(inst)) -#else if (llvm::isa(inst) || llvm::isa(inst) || llvm::isa(inst)) -#endif // !LLVM_2_9 // FIXME: we could be less conservative and try to allow stores if // we are sure that the pointers don't overlap.. 
return true; diff --git a/type.cpp b/type.cpp index e1372902..1a433c1f 100644 --- a/type.cpp +++ b/type.cpp @@ -81,11 +81,7 @@ lCreateDIArray(llvm::DIType eltType, int count) { llvm::Value *sub = m->diBuilder->getOrCreateSubrange(lowerBound, upperBound); std::vector subs; subs.push_back(sub); -#ifdef LLVM_2_9 - llvm::DIArray subArray = m->diBuilder->getOrCreateArray(&subs[0], subs.size()); -#else llvm::DIArray subArray = m->diBuilder->getOrCreateArray(subs); -#endif uint64_t size = eltType.getSizeInBits() * count; uint64_t align = eltType.getAlignInBits(); @@ -518,12 +514,7 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const { else if (variability == Variability::Varying) { llvm::DIType unifType = GetAsUniformType()->GetDIType(scope); llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1); -#ifdef LLVM_2_9 - llvm::Value *suba[] = { sub }; - llvm::DIArray subArray = m->diBuilder->getOrCreateArray(suba, 1); -#else llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); -#endif // LLVM_2_9 uint64_t size = unifType.getSizeInBits() * g->target.vectorWidth; uint64_t align = unifType.getAlignInBits() * g->target.vectorWidth; return m->diBuilder->createVectorType(size, align, unifType, subArray); @@ -767,14 +758,8 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const { m->diBuilder->createEnumerator(enumerators[i]->name, enumeratorValue); enumeratorDescriptors.push_back(descriptor); } -#ifdef LLVM_2_9 - llvm::DIArray elementArray = - m->diBuilder->getOrCreateArray(&enumeratorDescriptors[0], - enumeratorDescriptors.size()); -#else llvm::DIArray elementArray = m->diBuilder->getOrCreateArray(enumeratorDescriptors); -#endif llvm::DIFile diFile = pos.GetDIFile(); llvm::DIType diType = @@ -789,12 +774,7 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const { return diType; case Variability::Varying: { llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1); -#ifdef LLVM_2_9 - llvm::Value *suba[] = { sub }; - 
llvm::DIArray subArray = m->diBuilder->getOrCreateArray(suba, 1); -#else llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); -#endif // !LLVM_2_9 uint64_t size = diType.getSizeInBits() * g->target.vectorWidth; uint64_t align = diType.getAlignInBits() * g->target.vectorWidth; return m->diBuilder->createVectorType(size, align, diType, subArray); @@ -1686,12 +1666,7 @@ llvm::DIType VectorType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIType eltType = base->GetDIType(scope); llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, numElements-1); -#ifdef LLVM_2_9 - llvm::Value *subs[1] = { sub }; - llvm::DIArray subArray = m->diBuilder->getOrCreateArray(subs, 1); -#else llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); -#endif uint64_t sizeBits = eltType.getSizeInBits() * numElements; @@ -1976,17 +1951,10 @@ StructType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIFile diFile = elementPositions[i].GetDIFile(); int line = elementPositions[i].first_line; -#ifdef LLVM_2_9 - llvm::DIType fieldType = - m->diBuilder->createMemberType(elementNames[i], diFile, line, - eltSize, eltAlign, currentSize, 0, - eltType); -#else llvm::DIType fieldType = m->diBuilder->createMemberType(scope, elementNames[i], diFile, line, eltSize, eltAlign, currentSize, 0, eltType); -#endif // LLVM_2_9 elementLLVMTypes.push_back(fieldType); currentSize += eltSize; @@ -1997,12 +1965,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const { if (currentSize > 0 && (currentSize % align)) currentSize += align - (currentSize % align); -#ifdef LLVM_2_9 - llvm::DIArray elements = m->diBuilder->getOrCreateArray(&elementLLVMTypes[0], - elementLLVMTypes.size()); -#else llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes); -#endif llvm::DIFile diFile = pos.GetDIFile(); return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, currentSize, align, 0, elements); From fefa86e0cfb9af0c54cd202b06a8eafc2e01e98f Mon Sep 17 00:00:00 2001 From: 
Matt Pharr Date: Sun, 15 Apr 2012 20:11:27 -0700 Subject: [PATCH 071/173] Remove LLVM_TYPE_CONST #define / usage. Now with LLVM 3.0 and beyond, types aren't const. --- builtins.cpp | 6 +- ctx.cpp | 172 +++++++++++++++++++++++++-------------------------- ctx.h | 20 +++--- expr.cpp | 92 +++++++++++++-------------- func.cpp | 4 +- ispc.cpp | 28 ++++----- ispc.h | 10 +-- llvmutil.cpp | 102 +++++++++++++++--------------- llvmutil.h | 71 ++++++++++----------- module.cpp | 8 +-- opt.cpp | 40 ++++++------ stmt.cpp | 8 +-- type.cpp | 34 +++++----- type.h | 20 +++--- 14 files changed, 303 insertions(+), 312 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index ac763f7b..405c8290 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -291,7 +291,7 @@ lCheckModuleIntrinsics(llvm::Module *module) { if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) { llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID(); Assert(id != 0); - LLVM_TYPE_CONST llvm::Type *intrinsicType = + llvm::Type *intrinsicType = llvm::Intrinsic::getType(*g->ctx, id); intrinsicType = llvm::PointerType::get(intrinsicType, 0); Assert(func->getType() == intrinsicType); @@ -641,7 +641,7 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module, new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(), SC_STATIC); pw->constValue = new ConstExpr(pw->type, val, SourcePos()); - LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32Type; + llvm::Type *ltype = LLVMTypes::Int32Type; llvm::Constant *linit = LLVMInt32(val); pw->storagePtr = new llvm::GlobalVariable(*module, ltype, true, llvm::GlobalValue::InternalLinkage, @@ -681,7 +681,7 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) { pi[i] = i; pidx->constValue = new ConstExpr(pidx->type, pi, SourcePos()); - LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32VectorType; + llvm::Type *ltype = LLVMTypes::Int32VectorType; llvm::Constant *linit = LLVMInt32Vector(pi); pidx->storagePtr = new 
llvm::GlobalVariable(*module, ltype, true, llvm::GlobalValue::InternalLinkage, linit, diff --git a/ctx.cpp b/ctx.cpp index 00ef1f7a..4e7b3479 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -251,7 +251,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, if (!returnType || Type::Equal(returnType, AtomicType::Void)) returnValuePtr = NULL; else { - LLVM_TYPE_CONST llvm::Type *ftype = returnType->LLVMType(g->ctx); + llvm::Type *ftype = returnType->LLVMType(g->ctx); returnValuePtr = AllocaInst(ftype, "return_value_memory"); } @@ -1050,7 +1050,7 @@ FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault, caseBlocks = new std::vector >(bbCases); nextBlocks = new std::map(bbNext); switchConditionWasUniform = - (llvm::isa(expr->getType()) == false); + (llvm::isa(expr->getType()) == false); if (switchConditionWasUniform == true) { // For a uniform switch condition, just wire things up to the LLVM @@ -1325,12 +1325,12 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { if (g->target.maskBitCount == 1) return b; - LLVM_TYPE_CONST llvm::ArrayType *at = - llvm::dyn_cast(b->getType()); + llvm::ArrayType *at = + llvm::dyn_cast(b->getType()); if (at) { // If we're given an array of vectors of i1s, then do the // conversion for each of the elements - LLVM_TYPE_CONST llvm::Type *boolArrayType = + llvm::Type *boolArrayType = llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements()); llvm::Value *ret = llvm::UndefValue::get(boolArrayType); @@ -1493,16 +1493,16 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym) { Otherwise return zero. 
*/ static int -lArrayVectorWidth(LLVM_TYPE_CONST llvm::Type *t) { - LLVM_TYPE_CONST llvm::ArrayType *arrayType = - llvm::dyn_cast(t); +lArrayVectorWidth(llvm::Type *t) { + llvm::ArrayType *arrayType = + llvm::dyn_cast(t); if (arrayType == NULL) return 0; // We shouldn't be seeing arrays of anything but vectors being passed // to things like FunctionEmitContext::BinaryOperator() as operands. - LLVM_TYPE_CONST llvm::VectorType *vectorElementType = - llvm::dyn_cast(arrayType->getElementType()); + llvm::VectorType *vectorElementType = + llvm::dyn_cast(arrayType->getElementType()); Assert((vectorElementType != NULL && (int)vectorElementType->getNumElements() == g->target.vectorWidth)); @@ -1520,7 +1520,7 @@ FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, } Assert(v0->getType() == v1->getType()); - LLVM_TYPE_CONST llvm::Type *type = v0->getType(); + llvm::Type *type = v0->getType(); int arraySize = lArrayVectorWidth(type); if (arraySize == 0) { llvm::Instruction *bop = @@ -1554,7 +1554,7 @@ FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) { // Similarly to BinaryOperator, do the operation on all the elements of // the array if we're given an array type; otherwise just do the // regular llvm operation. - LLVM_TYPE_CONST llvm::Type *type = v->getType(); + llvm::Type *type = v->getType(); int arraySize = lArrayVectorWidth(type); if (arraySize == 0) { llvm::Instruction *binst = @@ -1579,18 +1579,18 @@ FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) { // Given the llvm Type that represents an ispc VectorType, return an // equally-shaped type with boolean elements. (This is the type that will // be returned from CmpInst with ispc VectorTypes). 
-static LLVM_TYPE_CONST llvm::Type * -lGetMatchingBoolVectorType(LLVM_TYPE_CONST llvm::Type *type) { - LLVM_TYPE_CONST llvm::ArrayType *arrayType = - llvm::dyn_cast(type); +static llvm::Type * +lGetMatchingBoolVectorType(llvm::Type *type) { + llvm::ArrayType *arrayType = + llvm::dyn_cast(type); Assert(arrayType != NULL); - LLVM_TYPE_CONST llvm::VectorType *vectorElementType = - llvm::dyn_cast(arrayType->getElementType()); + llvm::VectorType *vectorElementType = + llvm::dyn_cast(arrayType->getElementType()); Assert(vectorElementType != NULL); Assert((int)vectorElementType->getNumElements() == g->target.vectorWidth); - LLVM_TYPE_CONST llvm::Type *base = + llvm::Type *base = llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth); return llvm::ArrayType::get(base, arrayType->getNumElements()); } @@ -1607,7 +1607,7 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, } Assert(v0->getType() == v1->getType()); - LLVM_TYPE_CONST llvm::Type *type = v0->getType(); + llvm::Type *type = v0->getType(); int arraySize = lArrayVectorWidth(type); if (arraySize == 0) { llvm::Instruction *ci = @@ -1617,7 +1617,7 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, return ci; } else { - LLVM_TYPE_CONST llvm::Type *boolType = lGetMatchingBoolVectorType(type); + llvm::Type *boolType = lGetMatchingBoolVectorType(type); llvm::Value *ret = llvm::UndefValue::get(boolType); for (int i = 0; i < arraySize; ++i) { llvm::Value *a = ExtractInst(v0, i); @@ -1638,10 +1638,10 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { } llvm::Value *ret = NULL; - LLVM_TYPE_CONST llvm::Type *eltType = value->getType(); + llvm::Type *eltType = value->getType(); - LLVM_TYPE_CONST llvm::PointerType *pt = - llvm::dyn_cast(eltType); + llvm::PointerType *pt = + llvm::dyn_cast(eltType); if (pt != NULL) { // Varying pointers are represented as vectors of i32/i64s ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType); @@ -1665,7 +1665,7 @@ 
FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { llvm::Value * FunctionEmitContext::BitCastInst(llvm::Value *value, - LLVM_TYPE_CONST llvm::Type *type, + llvm::Type *type, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); @@ -1686,11 +1686,11 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { return NULL; } - if (llvm::isa(value->getType())) + if (llvm::isa(value->getType())) // no-op for varying pointers; they're already vectors of ints return value; - LLVM_TYPE_CONST llvm::Type *type = LLVMTypes::PointerIntType; + llvm::Type *type = LLVMTypes::PointerIntType; llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock); AddDebugPos(inst); @@ -1700,15 +1700,15 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { llvm::Value * FunctionEmitContext::PtrToIntInst(llvm::Value *value, - LLVM_TYPE_CONST llvm::Type *toType, + llvm::Type *toType, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); return NULL; } - LLVM_TYPE_CONST llvm::Type *fromType = value->getType(); - if (llvm::isa(fromType)) { + llvm::Type *fromType = value->getType(); + if (llvm::isa(fromType)) { // varying pointer if (fromType == toType) // already the right type--done @@ -1731,15 +1731,15 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Value * FunctionEmitContext::IntToPtrInst(llvm::Value *value, - LLVM_TYPE_CONST llvm::Type *toType, + llvm::Type *toType, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); return NULL; } - LLVM_TYPE_CONST llvm::Type *fromType = value->getType(); - if (llvm::isa(fromType)) { + llvm::Type *fromType = value->getType(); + if (llvm::isa(fromType)) { // varying pointer if (fromType == toType) // done @@ -1761,7 +1761,7 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Instruction * -FunctionEmitContext::TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, 
+FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); @@ -1779,7 +1779,7 @@ FunctionEmitContext::TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *t llvm::Instruction * FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, - LLVM_TYPE_CONST llvm::Type *type, const char *name) { + llvm::Type *type, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); return NULL; @@ -1795,7 +1795,7 @@ FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Instruction * -FunctionEmitContext::FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, +FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); @@ -1812,7 +1812,7 @@ FunctionEmitContext::FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type * llvm::Instruction * -FunctionEmitContext::SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, +FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); @@ -1829,7 +1829,7 @@ FunctionEmitContext::SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *ty llvm::Instruction * -FunctionEmitContext::ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, +FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); @@ -1860,7 +1860,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx), bblock); bool indexIsVarying = - llvm::isa(index->getType()); + llvm::isa(index->getType()); llvm::Value *offset = NULL; if (indexIsVarying == false) { // Truncate or sign extend the index as appropriate to a 32 or @@ -1904,7 +1904,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value 
*basePtr, llvm::Value *index, // Smear out the pointer to be varying; either the base pointer or the // index must be varying for this method to be called. bool baseIsUniform = - (llvm::isa(basePtr->getType())); + (llvm::isa(basePtr->getType())); Assert(baseIsUniform == false || indexIsVarying == true); llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr, "ptr_smear") : basePtr; @@ -1916,18 +1916,18 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, void FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) { - LLVM_TYPE_CONST llvm::Type *type0 = (*v0)->getType(); - LLVM_TYPE_CONST llvm::Type *type1 = (*v1)->getType(); + llvm::Type *type0 = (*v0)->getType(); + llvm::Type *type1 = (*v1)->getType(); // First, promote to a vector type if one of the two values is a vector // type - if (llvm::isa(type0) && - !llvm::isa(type1)) { + if (llvm::isa(type0) && + !llvm::isa(type1)) { *v1 = SmearUniform(*v1, "smear_v1"); type1 = (*v1)->getType(); } - if (!llvm::isa(type0) && - llvm::isa(type1)) { + if (!llvm::isa(type0) && + llvm::isa(type1)) { *v0 = SmearUniform(*v0, "smear_v0"); type0 = (*v0)->getType(); } @@ -1964,7 +1964,7 @@ lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth, ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset); - LLVM_TYPE_CONST llvm::Type *indexType = indexValue->getType(); + llvm::Type *indexType = indexValue->getType(); llvm::Value *shift = LLVMIntAsType(logWidth, indexType); llvm::Value *mask = LLVMIntAsType(soaWidth-1, indexType); @@ -1992,10 +1992,10 @@ FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) { // Create a small struct where the first element is the type of the // given pointer and the second element is the type of the offset // value. 
- std::vector eltTypes; + std::vector eltTypes; eltTypes.push_back(ptr->getType()); eltTypes.push_back(offset->getType()); - LLVM_TYPE_CONST llvm::StructType *st = + llvm::StructType *st = llvm::StructType::get(*g->ctx, eltTypes); llvm::Value *ret = llvm::UndefValue::get(st); @@ -2023,7 +2023,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, } if (ptrType->IsSlice()) { - Assert(llvm::isa(basePtr->getType())); + Assert(llvm::isa(basePtr->getType())); llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1); if (ptrType->IsFrozenSlice() == false) { @@ -2051,12 +2051,12 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, // Double-check consistency between the given pointer type and its LLVM // type. if (ptrType->IsUniformType()) - Assert(llvm::isa(basePtr->getType())); + Assert(llvm::isa(basePtr->getType())); else if (ptrType->IsVaryingType()) - Assert(llvm::isa(basePtr->getType())); + Assert(llvm::isa(basePtr->getType())); bool indexIsVaryingType = - llvm::isa(index->getType()); + llvm::isa(index->getType()); if (indexIsVaryingType == false && ptrType->IsUniformType() == true) { // The easy case: both the base pointer and the indices are @@ -2096,7 +2096,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 // Similar to the 1D GEP implementation above, for non-frozen slice // pointers we do the two-step indexing calculation and then pass // the new major index on to a recursive GEP call. 
- Assert(llvm::isa(basePtr->getType())); + Assert(llvm::isa(basePtr->getType())); llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1); if (ptrType->IsFrozenSlice() == false) { llvm::Value *newSliceOffset; @@ -2113,9 +2113,9 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 } bool index0IsVaryingType = - llvm::isa(index0->getType()); + llvm::isa(index0->getType()); bool index1IsVaryingType = - llvm::isa(index1->getType()); + llvm::isa(index1->getType()); if (index0IsVaryingType == false && index1IsVaryingType == false && ptrType->IsUniformType() == true) { @@ -2140,7 +2140,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 Assert(st != NULL); bool ptr0IsUniform = - llvm::isa(ptr0->getType()); + llvm::isa(ptr0->getType()); const Type *ptr0BaseType = st->GetElementType(); const Type *ptr0Type = ptr0IsUniform ? PointerType::GetUniform(ptr0BaseType) : @@ -2175,7 +2175,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, // unfortunate... llvm::Value *basePtr = fullBasePtr; bool baseIsSlicePtr = - llvm::isa(fullBasePtr->getType()); + llvm::isa(fullBasePtr->getType()); const PointerType *rpt; if (baseIsSlicePtr) { Assert(ptrType != NULL); @@ -2263,8 +2263,8 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { return NULL; } - LLVM_TYPE_CONST llvm::PointerType *pt = - llvm::dyn_cast(ptr->getType()); + llvm::PointerType *pt = + llvm::dyn_cast(ptr->getType()); Assert(pt != NULL); // FIXME: it's not clear to me that we generate unaligned vector loads @@ -2272,7 +2272,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { // optimization passes that lower gathers to vector loads, I think..) // So remove this?? int align = 0; - if (llvm::isa(pt->getElementType())) + if (llvm::isa(pt->getElementType())) align = 1; llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? 
name : "load", false /* not volatile */, @@ -2332,7 +2332,7 @@ FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, // If we have a struct/array, we need to decompose it into // individual element loads to fill in the result structure since // the SOA slice of values we need isn't contiguous in memory... - LLVM_TYPE_CONST llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx); + llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx); llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType); for (int i = 0; i < ct->GetElementCount(); ++i) { @@ -2416,7 +2416,7 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, Assert(ptrType->IsVaryingType()); const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType(); - LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); + llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); const CollectionType *collectionType = dynamic_cast(ptrType->GetBaseType()); @@ -2524,7 +2524,7 @@ FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) { llvm::Value * -FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType, +FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *name, int align, bool atEntryBlock) { if (llvmType == NULL) { @@ -2550,10 +2550,10 @@ FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType, // unlikely that this array will be loaded into varying variables with // what will be aligned accesses if the uniform -> varying load is done // in regular chunks. 
- LLVM_TYPE_CONST llvm::ArrayType *arrayType = - llvm::dyn_cast(llvmType); + llvm::ArrayType *arrayType = + llvm::dyn_cast(llvmType); if (align == 0 && arrayType != NULL && - !llvm::isa(arrayType->getElementType())) + !llvm::isa(arrayType->getElementType())) align = 4 * g->target.nativeVectorWidth; if (align != 0) @@ -2760,7 +2760,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, Assert(pt != NULL || dynamic_cast(valueType) != NULL); - LLVM_TYPE_CONST llvm::Type *type = value->getType(); + llvm::Type *type = value->getType(); const char *funcName = NULL; if (pt != NULL) funcName = g->target.is32Bit ? "__pseudo_scatter32_32" : @@ -2957,7 +2957,7 @@ FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) { } llvm::Instruction *ei = NULL; - if (llvm::isa(v->getType())) + if (llvm::isa(v->getType())) ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name ? name : "extract", bblock); else @@ -2977,7 +2977,7 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, } llvm::Instruction *ii = NULL; - if (llvm::isa(v->getType())) + if (llvm::isa(v->getType())) ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), name ? name : "insert", bblock); else @@ -2989,7 +2989,7 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, llvm::PHINode * -FunctionEmitContext::PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count, +FunctionEmitContext::PhiNode(llvm::Type *type, int count, const char *name) { llvm::PHINode *pn = llvm::PHINode::Create(type, count, name ? name : "phi", bblock); @@ -3019,18 +3019,18 @@ FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, function has. 
*/ static unsigned int lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) { - LLVM_TYPE_CONST llvm::FunctionType *ft = - llvm::dyn_cast(callee->getType()); + llvm::FunctionType *ft = + llvm::dyn_cast(callee->getType()); if (ft == NULL) { - LLVM_TYPE_CONST llvm::PointerType *pt = - llvm::dyn_cast(callee->getType()); + llvm::PointerType *pt = + llvm::dyn_cast(callee->getType()); if (pt == NULL) { // varying--in this case, it must be the version of the // function that takes a mask return funcType->GetNumParameters() + 1; } - ft = llvm::dyn_cast(pt->getElementType()); + ft = llvm::dyn_cast(pt->getElementType()); } Assert(ft != NULL); @@ -3057,7 +3057,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, if (argVals.size() + 1 == calleeArgCount) argVals.push_back(GetFullMask()); - if (llvm::isa(func->getType()) == false) { + if (llvm::isa(func->getType()) == false) { // Regular 'uniform' function call--just one function or function // pointer, so just emit the IR directly. llvm::Instruction *ci = @@ -3085,7 +3085,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // First allocate memory to accumulate the various program // instances' return values... const Type *returnType = funcType->GetReturnType(); - LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); + llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); llvm::Value *resultPtr = NULL; if (llvmReturnType->isVoidTy() == false) resultPtr = AllocaInst(llvmReturnType); @@ -3152,9 +3152,9 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // bitcast the i32/64 function pointer to the actual function // pointer type (the variant that includes a mask). 
- LLVM_TYPE_CONST llvm::Type *llvmFuncType = + llvm::Type *llvmFuncType = funcType->LLVMFunctionType(g->ctx, true); - LLVM_TYPE_CONST llvm::Type *llvmFPtrType = + llvm::Type *llvmFPtrType = llvm::PointerType::get(llvmFuncType, 0); llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType); @@ -3251,14 +3251,14 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee, launchedTasks = true; Assert(llvm::isa(callee)); - LLVM_TYPE_CONST llvm::Type *argType = + llvm::Type *argType = (llvm::dyn_cast(callee))->arg_begin()->getType(); Assert(llvm::PointerType::classof(argType)); - LLVM_TYPE_CONST llvm::PointerType *pt = - llvm::dyn_cast(argType); + llvm::PointerType *pt = + llvm::dyn_cast(argType); Assert(llvm::StructType::classof(pt->getElementType())); - LLVM_TYPE_CONST llvm::StructType *argStructType = - static_cast(pt->getElementType()); + llvm::StructType *argStructType = + static_cast(pt->getElementType()); Assert(argStructType->getNumElements() == argVals.size() + 1); llvm::Function *falloc = m->module->getFunction("ISPCAlloc"); @@ -3356,7 +3356,7 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, return ptr; // Find the size of a uniform element of the varying type - LLVM_TYPE_CONST llvm::Type *llvmBaseUniformType = + llvm::Type *llvmBaseUniformType = baseType->GetAsUniformType()->LLVMType(g->ctx); llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType, bblock); unifSize = SmearUniform(unifSize); diff --git a/ctx.h b/ctx.h index 8cb24a06..6c3f2887 100644 --- a/ctx.h +++ b/ctx.h @@ -380,23 +380,23 @@ public: array, for pointer types). 
*/ llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL); - llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, + llvm::Value *BitCastInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL); - llvm::Value *PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, + llvm::Value *PtrToIntInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); - llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, + llvm::Value *IntToPtrInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); - llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, + llvm::Instruction *TruncInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value, - LLVM_TYPE_CONST llvm::Type *type, const char *name = NULL); - llvm::Instruction *FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, + llvm::Type *type, const char *name = NULL); + llvm::Instruction *FPCastInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); - llvm::Instruction *SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, + llvm::Instruction *SExtInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); - llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, + llvm::Instruction *ZExtInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); /** Given two integer-typed values (but possibly one vector and the @@ -448,7 +448,7 @@ public: instruction is added at the start of the function in the entry basic block; if it should be added to the current basic block, then the atEntryBlock parameter should be false. 
*/ - llvm::Value *AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType, + llvm::Value *AllocaInst(llvm::Type *llvmType, const char *name = NULL, int align = 0, bool atEntryBlock = true); @@ -485,7 +485,7 @@ public: llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name = NULL); - llvm::PHINode *PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count, + llvm::PHINode *PhiNode(llvm::Type *type, int count, const char *name = NULL); llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1, const char *name = NULL); diff --git a/expr.cpp b/expr.cpp index e22217a0..fdaf878c 100644 --- a/expr.cpp +++ b/expr.cpp @@ -637,7 +637,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, // instead we'll make a constant static global that holds the // constant value and emit a memcpy to put its value into the // pointer we have. - LLVM_TYPE_CONST llvm::Type *llvmType = symType->LLVMType(g->ctx); + llvm::Type *llvmType = symType->LLVMType(g->ctx); if (llvmType == NULL) { Assert(m->errorCount > 0); return; @@ -771,7 +771,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, else { // If we don't have enough initializer values, initialize the // rest as zero. - LLVM_TYPE_CONST llvm::Type *llvmType = elementType->LLVMType(g->ctx); + llvm::Type *llvmType = elementType->LLVMType(g->ctx); if (llvmType == NULL) { Assert(m->errorCount > 0); return; @@ -905,7 +905,7 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { // a recursive call to lLLVMConstantValue(). const Type *baseType = vectorType->GetBaseType(); llvm::Constant *constElement = lLLVMConstantValue(baseType, ctx, value); - LLVM_TYPE_CONST llvm::Type *llvmVectorType = vectorType->LLVMType(ctx); + llvm::Type *llvmVectorType = vectorType->LLVMType(ctx); // Now create a constant version of the corresponding LLVM type that we // use to represent the VectorType. 
@@ -914,8 +914,8 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { // LLVM ArrayTypes leaks into the code here; it feels like this detail // should be better encapsulated? if (baseType->IsUniformType()) { - LLVM_TYPE_CONST llvm::VectorType *lvt = - llvm::dyn_cast(llvmVectorType); + llvm::VectorType *lvt = + llvm::dyn_cast(llvmVectorType); Assert(lvt != NULL); std::vector vals; for (unsigned int i = 0; i < lvt->getNumElements(); ++i) @@ -923,8 +923,8 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { return llvm::ConstantVector::get(vals); } else { - LLVM_TYPE_CONST llvm::ArrayType *lat = - llvm::dyn_cast(llvmVectorType); + llvm::ArrayType *lat = + llvm::dyn_cast(llvmVectorType); Assert(lat != NULL); std::vector vals; for (unsigned int i = 0; i < lat->getNumElements(); ++i) @@ -1439,7 +1439,7 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0, // Now divide by the size of the type that the pointer // points to in order to return the difference in elements. - LLVM_TYPE_CONST llvm::Type *llvmElementType = + llvm::Type *llvmElementType = ptrType->GetBaseType()->LLVMType(g->ctx); llvm::Value *size = g->target.SizeOf(llvmElementType, ctx->GetCurrentBasicBlock()); @@ -1648,7 +1648,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, // Allocate temporary storage for the return value const Type *retType = Type::MoreGeneralType(type0, type1, pos, lOpString(op)); - LLVM_TYPE_CONST llvm::Type *llvmRetType = retType->LLVMType(g->ctx); + llvm::Type *llvmRetType = retType->LLVMType(g->ctx); llvm::Value *retPtr = ctx->AllocaInst(llvmRetType, "logical_op_mem"); llvm::BasicBlock *bbSkipEvalValue1 = ctx->CreateBasicBlock("skip_eval_1"); @@ -3010,7 +3010,7 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const { // Temporary storage to store the values computed for each // expression, if any. (These stay as uninitialized memory if we // short circuit around the corresponding expression.) 
- LLVM_TYPE_CONST llvm::Type *exprType = + llvm::Type *exprType = expr1->GetType()->LLVMType(g->ctx); llvm::Value *expr1Ptr = ctx->AllocaInst(exprType); llvm::Value *expr2Ptr = ctx->AllocaInst(exprType); @@ -3690,7 +3690,7 @@ ExprList::GetConstant(const Type *type) const { return NULL; } - LLVM_TYPE_CONST llvm::Type *llvmType = elementType->LLVMType(g->ctx); + llvm::Type *llvmType = elementType->LLVMType(g->ctx); if (llvmType == NULL) { Assert(m->errorCount > 0); return NULL; @@ -3701,23 +3701,23 @@ ExprList::GetConstant(const Type *type) const { } if (dynamic_cast(type) != NULL) { - LLVM_TYPE_CONST llvm::StructType *llvmStructType = - llvm::dyn_cast(collectionType->LLVMType(g->ctx)); + llvm::StructType *llvmStructType = + llvm::dyn_cast(collectionType->LLVMType(g->ctx)); Assert(llvmStructType != NULL); return llvm::ConstantStruct::get(llvmStructType, cv); } else { - LLVM_TYPE_CONST llvm::Type *lt = type->LLVMType(g->ctx); - LLVM_TYPE_CONST llvm::ArrayType *lat = - llvm::dyn_cast(lt); + llvm::Type *lt = type->LLVMType(g->ctx); + llvm::ArrayType *lat = + llvm::dyn_cast(lt); if (lat != NULL) return llvm::ConstantArray::get(lat, cv); else { // uniform short vector type Assert(type->IsUniformType() && dynamic_cast(type) != NULL); - LLVM_TYPE_CONST llvm::VectorType *lvt = - llvm::dyn_cast(lt); + llvm::VectorType *lvt = + llvm::dyn_cast(lt); Assert(lvt != NULL); // Uniform short vectors are stored as vectors of length @@ -3994,10 +3994,10 @@ IndexExpr::GetBaseSymbol() const { static llvm::Value * lConvertToSlicePointer(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType *slicePtrType) { - LLVM_TYPE_CONST llvm::Type *llvmSlicePtrType = + llvm::Type *llvmSlicePtrType = slicePtrType->LLVMType(g->ctx); - LLVM_TYPE_CONST llvm::StructType *sliceStructType = - llvm::dyn_cast(llvmSlicePtrType); + llvm::StructType *sliceStructType = + llvm::dyn_cast(llvmSlicePtrType); Assert(sliceStructType != NULL && sliceStructType->getElementType(0) == ptr->getType()); @@ -5675,7 
+5675,7 @@ ConstExpr::GetConstant(const Type *type) const { // The only time we should get here is if we have an integer '0' // constant that should be turned into a NULL pointer of the // appropriate type. - LLVM_TYPE_CONST llvm::Type *llvmType = type->LLVMType(g->ctx); + llvm::Type *llvmType = type->LLVMType(g->ctx); if (llvmType == NULL) { Assert(m->errorCount > 0); return NULL; @@ -5788,7 +5788,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, switch (toType->basicType) { case AtomicType::TYPE_FLOAT: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::FloatType : LLVMTypes::FloatVectorType; switch (fromType->basicType) { @@ -5832,7 +5832,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_DOUBLE: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::DoubleType : LLVMTypes::DoubleVectorType; switch (fromType->basicType) { @@ -5870,7 +5870,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_INT8: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::Int8Type : LLVMTypes::Int8VectorType; switch (fromType->basicType) { @@ -5906,7 +5906,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_UINT8: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::Int8Type : LLVMTypes::Int8VectorType; switch (fromType->basicType) { @@ -5948,7 +5948,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_INT16: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? 
LLVMTypes::Int16Type : LLVMTypes::Int16VectorType; switch (fromType->basicType) { @@ -5988,7 +5988,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_UINT16: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::Int16Type : LLVMTypes::Int16VectorType; switch (fromType->basicType) { @@ -6034,7 +6034,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_INT32: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::Int32Type : LLVMTypes::Int32VectorType; switch (fromType->basicType) { @@ -6074,7 +6074,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_UINT32: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::Int32Type : LLVMTypes::Int32VectorType; switch (fromType->basicType) { @@ -6120,7 +6120,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_INT64: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? LLVMTypes::Int64Type : LLVMTypes::Int64VectorType; switch (fromType->basicType) { @@ -6158,7 +6158,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; } case AtomicType::TYPE_UINT64: { - LLVM_TYPE_CONST llvm::Type *targetType = + llvm::Type *targetType = fromType->IsUniformType() ? 
LLVMTypes::Int64Type : LLVMTypes::Int64VectorType; switch (fromType->basicType) { @@ -6302,7 +6302,7 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value, const CollectionType *collectionType = dynamic_cast(type); if (collectionType != NULL) { - LLVM_TYPE_CONST llvm::Type *llvmType = + llvm::Type *llvmType = type->GetAsVaryingType()->LLVMType(g->ctx); llvm::Value *retValue = llvm::UndefValue::get(llvmType); @@ -6404,10 +6404,10 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { Assert(dynamic_cast(toType) != NULL); if (toType->IsBoolType()) { // convert pointer to bool - LLVM_TYPE_CONST llvm::Type *lfu = + llvm::Type *lfu = fromType->GetAsUniformType()->LLVMType(g->ctx); - LLVM_TYPE_CONST llvm::PointerType *llvmFromUnifType = - llvm::dyn_cast(lfu); + llvm::PointerType *llvmFromUnifType = + llvm::dyn_cast(lfu); llvm::Value *nullPtrValue = llvm::ConstantPointerNull::get(llvmFromUnifType); @@ -6436,7 +6436,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { if (toType->IsVaryingType() && fromType->IsUniformType()) value = ctx->SmearUniform(value); - LLVM_TYPE_CONST llvm::Type *llvmToType = toType->LLVMType(g->ctx); + llvm::Type *llvmToType = toType->LLVMType(g->ctx); if (llvmToType == NULL) return NULL; return ctx->PtrToIntInst(value, llvmToType, "ptr_typecast"); @@ -6479,7 +6479,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { Assert(Type::EqualIgnoringConst(toArrayType->GetBaseType(), fromArrayType->GetBaseType())); llvm::Value *v = expr->GetValue(ctx); - LLVM_TYPE_CONST llvm::Type *ptype = toType->LLVMType(g->ctx); + llvm::Type *ptype = toType->LLVMType(g->ctx); return ctx->BitCastInst(v, ptype); //, "array_cast_0size"); } @@ -6501,7 +6501,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { Assert(Type::EqualIgnoringConst(toArray->GetBaseType(), fromArray->GetBaseType())); llvm::Value *v = expr->GetValue(ctx); - LLVM_TYPE_CONST llvm::Type *ptype = toType->LLVMType(g->ctx); + llvm::Type *ptype = 
toType->LLVMType(g->ctx); return ctx->BitCastInst(v, ptype); //, "array_cast_0size"); } @@ -6589,7 +6589,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { if (toType->IsVaryingType() && fromType->IsUniformType()) exprVal = ctx->SmearUniform(exprVal); - LLVM_TYPE_CONST llvm::Type *llvmToType = toType->LLVMType(g->ctx); + llvm::Type *llvmToType = toType->LLVMType(g->ctx); if (llvmToType == NULL) return NULL; @@ -6828,7 +6828,7 @@ lConvertPointerConstant(llvm::Constant *c, const Type *constType) { if (constType->IsVaryingType()) return llvm::ConstantVector::get(smear); else { - LLVM_TYPE_CONST llvm::ArrayType *at = + llvm::ArrayType *at = llvm::ArrayType::get(LLVMTypes::PointerIntType, count); return llvm::ConstantArray::get(at, smear); } @@ -6881,7 +6881,7 @@ ReferenceExpr::GetValue(FunctionEmitContext *ctx) const { // value is NULL if the expression is a temporary; in this case, we'll // allocate storage for it so that we can return the pointer to that... const Type *type; - LLVM_TYPE_CONST llvm::Type *llvmType; + llvm::Type *llvmType; if ((type = expr->GetType()) == NULL || (llvmType = type->LLVMType(g->ctx)) == NULL) { Assert(m->errorCount > 0); @@ -7299,7 +7299,7 @@ SizeOfExpr::GetValue(FunctionEmitContext *ctx) const { if (t == NULL) return NULL; - LLVM_TYPE_CONST llvm::Type *llvmType = t->LLVMType(g->ctx); + llvm::Type *llvmType = t->LLVMType(g->ctx); if (llvmType == NULL) return NULL; @@ -7913,7 +7913,7 @@ NullPointerExpr::GetConstant(const Type *type) const { if (pt == NULL) return NULL; - LLVM_TYPE_CONST llvm::Type *llvmType = type->LLVMType(g->ctx); + llvm::Type *llvmType = type->LLVMType(g->ctx); if (llvmType == NULL) { Assert(m->errorCount > 0); return NULL; @@ -8059,7 +8059,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const { // Initialize the memory pointed to by the pointer for the // current lane. 
ctx->SetCurrentBasicBlock(bbInit); - LLVM_TYPE_CONST llvm::Type *ptrType = + llvm::Type *ptrType = retType->GetAsUniformType()->LLVMType(g->ctx); llvm::Value *ptr = ctx->IntToPtrInst(p, ptrType); InitSymbol(ptr, allocType, initExpr, ctx, pos); @@ -8075,7 +8075,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const { // For uniform news, we just need to cast the void * to be a // pointer of the return type and to run the code for initializers, // if present. - LLVM_TYPE_CONST llvm::Type *ptrType = retType->LLVMType(g->ctx); + llvm::Type *ptrType = retType->LLVMType(g->ctx); ptrValue = ctx->BitCastInst(ptrValue, ptrType, "cast_new_ptr"); if (initExpr != NULL) diff --git a/func.cpp b/func.cpp index d6e24d72..29dd9ecf 100644 --- a/func.cpp +++ b/func.cpp @@ -165,7 +165,7 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const llvm::dyn_cast(pt->getElementType()); // Get the type of the argument we're copying in and its Symbol pointer - LLVM_TYPE_CONST llvm::Type *argType = argStructType->getElementType(i); + llvm::Type *argType = argStructType->getElementType(i); Symbol *sym = args[i]; if (sym == NULL) @@ -435,7 +435,7 @@ Function::GenerateIR() { Assert(type != NULL); if (type->isExported) { if (!type->isTask) { - LLVM_TYPE_CONST llvm::FunctionType *ftype = + llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx); llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage; std::string functionName = sym->name; diff --git a/ispc.cpp b/ispc.cpp index e9357832..dce3ed77 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -457,7 +457,7 @@ Target::GetISAString() const { static bool -lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) { +lGenericTypeLayoutIndeterminate(llvm::Type *type) { if (type->isPrimitiveType() || type->isIntegerTy()) return false; @@ -466,18 +466,18 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) { type == LLVMTypes::Int1VectorType) return true; - LLVM_TYPE_CONST llvm::ArrayType *at = - 
llvm::dyn_cast(type); + llvm::ArrayType *at = + llvm::dyn_cast(type); if (at != NULL) return lGenericTypeLayoutIndeterminate(at->getElementType()); - LLVM_TYPE_CONST llvm::PointerType *pt = - llvm::dyn_cast(type); + llvm::PointerType *pt = + llvm::dyn_cast(type); if (pt != NULL) return false; - LLVM_TYPE_CONST llvm::StructType *st = - llvm::dyn_cast(type); + llvm::StructType *st = + llvm::dyn_cast(type); if (st != NULL) { for (int i = 0; i < (int)st->getNumElements(); ++i) if (lGenericTypeLayoutIndeterminate(st->getElementType(i))) @@ -485,18 +485,18 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) { return false; } - Assert(llvm::isa(type)); + Assert(llvm::isa(type)); return true; } llvm::Value * -Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type, +Target::SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd) { if (isa == Target::GENERIC && lGenericTypeLayoutIndeterminate(type)) { llvm::Value *index[1] = { LLVMInt32(1) }; - LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0); + llvm::PointerType *ptrType = llvm::PointerType::get(type, 0); llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType); #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&index[0], &index[1]); @@ -529,12 +529,12 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type, llvm::Value * -Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element, +Target::StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd) { if (isa == Target::GENERIC && lGenericTypeLayoutIndeterminate(type) == true) { llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) }; - LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0); + llvm::PointerType *ptrType = llvm::PointerType::get(type, 0); llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType); #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&indices[0], &indices[2]); @@ 
-556,8 +556,8 @@ Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element, const llvm::TargetData *td = GetTargetMachine()->getTargetData(); Assert(td != NULL); - LLVM_TYPE_CONST llvm::StructType *structType = - llvm::dyn_cast(type); + llvm::StructType *structType = + llvm::dyn_cast(type); Assert(structType != NULL); const llvm::StructLayout *sl = td->getStructLayout(structType); Assert(sl != NULL); diff --git a/ispc.h b/ispc.h index 480c4bb1..d93788ec 100644 --- a/ispc.h +++ b/ispc.h @@ -92,12 +92,6 @@ namespace llvm { class Value; } -// llvm::Type *s are no longer const in llvm 3.0 -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) -#define LLVM_TYPE_CONST -#else -#define LLVM_TYPE_CONST const -#endif class ArrayType; class AST; @@ -191,13 +185,13 @@ struct Target { const char *GetISAString() const; /** Returns the size of the given type */ - llvm::Value *SizeOf(LLVM_TYPE_CONST llvm::Type *type, + llvm::Value *SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd); /** Given a structure type and an element number in the structure, returns a value corresponding to the number of bytes from the start of the structure where the element is located. */ - llvm::Value *StructOffset(LLVM_TYPE_CONST llvm::Type *type, + llvm::Value *StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd); /** llvm Target object representing this target. 
*/ diff --git a/llvmutil.cpp b/llvmutil.cpp index 6012d4eb..5febaadf 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -43,44 +43,44 @@ #include #include -LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL; -LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::PointerIntType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL; +llvm::Type *LLVMTypes::VoidType = NULL; +llvm::PointerType *LLVMTypes::VoidPointerType = NULL; +llvm::Type *LLVMTypes::PointerIntType = NULL; +llvm::Type *LLVMTypes::BoolType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL; +llvm::Type *LLVMTypes::Int8Type = NULL; +llvm::Type *LLVMTypes::Int16Type = NULL; +llvm::Type *LLVMTypes::Int32Type = NULL; +llvm::Type *LLVMTypes::Int64Type = NULL; +llvm::Type *LLVMTypes::FloatType = NULL; +llvm::Type *LLVMTypes::DoubleType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8PointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16PointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL; +llvm::Type *LLVMTypes::Int8PointerType = NULL; +llvm::Type *LLVMTypes::Int16PointerType = NULL; +llvm::Type *LLVMTypes::Int32PointerType = NULL; +llvm::Type *LLVMTypes::Int64PointerType = NULL; +llvm::Type *LLVMTypes::FloatPointerType = NULL; +llvm::Type *LLVMTypes::DoublePointerType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL; 
+llvm::VectorType *LLVMTypes::MaskType = NULL; +llvm::VectorType *LLVMTypes::BoolVectorType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int8VectorType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int16VectorType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL; +llvm::VectorType *LLVMTypes::Int1VectorType = NULL; +llvm::VectorType *LLVMTypes::Int8VectorType = NULL; +llvm::VectorType *LLVMTypes::Int16VectorType = NULL; +llvm::VectorType *LLVMTypes::Int32VectorType = NULL; +llvm::VectorType *LLVMTypes::Int64VectorType = NULL; +llvm::VectorType *LLVMTypes::FloatVectorType = NULL; +llvm::VectorType *LLVMTypes::DoubleVectorType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8VectorPointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16VectorPointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL; +llvm::Type *LLVMTypes::Int8VectorPointerType = NULL; +llvm::Type *LLVMTypes::Int16VectorPointerType = NULL; +llvm::Type *LLVMTypes::Int32VectorPointerType = NULL; +llvm::Type *LLVMTypes::Int64VectorPointerType = NULL; +llvm::Type *LLVMTypes::FloatVectorPointerType = NULL; +llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL; -LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL; +llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL; llvm::Constant *LLVMTrue = NULL; llvm::Constant *LLVMFalse = NULL; @@ -473,9 +473,9 @@ LLVMBoolVector(const bool *bvec) { llvm::Constant * 
-LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) { - LLVM_TYPE_CONST llvm::VectorType *vecType = - llvm::dyn_cast(type); +LLVMIntAsType(int64_t val, llvm::Type *type) { + llvm::VectorType *vecType = + llvm::dyn_cast(type); if (vecType != NULL) { llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(), @@ -491,9 +491,9 @@ LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) { llvm::Constant * -LLVMUIntAsType(uint64_t val, LLVM_TYPE_CONST llvm::Type *type) { - LLVM_TYPE_CONST llvm::VectorType *vecType = - llvm::dyn_cast(type); +LLVMUIntAsType(uint64_t val, llvm::Type *type) { + llvm::VectorType *vecType = + llvm::dyn_cast(type); if (vecType != NULL) { llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(), @@ -642,8 +642,8 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth, bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) { // Make sure we do in fact have a vector of integer values here - LLVM_TYPE_CONST llvm::VectorType *vt = - llvm::dyn_cast(v->getType()); + llvm::VectorType *vt = + llvm::dyn_cast(v->getType()); Assert(vt != NULL); Assert(llvm::isa(vt->getElementType())); @@ -696,7 +696,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength, static bool lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength, std::vector &seenPhis) { - if (llvm::isa(val->getType()) == false) { + if (llvm::isa(val->getType()) == false) { // If we've worked down to a constant int, then the moment of truth // has arrived... 
llvm::ConstantInt *ci = llvm::dyn_cast(val); @@ -780,7 +780,7 @@ static bool lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength, std::vector &seenPhis, bool &canAdd) { - Assert(llvm::isa(val->getType())); + Assert(llvm::isa(val->getType())); // Make sure the base value is a positive power of 2 Assert(baseValue > 0 && (baseValue & (baseValue-1)) == 0); @@ -790,7 +790,7 @@ lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength, int64_t vecVals[ISPC_MAX_NVEC]; int nElts; - if (llvm::isa(val->getType()) && + if (llvm::isa(val->getType()) && LLVMExtractVectorInts(val, vecVals, &nElts)) { // If we have a vector of compile-time constant integer values, // then go ahead and check them directly.. @@ -1074,8 +1074,8 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength, */ bool LLVMVectorValuesAllEqual(llvm::Value *v) { - LLVM_TYPE_CONST llvm::VectorType *vt = - llvm::dyn_cast(v->getType()); + llvm::VectorType *vt = + llvm::dyn_cast(v->getType()); Assert(vt != NULL); int vectorLength = vt->getNumElements(); @@ -1344,8 +1344,8 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride, */ bool LLVMVectorIsLinear(llvm::Value *v, int stride) { - LLVM_TYPE_CONST llvm::VectorType *vt = - llvm::dyn_cast(v->getType()); + llvm::VectorType *vt = + llvm::dyn_cast(v->getType()); Assert(vt != NULL); int vectorLength = vt->getNumElements(); @@ -1399,8 +1399,8 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt", insertBefore); - LLVM_TYPE_CONST llvm::VectorType *vt = - llvm::dyn_cast(v->getType()); + llvm::VectorType *vt = + llvm::dyn_cast(v->getType()); Assert(vt != NULL); std::string newName = v->getName().str() + std::string(".elt0"); @@ -1489,8 +1489,8 @@ LLVMConcatVectors(llvm::Value *v1, llvm::Value *v2, llvm::Instruction *insertBefore) { Assert(v1->getType() == v2->getType()); - LLVM_TYPE_CONST llvm::VectorType *vt = - 
llvm::dyn_cast(v1->getType()); + llvm::VectorType *vt = + llvm::dyn_cast(v1->getType()); Assert(vt != NULL); int32_t identity[ISPC_MAX_NVEC]; diff --git a/llvmutil.h b/llvmutil.h index 0025bf8e..de50ae70 100644 --- a/llvmutil.h +++ b/llvmutil.h @@ -48,53 +48,50 @@ namespace llvm { class InsertElementInst; } -// llvm::Type *s are no longer const in llvm 3.0 -#define LLVM_TYPE_CONST - /** This structure holds pointers to a variety of LLVM types; code elsewhere can use them from here, ratherthan needing to make more verbose LLVM API calls. */ struct LLVMTypes { - static LLVM_TYPE_CONST llvm::Type *VoidType; - static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType; - static LLVM_TYPE_CONST llvm::Type *PointerIntType; - static LLVM_TYPE_CONST llvm::Type *BoolType; + static llvm::Type *VoidType; + static llvm::PointerType *VoidPointerType; + static llvm::Type *PointerIntType; + static llvm::Type *BoolType; - static LLVM_TYPE_CONST llvm::Type *Int8Type; - static LLVM_TYPE_CONST llvm::Type *Int16Type; - static LLVM_TYPE_CONST llvm::Type *Int32Type; - static LLVM_TYPE_CONST llvm::Type *Int64Type; - static LLVM_TYPE_CONST llvm::Type *FloatType; - static LLVM_TYPE_CONST llvm::Type *DoubleType; + static llvm::Type *Int8Type; + static llvm::Type *Int16Type; + static llvm::Type *Int32Type; + static llvm::Type *Int64Type; + static llvm::Type *FloatType; + static llvm::Type *DoubleType; - static LLVM_TYPE_CONST llvm::Type *Int8PointerType; - static LLVM_TYPE_CONST llvm::Type *Int16PointerType; - static LLVM_TYPE_CONST llvm::Type *Int32PointerType; - static LLVM_TYPE_CONST llvm::Type *Int64PointerType; - static LLVM_TYPE_CONST llvm::Type *FloatPointerType; - static LLVM_TYPE_CONST llvm::Type *DoublePointerType; + static llvm::Type *Int8PointerType; + static llvm::Type *Int16PointerType; + static llvm::Type *Int32PointerType; + static llvm::Type *Int64PointerType; + static llvm::Type *FloatPointerType; + static llvm::Type *DoublePointerType; - static LLVM_TYPE_CONST 
llvm::VectorType *MaskType; + static llvm::VectorType *MaskType; - static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType; - static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType; - static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType; - static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType; - static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType; - static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType; - static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType; - static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType; + static llvm::VectorType *BoolVectorType; + static llvm::VectorType *Int1VectorType; + static llvm::VectorType *Int8VectorType; + static llvm::VectorType *Int16VectorType; + static llvm::VectorType *Int32VectorType; + static llvm::VectorType *Int64VectorType; + static llvm::VectorType *FloatVectorType; + static llvm::VectorType *DoubleVectorType; - static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType; - static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType; - static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType; - static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType; - static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType; - static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType; + static llvm::Type *Int8VectorPointerType; + static llvm::Type *Int16VectorPointerType; + static llvm::Type *Int32VectorPointerType; + static llvm::Type *Int64VectorPointerType; + static llvm::Type *FloatVectorPointerType; + static llvm::Type *DoubleVectorPointerType; - static LLVM_TYPE_CONST llvm::VectorType *VoidPointerVectorType; + static llvm::VectorType *VoidPointerVectorType; }; /** These variables hold the corresponding LLVM constant values as a @@ -171,11 +168,11 @@ extern llvm::Constant *LLVMDoubleVector(double f); /** Returns a constant integer or vector (according to the given type) of the given signed integer value. 
*/ -extern llvm::Constant *LLVMIntAsType(int64_t, LLVM_TYPE_CONST llvm::Type *t); +extern llvm::Constant *LLVMIntAsType(int64_t, llvm::Type *t); /** Returns a constant integer or vector (according to the given type) of the given unsigned integer value. */ -extern llvm::Constant *LLVMUIntAsType(uint64_t, LLVM_TYPE_CONST llvm::Type *t); +extern llvm::Constant *LLVMUIntAsType(uint64_t, llvm::Type *t); /** Returns an LLVM boolean vector based on the given array of values. The array should have g->target.vectorWidth elements. */ diff --git a/module.cpp b/module.cpp index 7ccc6db6..48bbf81b 100644 --- a/module.cpp +++ b/module.cpp @@ -278,7 +278,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE return; } - LLVM_TYPE_CONST llvm::Type *llvmType = type->LLVMType(g->ctx); + llvm::Type *llvmType = type->LLVMType(g->ctx); if (llvmType == NULL) return; @@ -573,7 +573,7 @@ Module::AddFunctionDeclaration(const std::string &name, // Get the LLVM FunctionType bool includeMask = (storageClass != SC_EXTERN_C); - LLVM_TYPE_CONST llvm::FunctionType *llvmFunctionType = + llvm::FunctionType *llvmFunctionType = functionType->LLVMFunctionType(g->ctx, includeMask); if (llvmFunctionType == NULL) return; @@ -1405,7 +1405,7 @@ lAddExtractedGlobals(llvm::Module *module, for (unsigned int i = 0; i < globals[firstActive].size(); ++i) { RewriteGlobalInfo &rgi = globals[firstActive][i]; llvm::GlobalVariable *gv = rgi.gv; - LLVM_TYPE_CONST llvm::Type *type = gv->getType()->getElementType(); + llvm::Type *type = gv->getType()->getElementType(); llvm::Constant *initializer = rgi.init; // Create a new global in the given model that matches the original @@ -1469,7 +1469,7 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc, // we'll start by generating an 'extern' declaration of each one that // we have in the current module so that we can then call out to that. 
llvm::Function *targetFuncs[Target::NUM_ISAS]; - LLVM_TYPE_CONST llvm::FunctionType *ftype = NULL; + llvm::FunctionType *ftype = NULL; for (int i = 0; i < Target::NUM_ISAS; ++i) { if (funcs.func[i] == NULL) { diff --git a/opt.cpp b/opt.cpp index a16d4bad..6630331a 100644 --- a/opt.cpp +++ b/opt.cpp @@ -984,7 +984,7 @@ static llvm::Value * lCheckForActualPointer(llvm::Value *v) { if (v == NULL) return NULL; - else if (llvm::isa(v->getType())) + else if (llvm::isa(v->getType())) return v; else if (llvm::isa(v)) return v; @@ -1908,8 +1908,8 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { } else if (maskAsInt == allOnMask) { // The mask is all on, so turn this into a regular store - LLVM_TYPE_CONST llvm::Type *rvalueType = rvalue->getType(); - LLVM_TYPE_CONST llvm::Type *ptrType = + llvm::Type *rvalueType = rvalue->getType(); + llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0); lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst); @@ -2011,7 +2011,7 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { } else if (maskAsInt == allOnMask) { // The mask is all on, so turn this into a regular load - LLVM_TYPE_CONST llvm::Type *ptrType = + llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0); ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load", callInst); @@ -2069,17 +2069,17 @@ lIsSafeToBlend(llvm::Value *lvalue) { else { llvm::AllocaInst *ai = llvm::dyn_cast(lvalue); if (ai) { - LLVM_TYPE_CONST llvm::Type *type = ai->getType(); - LLVM_TYPE_CONST llvm::PointerType *pt = - llvm::dyn_cast(type); + llvm::Type *type = ai->getType(); + llvm::PointerType *pt = + llvm::dyn_cast(type); assert(pt != NULL); type = pt->getElementType(); - LLVM_TYPE_CONST llvm::ArrayType *at; - while ((at = llvm::dyn_cast(type))) { + llvm::ArrayType *at; + while ((at = llvm::dyn_cast(type))) { type = at->getElementType(); } - LLVM_TYPE_CONST llvm::VectorType *vt = - llvm::dyn_cast(type); + llvm::VectorType *vt = + 
llvm::dyn_cast(type); return (vt != NULL && (int)vt->getNumElements() == g->target.vectorWidth); } @@ -2232,7 +2232,7 @@ lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets, struct ScatterImpInfo { ScatterImpInfo(const char *pName, const char *msName, - LLVM_TYPE_CONST llvm::Type *vpt, int a) + llvm::Type *vpt, int a) : align(a) { pseudoFunc = m->module->getFunction(pName); maskedStoreFunc = m->module->getFunction(msName); @@ -2241,7 +2241,7 @@ struct ScatterImpInfo { } llvm::Function *pseudoFunc; llvm::Function *maskedStoreFunc; - LLVM_TYPE_CONST llvm::Type *vecPtrType; + llvm::Type *vecPtrType; const int align; }; @@ -2742,7 +2742,7 @@ lCoalescePerfInfo(const std::vector &coalesceGroup, */ llvm::Value * lGEPAndLoad(llvm::Value *basePtr, int64_t offset, int align, - llvm::Instruction *insertBefore, LLVM_TYPE_CONST llvm::Type *type) { + llvm::Instruction *insertBefore, llvm::Type *type) { llvm::Value *ptr = lGEPInst(basePtr, LLVMInt64(offset), "new_base", insertBefore); ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(type, 0), @@ -2796,7 +2796,7 @@ lEmitLoads(llvm::Value *basePtr, std::vector &loadOps, } case 4: { // 4-wide vector load - LLVM_TYPE_CONST llvm::VectorType *vt = + llvm::VectorType *vt = llvm::VectorType::get(LLVMTypes::Int32Type, 4); loadOps[i].load = lGEPAndLoad(basePtr, start, align, insertBefore, vt); @@ -2804,7 +2804,7 @@ lEmitLoads(llvm::Value *basePtr, std::vector &loadOps, } case 8: { // 8-wide vector load - LLVM_TYPE_CONST llvm::VectorType *vt = + llvm::VectorType *vt = llvm::VectorType::get(LLVMTypes::Int32Type, 8); loadOps[i].load = lGEPAndLoad(basePtr, start, align, insertBefore, vt); @@ -2896,7 +2896,7 @@ lApplyLoad2(llvm::Value *result, const CoalescedLoadOp &load, Assert(set[elt] == false && set[elt+1] == false); // In this case, we bitcast from a 4xi32 to a 2xi64 vector - LLVM_TYPE_CONST llvm::Type *vec2x64Type = + llvm::Type *vec2x64Type = llvm::VectorType::get(LLVMTypes::Int64Type, 2); result = new 
llvm::BitCastInst(result, vec2x64Type, "to2x64", insertBefore); @@ -2908,7 +2908,7 @@ lApplyLoad2(llvm::Value *result, const CoalescedLoadOp &load, "insert64", insertBefore); // And back to 4xi32. - LLVM_TYPE_CONST llvm::Type *vec4x32Type = + llvm::Type *vec4x32Type = llvm::VectorType::get(LLVMTypes::Int32Type, 4); result = new llvm::BitCastInst(result, vec4x32Type, "to4x32", insertBefore); @@ -2988,7 +2988,7 @@ lApplyLoad4(llvm::Value *result, const CoalescedLoadOp &load, static llvm::Value * lAssemble4Vector(const std::vector &loadOps, const int64_t offsets[4], llvm::Instruction *insertBefore) { - LLVM_TYPE_CONST llvm::Type *returnType = + llvm::Type *returnType = llvm::VectorType::get(LLVMTypes::Int32Type, 4); llvm::Value *result = llvm::UndefValue::get(returnType); @@ -3128,7 +3128,7 @@ lApplyLoad12s(llvm::Value *result, const std::vector &loadOps, static llvm::Value * lAssemble4Vector(const std::vector &loadOps, const int64_t offsets[4], llvm::Instruction *insertBefore) { - LLVM_TYPE_CONST llvm::Type *returnType = + llvm::Type *returnType = llvm::VectorType::get(LLVMTypes::Int32Type, 4); llvm::Value *result = llvm::UndefValue::get(returnType); diff --git a/stmt.cpp b/stmt.cpp index 53a451ae..4f8c0f12 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -189,7 +189,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { } } - LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx); + llvm::Type *llvmType = sym->type->LLVMType(g->ctx); if (llvmType == NULL) { Assert(m->errorCount > 0); return; @@ -2497,7 +2497,7 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) { else { argTypes.push_back(t); - LLVM_TYPE_CONST llvm::Type *llvmExprType = type->LLVMType(g->ctx); + llvm::Type *llvmExprType = type->LLVMType(g->ctx); llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg"); llvm::Value *val = expr->GetValue(ctx); if (!val) @@ -2537,7 +2537,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const { std::string argTypes; if (values 
== NULL) { - LLVM_TYPE_CONST llvm::Type *ptrPtrType = + llvm::Type *ptrPtrType = llvm::PointerType::get(LLVMTypes::VoidPointerType, 0); args[4] = llvm::Constant::getNullValue(ptrPtrType); } @@ -2549,7 +2549,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const { int nArgs = elist ? elist->exprs.size() : 1; // Allocate space for the array of pointers to values to be printed - LLVM_TYPE_CONST llvm::Type *argPtrArrayType = + llvm::Type *argPtrArrayType = llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs); llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType, "print_arg_ptrs"); diff --git a/type.cpp b/type.cpp index 1a433c1f..0e4ecfe1 100644 --- a/type.cpp +++ b/type.cpp @@ -414,7 +414,7 @@ AtomicType::GetCDeclaration(const std::string &name) const { } -LLVM_TYPE_CONST llvm::Type * +llvm::Type * AtomicType::LLVMType(llvm::LLVMContext *ctx) const { Assert(variability.type != Variability::Unbound); bool isUniform = (variability == Variability::Uniform); @@ -725,7 +725,7 @@ EnumType::GetCDeclaration(const std::string &varName) const { } -LLVM_TYPE_CONST llvm::Type * +llvm::Type * EnumType::LLVMType(llvm::LLVMContext *ctx) const { Assert(variability != Variability::Unbound); @@ -1083,7 +1083,7 @@ PointerType::GetCDeclaration(const std::string &name) const { } -LLVM_TYPE_CONST llvm::Type * +llvm::Type * PointerType::LLVMType(llvm::LLVMContext *ctx) const { if (baseType == NULL) { Assert(m->errorCount > 0); @@ -1098,7 +1098,7 @@ PointerType::LLVMType(llvm::LLVMContext *ctx) const { switch (variability.type) { case Variability::Uniform: { - LLVM_TYPE_CONST llvm::Type *ptype = NULL; + llvm::Type *ptype = NULL; const FunctionType *ftype = dynamic_cast(baseType); if (ftype != NULL) // Get the type of the function variant that takes the mask as the @@ -1178,14 +1178,14 @@ ArrayType::ArrayType(const Type *c, int a) } -LLVM_TYPE_CONST llvm::ArrayType * +llvm::ArrayType * ArrayType::LLVMType(llvm::LLVMContext *ctx) const { if (child == NULL) { Assert(m->errorCount > 
0); return NULL; } - LLVM_TYPE_CONST llvm::Type *ct = child->LLVMType(ctx); + llvm::Type *ct = child->LLVMType(ctx); if (ct == NULL) { Assert(m->errorCount > 0); return NULL; @@ -1630,14 +1630,14 @@ VectorType::GetElementType() const { } -LLVM_TYPE_CONST llvm::Type * +llvm::Type * VectorType::LLVMType(llvm::LLVMContext *ctx) const { if (base == NULL) { Assert(m->errorCount > 0); return NULL; } - LLVM_TYPE_CONST llvm::Type *bt = base->LLVMType(ctx); + llvm::Type *bt = base->LLVMType(ctx); if (!bt) return NULL; @@ -1912,9 +1912,9 @@ StructType::GetCDeclaration(const std::string &n) const { } -LLVM_TYPE_CONST llvm::Type * +llvm::Type * StructType::LLVMType(llvm::LLVMContext *ctx) const { - std::vector llvmTypes; + std::vector llvmTypes; for (int i = 0; i < GetElementCount(); ++i) { const Type *type = GetElementType(i); if (type == NULL) @@ -2257,14 +2257,14 @@ ReferenceType::GetCDeclaration(const std::string &name) const { } -LLVM_TYPE_CONST llvm::Type * +llvm::Type * ReferenceType::LLVMType(llvm::LLVMContext *ctx) const { if (targetType == NULL) { Assert(m->errorCount > 0); return NULL; } - LLVM_TYPE_CONST llvm::Type *t = targetType->LLVMType(ctx); + llvm::Type *t = targetType->LLVMType(ctx); if (t == NULL) { Assert(m->errorCount > 0); return NULL; @@ -2489,7 +2489,7 @@ FunctionType::GetCDeclaration(const std::string &fname) const { } -LLVM_TYPE_CONST llvm::Type * +llvm::Type * FunctionType::LLVMType(llvm::LLVMContext *ctx) const { FATAL("FunctionType::LLVMType() shouldn't be called"); return NULL; @@ -2540,13 +2540,13 @@ FunctionType::GetReturnTypeString() const { } -LLVM_TYPE_CONST llvm::FunctionType * +llvm::FunctionType * FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const { if (isTask == true) Assert(includeMask == true); // Get the LLVM Type *s for the function arguments - std::vector llvmArgTypes; + std::vector llvmArgTypes; for (unsigned int i = 0; i < paramTypes.size(); ++i) { if (paramTypes[i] == NULL) { Assert(m->errorCount > 0); 
@@ -2554,7 +2554,7 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const { } Assert(Type::Equal(paramTypes[i], AtomicType::Void) == false); - LLVM_TYPE_CONST llvm::Type *t = paramTypes[i]->LLVMType(ctx); + llvm::Type *t = paramTypes[i]->LLVMType(ctx); if (t == NULL) { Assert(m->errorCount > 0); return NULL; @@ -2566,7 +2566,7 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const { if (includeMask) llvmArgTypes.push_back(LLVMTypes::MaskType); - std::vector callTypes; + std::vector callTypes; if (isTask) { // Tasks take three arguments: a pointer to a struct that holds the // actual task arguments, the thread index, and the total number of diff --git a/type.h b/type.h index fdcc41e6..f81ea062 100644 --- a/type.h +++ b/type.h @@ -187,7 +187,7 @@ public: virtual std::string GetCDeclaration(const std::string &name) const = 0; /** Returns the LLVM type corresponding to this ispc type */ - virtual LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const = 0; + virtual llvm::Type *LLVMType(llvm::LLVMContext *ctx) const = 0; /** Returns the DIType (LLVM's debugging information structure), corresponding to this type. */ @@ -269,7 +269,7 @@ public: std::string Mangle() const; std::string GetCDeclaration(const std::string &name) const; - LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; /** This enumerator records the basic types that AtomicTypes can be @@ -343,7 +343,7 @@ public: std::string Mangle() const; std::string GetCDeclaration(const std::string &name) const; - LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; /** Provides the enumerators defined in the enum definition. 
*/ @@ -425,7 +425,7 @@ public: std::string Mangle() const; std::string GetCDeclaration(const std::string &name) const; - LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; static PointerType *Void; @@ -523,7 +523,7 @@ public: std::string GetCDeclaration(const std::string &name) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; - LLVM_TYPE_CONST llvm::ArrayType *LLVMType(llvm::LLVMContext *ctx) const; + llvm::ArrayType *LLVMType(llvm::LLVMContext *ctx) const; /** This method returns the total number of elements in the array, including all dimensions if this is a multidimensional array. */ @@ -589,7 +589,7 @@ public: std::string Mangle() const; std::string GetCDeclaration(const std::string &name) const; - LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; int GetElementCount() const; @@ -639,7 +639,7 @@ public: std::string Mangle() const; std::string GetCDeclaration(const std::string &name) const; - LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; /** Returns the type of the structure element with the given name (if any). 
@@ -719,7 +719,7 @@ public: std::string Mangle() const; std::string GetCDeclaration(const std::string &name) const; - LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; private: @@ -771,7 +771,7 @@ public: std::string Mangle() const; std::string GetCDeclaration(const std::string &fname) const; - LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const; const Type *GetReturnType() const { return returnType; } @@ -782,7 +782,7 @@ public: function type. The \c includeMask parameter indicates whether the llvm::FunctionType should have a mask as the last argument in its function signature. */ - LLVM_TYPE_CONST llvm::FunctionType *LLVMFunctionType(llvm::LLVMContext *ctx, + llvm::FunctionType *LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask = false) const; int GetNumParameters() const { return (int)paramTypes.size(); } From 99a27fe2413196b1873d3af45f418381dd7bb1ec Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 16 Apr 2012 06:27:21 -0700 Subject: [PATCH 072/173] Add support for forward declarations of structures. Now a declaration like 'struct Foo;' can be used to establish the name of a struct type, without providing a definition. One can pass pointers to such types around the system, but can't do much else with them (as in C/C++). Issue #125. 
--- ctx.cpp | 24 +- expr.cpp | 13 +- module.cpp | 12 +- parse.yy | 8 +- sym.cpp | 95 +++--- sym.h | 11 +- tests/struct-forward-decl-2.ispc | 36 +++ tests/struct-forward-decl.ispc | 33 ++ tests_errors/deref-3.ispc | 2 +- tests_errors/struct-ref-undecl-1.ispc | 5 + tests_errors/struct-ref-undecl-2.ispc | 5 + type.cpp | 417 ++++++++++++++++++++------ type.h | 49 ++- 13 files changed, 537 insertions(+), 173 deletions(-) create mode 100644 tests/struct-forward-decl-2.ispc create mode 100644 tests/struct-forward-decl.ispc create mode 100644 tests_errors/struct-ref-undecl-1.ispc create mode 100644 tests_errors/struct-ref-undecl-2.ispc diff --git a/ctx.cpp b/ctx.cpp index 4e7b3479..c76ec1b8 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -2609,14 +2609,22 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, const PointerType *pt = dynamic_cast(valueType); if (pt != NULL) { if (pt->IsSlice()) { - // For masked stores of (varying) slice pointers to memory, we - // grab the equivalent StructType and make a recursive call to - // maskedStore, giving it that type for the pointer type; that - // in turn will lead to the base pointer and offset index being - // mask stored to memory.. - const StructType *sliceStructType = pt->GetSliceStructType(); - ptrType = PointerType::GetUniform(sliceStructType); - maskedStore(value, ptr, ptrType, mask); + // Masked store of (varying) slice pointer. + Assert(pt->IsVaryingType()); + + // First, extract the pointer from the slice struct and masked + // store that. 
+ llvm::Value *v0 = ExtractInst(value, 0); + llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType); + maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()), + mask); + + // And then do same for the integer offset + llvm::Value *v1 = ExtractInst(value, 1); + llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType); + const Type *offsetType = AtomicType::VaryingInt32; + maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask); + return; } diff --git a/expr.cpp b/expr.cpp index fdaf878c..b43f9e54 100644 --- a/expr.cpp +++ b/expr.cpp @@ -4692,12 +4692,13 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos, exprType = pointerType->GetBaseType(); if (derefLValue == true && pointerType == NULL) { - if (dynamic_cast(exprType->GetReferenceTarget()) != NULL) - Error(p, "Dereference operator \"->\" can't be applied to non-pointer " + const Type *targetType = exprType->GetReferenceTarget(); + if (dynamic_cast(targetType) != NULL) + Error(p, "Member operator \"->\" can't be applied to non-pointer " "type \"%s\". Did you mean to use \".\"?", exprType->GetString().c_str()); else - Error(p, "Dereference operator \"->\" can't be applied to non-struct " + Error(p, "Member operator \"->\" can't be applied to non-struct " "pointer type \"%s\".", exprType->GetString().c_str()); return NULL; } @@ -4713,6 +4714,12 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos, return new StructMemberExpr(e, id, p, idpos, derefLValue); else if (dynamic_cast(exprType) != NULL) return new VectorMemberExpr(e, id, p, idpos, derefLValue); + else if (dynamic_cast(exprType)) { + Error(p, "Member operator \"%s\" can't be applied to declared " + "but not defined struct type \"%s\".", derefLValue ? "->" : ".", + exprType->GetString().c_str()); + return NULL; + } else { Error(p, "Member operator \"%s\" can't be used with expression of " "\"%s\" type.", derefLValue ? 
"->" : ".", diff --git a/module.cpp b/module.cpp index 48bbf81b..d082255f 100644 --- a/module.cpp +++ b/module.cpp @@ -1477,10 +1477,14 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc, continue; } - // Grab the type of the function as well. - if (ftype != NULL) - Assert(ftype == funcs.func[i]->getFunctionType()); - else + // Grab the type of the function as well. Note that the various + // functions will have different types if they have arguments that + // are pointers to structs, due to the fact that we mangle LLVM + // struct type names with the target vector width. However, + // because we only allow uniform stuff to pass through the + // export'ed function layer, they should all have the same memory + // layout, so this is benign.. + if (ftype == NULL) ftype = funcs.func[i]->getFunctionType(); targetFuncs[i] = diff --git a/parse.yy b/parse.yy index 8a1e02ee..30144a67 100644 --- a/parse.yy +++ b/parse.yy @@ -874,7 +874,6 @@ struct_or_union_specifier std::vector elementPositions; GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames, &elementPositions); - // FIXME: should be unbound $$ = new StructType("", elementTypes, elementNames, elementPositions, false, Variability::Unbound, @1); } @@ -892,10 +891,9 @@ struct_or_union_specifier | struct_or_union struct_or_union_name { const Type *st = m->symbolTable->LookupType($2); - if (!st) { - std::vector alternates = m->symbolTable->ClosestTypeMatch($2); - std::string alts = lGetAlternates(alternates); - Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str()); + if (st == NULL) { + st = new UndefinedStructType($2, Variability::Unbound, false, @2); + m->symbolTable->AddType($2, st, @2); } else if (dynamic_cast(st) == NULL) Error(@2, "Type \"%s\" is not a struct type! 
(%s)", $2, diff --git a/sym.cpp b/sym.cpp index 1a503c91..8c7e04a6 100644 --- a/sym.cpp +++ b/sym.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -66,7 +66,7 @@ SymbolTable::SymbolTable() { SymbolTable::~SymbolTable() { // Otherwise we have mismatched push/pop scopes - Assert(variables.size() == 1 && types.size() == 1); + Assert(variables.size() == 1); PopScope(); } @@ -74,7 +74,6 @@ SymbolTable::~SymbolTable() { void SymbolTable::PushScope() { variables.push_back(new SymbolMapType); - types.push_back(new TypeMapType); } @@ -83,10 +82,6 @@ SymbolTable::PopScope() { Assert(variables.size() > 1); delete variables.back(); variables.pop_back(); - - Assert(types.size() > 1); - delete types.back(); - types.pop_back(); } @@ -186,26 +181,17 @@ SymbolTable::LookupFunction(const char *name, const FunctionType *type) { bool SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) { - // Like AddVariable(), we go backwards through the type maps, working - // from innermost scope to outermost. - for (int i = types.size()-1; i >= 0; --i) { - TypeMapType &sm = *(types[i]); - if (sm.find(name) != sm.end()) { - if (i == (int)types.size() - 1) { - Error(pos, "Ignoring redefinition of type \"%s\".", name); - return false; - } - else { - Warning(pos, "Type \"%s\" shadows type declared in outer scope.", name); - TypeMapType &sm = *(types.back()); - sm[name] = type; - return true; - } - } + const Type *t = LookupType(name); + if (t != NULL && dynamic_cast(t) == NULL) { + // If we have a previous declaration of anything other than an + // UndefinedStructType with this struct name, issue an error. If + // we have an UndefinedStructType, then we'll fall through to the + // code below that adds the definition to the type map. 
+ Error(pos, "Ignoring redefinition of type \"%s\".", name); + return false; } - TypeMapType &sm = *(types.back()); - sm[name] = type; + types[name] = type; return true; } @@ -213,11 +199,9 @@ SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) { const Type * SymbolTable::LookupType(const char *name) const { // Again, search through the type maps backward to get scoping right. - for (int i = types.size()-1; i >= 0; --i) { - TypeMapType &sm = *(types[i]); - if (sm.find(name) != sm.end()) - return sm[name]; - } + TypeMapType::const_iterator iter = types.find(name); + if (iter != types.end()) + return iter->second; return NULL; } @@ -282,21 +266,19 @@ SymbolTable::closestTypeMatch(const char *str, bool structsVsEnums) const { const int maxDelta = 2; std::vector matches[maxDelta+1]; - for (unsigned int i = 0; i < types.size(); ++i) { - TypeMapType::const_iterator iter; - for (iter = types[i]->begin(); iter != types[i]->end(); ++iter) { - // Skip over either StructTypes or EnumTypes, depending on the - // value of the structsVsEnums parameter - bool isEnum = (dynamic_cast(iter->second) != NULL); - if (isEnum && structsVsEnums) - continue; - else if (!isEnum && !structsVsEnums) - continue; + TypeMapType::const_iterator iter; + for (iter = types.begin(); iter != types.end(); ++iter) { + // Skip over either StructTypes or EnumTypes, depending on the + // value of the structsVsEnums parameter + bool isEnum = (dynamic_cast(iter->second) != NULL); + if (isEnum && structsVsEnums) + continue; + else if (!isEnum && !structsVsEnums) + continue; - int dist = StringEditDistance(str, iter->first, maxDelta+1); - if (dist <= maxDelta) - matches[dist].push_back(iter->first); - } + int dist = StringEditDistance(str, iter->first, maxDelta+1); + if (dist <= maxDelta) + matches[dist].push_back(iter->first); } for (int i = 0; i <= maxDelta; ++i) { @@ -336,16 +318,12 @@ SymbolTable::Print() { depth = 0; fprintf(stderr, "Named types:\n---------------\n"); - for (unsigned 
int i = 0; i < types.size(); ++i) { - TypeMapType &sm = *types[i]; - TypeMapType::iterator siter = sm.begin(); - while (siter != sm.end()) { - fprintf(stderr, "%*c", depth, ' '); - fprintf(stderr, "%s -> %s\n", siter->first.c_str(), - siter->second->GetString().c_str()); - ++siter; - } - depth += 4; + TypeMapType::iterator siter = types.begin(); + while (siter != types.end()) { + fprintf(stderr, "%*c", depth, ' '); + fprintf(stderr, "%s -> %s\n", siter->first.c_str(), + siter->second->GetString().c_str()); + ++siter; } } @@ -376,14 +354,11 @@ SymbolTable::RandomSymbol() { const Type * SymbolTable::RandomType() { - int v = ispcRand() % types.size(); - if (types[v]->size() == 0) - return NULL; - int count = ispcRand() % types[v]->size(); - TypeMapType::iterator iter = types[v]->begin(); + int count = types.size(); + TypeMapType::iterator iter = types.begin(); while (count-- > 0) { ++iter; - Assert(iter != types[v]->end()); + Assert(iter != types.end()); } return iter->second; } diff --git a/sym.h b/sym.h index 24eb810f..43c8ff16 100644 --- a/sym.h +++ b/sym.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -201,6 +201,9 @@ public: /** Adds the named type to the symbol table. This is used for both struct definitions (where struct Foo causes type \c Foo to be added to the symbol table) as well as for typedefs. + For structs that so far only have a forward declaration ("struct Foo;") + and are thus UndefinedStructTypes, this method replaces these with an + actual struct definition if one is provided. @param name Name of the type to be added @param type Type that \c name represents @@ -265,12 +268,10 @@ private: typedef std::map > FunctionMapType; FunctionMapType functions; - /** Type definitions can also be scoped. A new \c TypeMapType - is added to the back of the \c types \c vector each time a new scope - is entered.
(And it's removed when the scope exits). + /** Type definitions can't currently be scoped. */ typedef std::map TypeMapType; - std::vector types; + TypeMapType types; }; diff --git a/tests/struct-forward-decl-2.ispc b/tests/struct-forward-decl-2.ispc new file mode 100644 index 00000000..2660c541 --- /dev/null +++ b/tests/struct-forward-decl-2.ispc @@ -0,0 +1,36 @@ + +export uniform int width() { return programCount; } + +struct Foo; + +void bing(Foo * uniform); + +struct Foo { + int i; + varying float f; + Foo * uniform next; +}; + +void bar(Foo * uniform f) { + bing(f); +} + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform Foo fa, fb; + fa.next = &fb; + fb.f = aFOO[programIndex]; + fb.i = 100; + bar(&fa); + RET[programIndex] = fb.f; +} + + +void bing(Foo * uniform f) { + f = f->next; + f->f *= 2; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2 + 2*programIndex; +} diff --git a/tests/struct-forward-decl.ispc b/tests/struct-forward-decl.ispc new file mode 100644 index 00000000..54f09be6 --- /dev/null +++ b/tests/struct-forward-decl.ispc @@ -0,0 +1,33 @@ + +export uniform int width() { return programCount; } + +struct Foo; + +void bing(varying Foo * uniform); + +struct Foo { + float f; + int i; +}; + +void bar(varying Foo * uniform f) { + bing(f); +} + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + Foo f; + f.f = aFOO[programIndex]; + f.i = programIndex; + bar(&f); + RET[programIndex] = f.f; +} + + +void bing(varying Foo * uniform f) { + f->f *= 2; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2 + 2*programIndex; +} diff --git a/tests_errors/deref-3.ispc b/tests_errors/deref-3.ispc index 19d4e82d..d7e6e906 100644 --- a/tests_errors/deref-3.ispc +++ b/tests_errors/deref-3.ispc @@ -1,4 +1,4 @@ -// Dereference operator "->" can't be applied to non-pointer type "varying struct Foo" +// Member operator "->" can't be applied to non-pointer type "varying struct Foo" struct Foo { int 
x; }; diff --git a/tests_errors/struct-ref-undecl-1.ispc b/tests_errors/struct-ref-undecl-1.ispc new file mode 100644 index 00000000..0d851117 --- /dev/null +++ b/tests_errors/struct-ref-undecl-1.ispc @@ -0,0 +1,5 @@ +// Member operator "." can't be applied to declared but not defined struct type + +struct Foo; + +int bar(Foo & foo) { return foo.x; } diff --git a/tests_errors/struct-ref-undecl-2.ispc b/tests_errors/struct-ref-undecl-2.ispc new file mode 100644 index 00000000..bb233ccc --- /dev/null +++ b/tests_errors/struct-ref-undecl-2.ispc @@ -0,0 +1,5 @@ +// Member operator "->" can't be applied to declared but not defined struct type + +struct Foo; + +int bar(Foo * uniform foo) { return foo->x; } diff --git a/type.cpp b/type.cpp index 0e4ecfe1..64e832bb 100644 --- a/type.cpp +++ b/type.cpp @@ -42,6 +42,7 @@ #include "module.h" #include +#include #include #include #include @@ -59,7 +60,7 @@ static bool lShouldPrintName(const std::string &name) { if (name.size() == 0) return false; - else if (name[0] != '_') + else if (name[0] != '_' && name[0] != '$') return true; else return (name.size() == 1) || (name[1] != '_'); @@ -946,42 +947,6 @@ PointerType::GetAsFrozenSlice() const { } -/** Returns a structure corresponding to the pointer representation for - slice pointers; the first member of this structure is a uniform or - varying pointer, and the second element is either a uniform or varying - int32. 
- */ -const StructType * -PointerType::GetSliceStructType() const { - Assert(isSlice == true); - - std::vector eltTypes; - eltTypes.push_back(GetAsNonSlice()); - switch (variability.type) { - case Variability::Uniform: - eltTypes.push_back(AtomicType::UniformInt32); - break; - case Variability::Varying: - eltTypes.push_back(AtomicType::VaryingInt32); - break; - default: - FATAL("Unexpected variability in PointerType::GetSliceStructType()"); - } - - std::vector eltNames; - std::vector eltPos; - - eltNames.push_back("ptr"); - eltNames.push_back("offset"); - - eltPos.push_back(SourcePos()); - eltPos.push_back(SourcePos()); - - return new StructType("__ptr_slice_tmp", eltTypes, eltNames, eltPos, isConst, - Variability::Uniform, SourcePos()); -} - - const PointerType * PointerType::ResolveUnboundVariability(Variability v) const { if (baseType == NULL) { @@ -1090,11 +1055,30 @@ PointerType::LLVMType(llvm::LLVMContext *ctx) const { return NULL; } - if (isSlice) - // Slice pointers are represented as a structure with a pointer and - // an integer offset; the corresponding ispc type is returned by - // GetSliceStructType(). 
- return GetSliceStructType()->LLVMType(ctx); + if (isSlice) { + llvm::Type *types[2]; + types[0] = GetAsNonSlice()->LLVMType(ctx); + + switch (variability.type) { + case Variability::Uniform: + types[1] = LLVMTypes::Int32Type; + break; + case Variability::Varying: + types[1] = LLVMTypes::Int32VectorType; + break; + case Variability::SOA: + types[1] = llvm::ArrayType::get(LLVMTypes::Int32Type, + variability.soaWidth); + break; + default: + FATAL("unexpected variability for slice pointer in " + "PointerType::LLVMType"); + } + + llvm::ArrayRef typesArrayRef = + llvm::ArrayRef(types, 2); + return llvm::StructType::get(*g->ctx, typesArrayRef); + } switch (variability.type) { case Variability::Uniform: { @@ -1721,12 +1705,103 @@ VectorType::getVectorMemoryCount() const { /////////////////////////////////////////////////////////////////////////// // StructType +// We maintain a map from struct names to LLVM struct types so that we can +// uniquely get the llvm::StructType * for a given ispc struct type. Note +// that we need to mangle the name a bit so that we can e.g. differentiate +// between the uniform and varying variants of a given struct type. This +// is handled by lMangleStructName() below. +static std::map lStructTypeMap; + +/** Using a struct's name, its variability, and the vector width for the + current compilation target, this function generates a string that + encodes that full structure type, for use in the lStructTypeMap. Note + that the vector width is needed in order to differentiate between + 'varying' structs with different compilation targets, which have + different memory layouts... 
+ */ +static std::string +lMangleStructName(const std::string &name, Variability variability) { + char buf[32]; + std::string n; + + // Encode vector width + sprintf(buf, "v%d", g->target.vectorWidth); + n += buf; + + // Variability + switch (variability.type) { + case Variability::Uniform: + n += "_uniform_"; + break; + case Variability::Varying: + n += "_varying_"; + break; + case Variability::SOA: + sprintf(buf, "_soa%d_", variability.soaWidth); + n += buf; + break; + default: + FATAL("Unexpected varaibility in lMangleStructName()"); + } + + // And stuff the name at the end.... + n += name; + return n; +} + + StructType::StructType(const std::string &n, const std::vector &elts, const std::vector &en, const std::vector &ep, bool ic, Variability v, SourcePos p) : name(n), elementTypes(elts), elementNames(en), elementPositions(ep), variability(v), isConst(ic), pos(p) { + if (variability != Variability::Unbound) { + // For structs with non-unbound variability, we'll create the + // corresponding LLVM struct type now, if one hasn't been made + // already. + + // Create a unique anonymous struct name if we have an anonymous + // struct (name == ""), or if we are creating a derived type from + // an anonymous struct (e.g. the varying variant--name == '$'). + if (name == "" || name[0] == '$') { + char buf[16]; + static int count = 0; + sprintf(buf, "$anon%d", count); + name = buf; + ++count; + } + + // If a non-opaque LLVM struct for this type has already been + // created, we're done. For an opaque struct type, we'll override + // the old definition now that we have a full definition.
+ std::string mname = lMangleStructName(name, variability); + if (lStructTypeMap.find(mname) != lStructTypeMap.end() && + lStructTypeMap[mname]->isOpaque() == false) + return; + + // Actually make the LLVM struct + std::vector elementTypes; + for (int i = 0; i < GetElementCount(); ++i) { + const Type *type = GetElementType(i); + if (type == NULL) { + Assert(m->errorCount > 0); + return; + } + elementTypes.push_back(type->LLVMType(g->ctx)); + } + + if (lStructTypeMap.find(mname) == lStructTypeMap.end()) { + // New struct definition + llvm::StructType *st = + llvm::StructType::create(*g->ctx, elementTypes, mname); + lStructTypeMap[mname] = st; + } + else { + // Definition for what was before just a declaration + lStructTypeMap[mname]->setBody(elementTypes); + } + } } @@ -1854,31 +1929,34 @@ StructType::GetAsNonConstType() const { std::string StructType::GetString() const { std::string ret; - if (isConst) ret += "const "; + if (isConst) + ret += "const "; ret += variability.GetString(); ret += " "; - // Don't print the entire struct declaration, just print the struct's name. - // @todo Do we need a separate method that prints the declaration? -#if 0 - ret += std::string("struct { ") + name; - for (unsigned int i = 0; i < elementTypes.size(); ++i) { - ret += elementTypes[i]->GetString(); - ret += " "; - ret += elementNames[i]; - ret += "; "; + if (name[0] == '$') { + // Print the whole anonymous struct declaration + ret += std::string("struct { ") + name; + for (unsigned int i = 0; i < elementTypes.size(); ++i) { + ret += elementTypes[i]->GetString(); + ret += " "; + ret += elementNames[i]; + ret += "; "; + } + ret += "}"; } - ret += "}"; -#else - ret += "struct "; - ret += name; -#endif + else { + ret += "struct "; + ret += name; + } + return ret; } -std::string -StructType::Mangle() const { +/** Mangle a struct name for use in function name mangling. 
*/ +static std::string +lMangleStruct(Variability variability, bool isConst, const std::string &name) { Assert(variability != Variability::Unbound); std::string ret; @@ -1890,6 +1968,12 @@ StructType::Mangle() const { ret += name + std::string("]"); return ret; } + + +std::string +StructType::Mangle() const { + return lMangleStruct(variability, isConst, name); +} std::string @@ -1897,15 +1981,16 @@ StructType::GetCDeclaration(const std::string &n) const { std::string ret; if (isConst) ret += "const "; ret += std::string("struct ") + name; - if (lShouldPrintName(n)) + if (lShouldPrintName(n)) { ret += std::string(" ") + n; - if (variability.soaWidth > 0) { - char buf[32]; - // This has to match the naming scheme used in lEmitStructDecls() - // in module.cpp - sprintf(buf, "_SOA%d", variability.soaWidth); - ret += buf; + if (variability.soaWidth > 0) { + char buf[32]; + // This has to match the naming scheme used in lEmitStructDecls() + // in module.cpp + sprintf(buf, "_SOA%d", variability.soaWidth); + ret += buf; + } } return ret; @@ -1914,14 +1999,13 @@ StructType::GetCDeclaration(const std::string &n) const { llvm::Type * StructType::LLVMType(llvm::LLVMContext *ctx) const { - std::vector llvmTypes; - for (int i = 0; i < GetElementCount(); ++i) { - const Type *type = GetElementType(i); - if (type == NULL) - return NULL; - llvmTypes.push_back(type->LLVMType(ctx)); + Assert(variability != Variability::Unbound); + std::string mname = lMangleStructName(name, variability); + if (lStructTypeMap.find(mname) == lStructTypeMap.end()) { + Assert(m->errorCount > 0); + return NULL; } - return llvm::StructType::get(*ctx, llvmTypes); + return lStructTypeMap[mname]; } @@ -2037,6 +2121,170 @@ StructType::checkIfCanBeSOA(const StructType *st) { } +/////////////////////////////////////////////////////////////////////////// +// UndefinedStructType + +UndefinedStructType::UndefinedStructType(const std::string &n, + const Variability var, bool ic, + SourcePos p) + : name(n), 
variability(var), isConst(ic), pos(p) { + Assert(name != ""); + if (variability != Variability::Unbound) { + // Create a new opaque LLVM struct type for this struct name + std::string mname = lMangleStructName(name, variability); + if (lStructTypeMap.find(mname) == lStructTypeMap.end()) + lStructTypeMap[mname] = llvm::StructType::create(*g->ctx, mname); + } +} + + +Variability +UndefinedStructType::GetVariability() const { + return variability; +} + + +bool +UndefinedStructType::IsBoolType() const { + return false; +} + + +bool +UndefinedStructType::IsFloatType() const { + return false; +} + + +bool +UndefinedStructType::IsIntType() const { + return false; +} + + +bool +UndefinedStructType::IsUnsignedType() const { + return false; +} + + +bool +UndefinedStructType::IsConstType() const { + return isConst; +} + + +const Type * +UndefinedStructType::GetBaseType() const { + return this; +} + + +const UndefinedStructType * +UndefinedStructType::GetAsVaryingType() const { + if (variability == Variability::Varying) + return this; + return new UndefinedStructType(name, Variability::Varying, isConst, pos); +} + + +const UndefinedStructType * +UndefinedStructType::GetAsUniformType() const { + if (variability == Variability::Uniform) + return this; + return new UndefinedStructType(name, Variability::Uniform, isConst, pos); +} + + +const UndefinedStructType * +UndefinedStructType::GetAsUnboundVariabilityType() const { + if (variability == Variability::Unbound) + return this; + return new UndefinedStructType(name, Variability::Unbound, isConst, pos); +} + + +const UndefinedStructType * +UndefinedStructType::GetAsSOAType(int width) const { + FATAL("UndefinedStructType::GetAsSOAType() shouldn't be called."); + return NULL; +} + + +const UndefinedStructType * +UndefinedStructType::ResolveUnboundVariability(Variability v) const { + if (variability != Variability::Unbound) + return this; + return new UndefinedStructType(name, v, isConst, pos); +} + + +const UndefinedStructType * 
+UndefinedStructType::GetAsConstType() const { + if (isConst) + return this; + return new UndefinedStructType(name, variability, true, pos); +} + + +const UndefinedStructType * +UndefinedStructType::GetAsNonConstType() const { + if (isConst == false) + return this; + return new UndefinedStructType(name, variability, false, pos); +} + + +std::string +UndefinedStructType::GetString() const { + std::string ret; + if (isConst) ret += "const "; + ret += variability.GetString(); + ret += " struct "; + ret += name; + return ret; +} + + +std::string +UndefinedStructType::Mangle() const { + return lMangleStruct(variability, isConst, name); +} + + +std::string +UndefinedStructType::GetCDeclaration(const std::string &n) const { + std::string ret; + if (isConst) ret += "const "; + ret += std::string("struct ") + name; + if (lShouldPrintName(n)) + ret += std::string(" ") + n; + return ret; +} + + +llvm::Type * +UndefinedStructType::LLVMType(llvm::LLVMContext *ctx) const { + Assert(variability != Variability::Unbound); + std::string mname = lMangleStructName(name, variability); + if (lStructTypeMap.find(mname) == lStructTypeMap.end()) { + Assert(m->errorCount > 0); + return NULL; + } + return lStructTypeMap[mname]; +} + + +llvm::DIType +UndefinedStructType::GetDIType(llvm::DIDescriptor scope) const { + llvm::DIFile diFile = pos.GetDIFile(); + llvm::DIArray elements; + return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, + 0 /* size */, 0 /* align */, + 0 /* flags */, elements); +} + + /////////////////////////////////////////////////////////////////////////// // ReferenceType @@ -2889,20 +3137,19 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) { const StructType *sta = dynamic_cast(a); const StructType *stb = dynamic_cast(b); - if (sta != NULL && stb != NULL) { - if (sta->GetElementCount() != stb->GetElementCount()) + const UndefinedStructType *usta = + dynamic_cast(a); + const UndefinedStructType *ustb = + dynamic_cast(b); + if ((sta 
!= NULL || usta != NULL) && (stb != NULL || ustb != NULL)) { + // Report both defuned and undefined structs as equal if their + // names are the same. + if (a->GetVariability() != b->GetVariability()) return false; - if (sta->GetStructName() != stb->GetStructName()) - return false; - if (sta->GetVariability() != stb->GetVariability()) - return false; - for (int i = 0; i < sta->GetElementCount(); ++i) - // FIXME: is this redundant now? - if (!lCheckTypeEquality(sta->GetElementType(i), stb->GetElementType(i), - ignoreConst)) - return false; - return true; + std::string namea = sta ? sta->GetStructName() : usta->GetStructName(); + std::string nameb = stb ? stb->GetStructName() : ustb->GetStructName(); + return (namea == nameb); } const PointerType *pta = dynamic_cast(a); diff --git a/type.h b/type.h index f81ea062..e0560ce5 100644 --- a/type.h +++ b/type.h @@ -409,7 +409,6 @@ public: const PointerType *GetAsSlice() const; const PointerType *GetAsNonSlice() const; const PointerType *GetAsFrozenSlice() const; - const StructType *GetSliceStructType() const; const Type *GetBaseType() const; const PointerType *GetAsVaryingType() const; @@ -668,7 +667,7 @@ public: private: static bool checkIfCanBeSOA(const StructType *st); - const std::string name; + /*const*/ std::string name; /** The types of the struct elements. Note that we store these with uniform/varying exactly as they were declared in the source file. (In other words, even if this struct has a varying qualifier and @@ -690,6 +689,52 @@ private: }; +/** Type implementation representing a struct name that has been declared + but where the struct members haven't been defined (i.e. "struct Foo;"). + This class doesn't do much besides serve as a placeholder that other + code can use to detect the presence of such as truct. 
+ */ +class UndefinedStructType : public Type { +public: + UndefinedStructType(const std::string &name, const Variability variability, + bool isConst, SourcePos pos); + + Variability GetVariability() const; + + bool IsBoolType() const; + bool IsFloatType() const; + bool IsIntType() const; + bool IsUnsignedType() const; + bool IsConstType() const; + + const Type *GetBaseType() const; + const UndefinedStructType *GetAsVaryingType() const; + const UndefinedStructType *GetAsUniformType() const; + const UndefinedStructType *GetAsUnboundVariabilityType() const; + const UndefinedStructType *GetAsSOAType(int width) const; + const UndefinedStructType *ResolveUnboundVariability(Variability v) const; + + const UndefinedStructType *GetAsConstType() const; + const UndefinedStructType *GetAsNonConstType() const; + + std::string GetString() const; + std::string Mangle() const; + std::string GetCDeclaration(const std::string &name) const; + + llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; + llvm::DIType GetDIType(llvm::DIDescriptor scope) const; + + /** Returns the name of the structure type. (e.g. struct Foo -> "Foo".) */ + const std::string &GetStructName() const { return name; } + +private: + const std::string name; + const Variability variability; + const bool isConst; + const SourcePos pos; +}; + + /** @brief Type representing a reference to another (non-reference) type. */ class ReferenceType : public Type { From a0c9f7823bb702cb6d59f2c89b71edb411dbc829 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 17 Apr 2012 15:09:42 -0700 Subject: [PATCH 073/173] C++ backend fixes. 
Handle calls to llvm.trap() Declare functions before globals Handle memset() --- cbackend.cpp | 133 ++++++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index 71ce7de6..e9dd7fe0 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -2090,6 +2090,8 @@ bool CWriter::doInitialization(Module &M) { Out << "int fflush(void *);\n"; Out << "int printf(const unsigned char *, ...);\n"; Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n"; + Out << "uint8_t *memset(uint8_t *, uint8_t, uint64_t );\n"; + Out << "void memset_pattern16(void *, const void *, uint64_t );\n"; Out << "}\n\n"; generateCompilerSpecificCode(Out, TD); @@ -2199,69 +2201,6 @@ bool CWriter::doInitialization(Module &M) { } } - // Output the global variable definitions and contents... - if (!M.global_empty()) { - Out << "\n\n/* Global Variable Definitions and Initialization */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration()) { - // Ignore special globals, such as debug info. - if (getGlobalVariableClass(I)) - continue; - - if (I->hasLocalLinkage()) - Out << "static "; - else if (I->hasDLLImportLinkage()) - Out << "__declspec(dllimport) "; - else if (I->hasDLLExportLinkage()) - Out << "__declspec(dllexport) "; - - // Thread Local Storage - if (I->isThreadLocal()) - Out << "__thread "; - - printType(Out, I->getType()->getElementType(), false, - GetValueName(I)); - if (I->hasLinkOnceLinkage()) - Out << " __attribute__((common))"; - else if (I->hasWeakLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - else if (I->hasCommonLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - - // If the initializer is not null, emit the initializer. If it is null, - // we try to avoid emitting large amounts of zeros. The problem with - // this, however, occurs when the variable has weak linkage. 
In this - // case, the assembler will complain about the variable being both weak - // and common, so we disable this optimization. - // FIXME common linkage should avoid this problem. - if (!I->getInitializer()->isNullValue()) { - Out << " = " ; - writeOperand(I->getInitializer(), false); - } else if (I->hasWeakLinkage()) { - // We have to specify an initializer, but it doesn't have to be - // complete. If the value is an aggregate, print out { 0 }, and let - // the compiler figure out the rest of the zeros. - Out << " = " ; - if (I->getInitializer()->getType()->isStructTy() || - I->getInitializer()->getType()->isVectorTy()) { - Out << "{ 0 }"; - } else if (I->getInitializer()->getType()->isArrayTy()) { - // As with structs and vectors, but with an extra set of braces - // because arrays are wrapped in structs. - Out << "{ { 0 } }"; - } else { - // Just print it out normally. - writeOperand(I->getInitializer(), false); - } - } - Out << ";\n"; - } - } - // Function declarations Out << "\n/* Function Declarations */\n"; Out << "extern \"C\" {\n"; @@ -2362,6 +2301,69 @@ bool CWriter::doInitialization(Module &M) { printIntrinsicDefinition(**I, Out); } + // Output the global variable definitions and contents... + if (!M.global_empty()) { + Out << "\n\n/* Global Variable Definitions and Initialization */\n"; + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!I->isDeclaration()) { + // Ignore special globals, such as debug info. 
+ if (getGlobalVariableClass(I)) + continue; + + if (I->hasLocalLinkage()) + Out << "static "; + else if (I->hasDLLImportLinkage()) + Out << "__declspec(dllimport) "; + else if (I->hasDLLExportLinkage()) + Out << "__declspec(dllexport) "; + + // Thread Local Storage + if (I->isThreadLocal()) + Out << "__thread "; + + printType(Out, I->getType()->getElementType(), false, + GetValueName(I)); + if (I->hasLinkOnceLinkage()) + Out << " __attribute__((common))"; + else if (I->hasWeakLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + else if (I->hasCommonLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + + if (I->hasHiddenVisibility()) + Out << " __HIDDEN__"; + + // If the initializer is not null, emit the initializer. If it is null, + // we try to avoid emitting large amounts of zeros. The problem with + // this, however, occurs when the variable has weak linkage. In this + // case, the assembler will complain about the variable being both weak + // and common, so we disable this optimization. + // FIXME common linkage should avoid this problem. + if (!I->getInitializer()->isNullValue()) { + Out << " = " ; + writeOperand(I->getInitializer(), false); + } else if (I->hasWeakLinkage()) { + // We have to specify an initializer, but it doesn't have to be + // complete. If the value is an aggregate, print out { 0 }, and let + // the compiler figure out the rest of the zeros. + Out << " = " ; + if (I->getInitializer()->getType()->isStructTy() || + I->getInitializer()->getType()->isVectorTy()) { + Out << "{ 0 }"; + } else if (I->getInitializer()->getType()->isArrayTy()) { + // As with structs and vectors, but with an extra set of braces + // because arrays are wrapped in structs. + Out << "{ { 0 } }"; + } else { + // Just print it out normally. 
+ writeOperand(I->getInitializer(), false); + } + } + Out << ";\n"; + } + } + return false; } @@ -3417,6 +3419,7 @@ void CWriter::lowerIntrinsics(Function &F) { case Intrinsic::ppc_altivec_lvsl: case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: + case Intrinsic::trap: // We directly implement these intrinsics break; default: @@ -3584,7 +3587,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, // If this is an intrinsic that directly corresponds to a GCC // builtin, we emit it here. const char *BuiltinName = ""; - Function *F = I.getCalledFunction(); #define GET_GCC_BUILTIN_NAME #include "llvm/Intrinsics.gen" #undef GET_GCC_BUILTIN_NAME @@ -3727,6 +3729,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, writeOperand(I.getArgOperand(1)); Out << ")"; return true; + case Intrinsic::trap: + Out << "abort()"; + return true; } } From b9d6ba2aa042286f87ab02efae944a54e9ef95c3 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 17 Apr 2012 15:10:30 -0700 Subject: [PATCH 074/173] Always set target info, even when compiling to generic targets. This allows the SROA pass to eliminate a lot of allocas and loads and stores, which helps a lot for performance.
--- opt.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/opt.cpp b/opt.cpp index 6630331a..70649d39 100644 --- a/opt.cpp +++ b/opt.cpp @@ -256,12 +256,10 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createVerifierPass()); - if (g->target.isa != Target::GENERIC) { - llvm::TargetLibraryInfo *targetLibraryInfo = - new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple())); - optPM.add(targetLibraryInfo); - optPM.add(new llvm::TargetData(module)); - } + llvm::TargetLibraryInfo *targetLibraryInfo = + new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple())); + optPM.add(targetLibraryInfo); + optPM.add(new llvm::TargetData(module)); optPM.add(llvm::createIndVarSimplifyPass()); From 55d5c07d00ceb4c94247d5030ce728d4ad783f44 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 16 Apr 2012 08:00:13 -0700 Subject: [PATCH 075/173] Issue errors when doing illegal things with incomplete struct types. Issue an error, rather than crashing, if the user has declared a struct type but not defined it and subsequently tries to: - dynamically allocate an instance of the struct type - do pointer math with a pointer to the struct type - compute the size of the struct type --- expr.cpp | 45 ++++++++++++++++++++++-- tests_errors/undef-struct-new.ispc | 7 ++++ tests_errors/undef-struct-ptrmath-1.ispc | 7 ++++ tests_errors/undef-struct-ptrmath-2.ispc | 7 ++++ tests_errors/undef-struct-ptrmath-3.ispc | 7 ++++ tests_errors/undef-struct-ptrmath.ispc | 7 ++++ tests_errors/undef-struct-sizeof.ispc | 7 ++++ 7 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 tests_errors/undef-struct-new.ispc create mode 100644 tests_errors/undef-struct-ptrmath-1.ispc create mode 100644 tests_errors/undef-struct-ptrmath-2.ispc create mode 100644 tests_errors/undef-struct-ptrmath-3.ispc create mode 100644 tests_errors/undef-struct-ptrmath.ispc create mode 100644 tests_errors/undef-struct-sizeof.ispc diff --git a/expr.cpp 
b/expr.cpp index b43f9e54..81680874 100644 --- a/expr.cpp +++ b/expr.cpp @@ -1258,6 +1258,11 @@ UnaryExpr::TypeCheck() { type->GetString().c_str()); return NULL; } + if (dynamic_cast(pt->GetBaseType())) { + Error(expr->pos, "Illegal to pre/post increment pointer to " + "undefined struct type \"%s\".", type->GetString().c_str()); + return NULL; + } return this; } @@ -2296,6 +2301,16 @@ BinaryExpr::TypeCheck() { "on \"%s\" type.", type1->GetString().c_str()); return NULL; } + if (dynamic_cast(pt0->GetBaseType())) { + Error(pos, "Illegal to perform pointer arithmetic " + "on undefined struct type \"%s\".", pt0->GetString().c_str()); + return NULL; + } + if (dynamic_cast(pt1->GetBaseType())) { + Error(pos, "Illegal to perform pointer arithmetic " + "on undefined struct type \"%s\".", pt1->GetString().c_str()); + return NULL; + } const Type *t = Type::MoreGeneralType(type0, type1, pos, "-"); if (t == NULL) @@ -2331,6 +2346,11 @@ BinaryExpr::TypeCheck() { "on \"%s\" type.", pt0->GetString().c_str()); return NULL; } + if (dynamic_cast(pt0->GetBaseType())) { + Error(pos, "Illegal to perform pointer arithmetic " + "on undefined struct type \"%s\".", pt0->GetString().c_str()); + return NULL; + } const Type *offsetType = g->target.is32Bit ? AtomicType::UniformInt32 : AtomicType::UniformInt64; @@ -7336,6 +7356,14 @@ SizeOfExpr::Print() const { Expr * SizeOfExpr::TypeCheck() { + // Can't compute the size of a struct without a definition + if (type != NULL && + dynamic_cast(type) != NULL) { + Error(pos, "Can't compute the size of declared but not defined " + "struct type \"%s\".", type->GetString().c_str()); + return NULL; + } + return this; } @@ -8105,9 +8133,20 @@ NewExpr::GetType() const { Expr * NewExpr::TypeCheck() { - // Here we only need to make sure that if we have an expression giving - // a number of elements to allocate that it can be converted to an - // integer of the appropriate variability. 
+ // It's illegal to call new with an undefined struct type + if (allocType == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + if (dynamic_cast(allocType) != NULL) { + Error(pos, "Can't dynamically allocate storage for declared " + "but not defined type \"%s\".", allocType->GetString().c_str()); + return NULL; + } + + // Otherwise we only need to make sure that if we have an expression + // giving a number of elements to allocate that it can be converted to + // an integer of the appropriate variability. if (countExpr == NULL) return this; diff --git a/tests_errors/undef-struct-new.ispc b/tests_errors/undef-struct-new.ispc new file mode 100644 index 00000000..3a9037c3 --- /dev/null +++ b/tests_errors/undef-struct-new.ispc @@ -0,0 +1,7 @@ +// Can't dynamically allocate storage for declared but not defined type + +struct Foo; + +Foo * uniform bar() { + return uniform new Foo; +} diff --git a/tests_errors/undef-struct-ptrmath-1.ispc b/tests_errors/undef-struct-ptrmath-1.ispc new file mode 100644 index 00000000..861c66fb --- /dev/null +++ b/tests_errors/undef-struct-ptrmath-1.ispc @@ -0,0 +1,7 @@ +// Illegal to perform pointer arithmetic on undefined struct type + +struct Foo; + +Foo * uniform bar(Foo * uniform f) { + return f + 1; +} diff --git a/tests_errors/undef-struct-ptrmath-2.ispc b/tests_errors/undef-struct-ptrmath-2.ispc new file mode 100644 index 00000000..dfaab13c --- /dev/null +++ b/tests_errors/undef-struct-ptrmath-2.ispc @@ -0,0 +1,7 @@ +// Illegal to perform pointer arithmetic on undefined struct type + +struct Foo; + +Foo * uniform bar(Foo * uniform f) { + return 1 + f; +} diff --git a/tests_errors/undef-struct-ptrmath-3.ispc b/tests_errors/undef-struct-ptrmath-3.ispc new file mode 100644 index 00000000..1fad2ac4 --- /dev/null +++ b/tests_errors/undef-struct-ptrmath-3.ispc @@ -0,0 +1,7 @@ +// Illegal to perform pointer arithmetic on undefined struct type + +struct Foo; + +Foo * uniform bar(Foo * uniform f) { + return f-1; +} diff --git 
a/tests_errors/undef-struct-ptrmath.ispc b/tests_errors/undef-struct-ptrmath.ispc new file mode 100644 index 00000000..39b19a4e --- /dev/null +++ b/tests_errors/undef-struct-ptrmath.ispc @@ -0,0 +1,7 @@ +// Illegal to pre/post increment pointer to undefined struct type + +struct Foo; + +Foo * uniform bar(Foo * uniform f) { + return ++f; +} diff --git a/tests_errors/undef-struct-sizeof.ispc b/tests_errors/undef-struct-sizeof.ispc new file mode 100644 index 00000000..d2a2219a --- /dev/null +++ b/tests_errors/undef-struct-sizeof.ispc @@ -0,0 +1,7 @@ +// Can't compute the size of declared but not defined struct type + +struct Foo; + +uniform int bar() { + return sizeof(Foo); +} From abf7c423bbabc3d8eef2aca86391b3e4b34b2a77 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 18 Apr 2012 06:14:55 -0700 Subject: [PATCH 076/173] Fix build with LLVM 3.0 --- cbackend.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cbackend.cpp b/cbackend.cpp index e9dd7fe0..0c582ce0 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -3587,6 +3587,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, // If this is an intrinsic that directly corresponds to a GCC // builtin, we emit it here. const char *BuiltinName = ""; +#ifdef LLVM_3_0 + Function *F = I.getCalledFunction(); +#endif // LLVM_3_0 #define GET_GCC_BUILTIN_NAME #include "llvm/Intrinsics.gen" #undef GET_GCC_BUILTIN_NAME From 645a8c9349d69b87c280300987c9869383eb88d3 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 18 Apr 2012 10:26:22 -0700 Subject: [PATCH 077/173] Fix serious bug in VSelMovmskOpt When the mask was all off, we'd choose the incorrect operand! (This bug was masked since this optimization wasn't triggering as intended, due to other issues to be fixed in a forthcoming commit.)
--- opt.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opt.cpp b/opt.cpp index 70649d39..5a7bffa4 100644 --- a/opt.cpp +++ b/opt.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -899,8 +899,8 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { // Mask all on -> replace with the first select value value = selectInst->getOperand(1); else if (mask == 0) - // Mask all off -> replace with the second select blend value - value = selectInst->getOperand(1); + // Mask all off -> replace with the second select value + value = selectInst->getOperand(2); if (value != NULL) { llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), From c202e9e106564a8c56b49957678fae935ad9b33e Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 18 Apr 2012 10:27:43 -0700 Subject: [PATCH 078/173] Add debugging printing code to optimization passes. Now all of the passes dump out the basic block before and after they do their thing when --debug is enabled. 
--- opt.cpp | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/opt.cpp b/opt.cpp index 5a7bffa4..f83b961e 100644 --- a/opt.cpp +++ b/opt.cpp @@ -680,6 +680,13 @@ lIsUndef(llvm::Value *value) { bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of IntrinsicsOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + llvm::Function *avxMaskedLoad32 = llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskload_ps_256); llvm::Function *avxMaskedLoad64 = @@ -828,6 +835,14 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { } } } + + if (g->debugPrint) { + fprintf(stderr, "End of IntrinsicsOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -883,6 +898,13 @@ char VSelMovmskOpt::ID = 0; bool VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of VSelMovmskOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + bool modifiedAny = false; restart: @@ -933,6 +955,13 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { } } + if (g->debugPrint) { + fprintf(stderr, "End of VSelMovMskOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -1666,6 +1695,13 @@ struct GSInfo { bool DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of DetectGSBaseOffsets\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + GSInfo gsFuncs[] = { GSInfo("__pseudo_gather32_8", "__pseudo_gather_base_offsets32_8", "__pseudo_gather_base_offsets32_8", true), @@ -1805,6 +1841,13 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) 
{ goto restart; } + if (g->debugPrint) { + fprintf(stderr, "End of DetectGSBaseOffsets\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -1849,6 +1892,13 @@ struct MSInfo { bool MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of MaskedStoreOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + MSInfo msInfo[] = { MSInfo("__pseudo_masked_store_8", 1), MSInfo("__pseudo_masked_store_16", 2), @@ -1923,6 +1973,13 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { } } + if (g->debugPrint) { + fprintf(stderr, "End of MaskedStoreOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -1963,6 +2020,13 @@ struct MLInfo { bool MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of MaskedLoadOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + MLInfo mlInfo[] = { MLInfo("__masked_load_8", 1), MLInfo("__masked_load_16", 2), @@ -2022,6 +2086,14 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } } + + if (g->debugPrint) { + fprintf(stderr, "End of MaskedLoadOpt\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -2109,6 +2181,13 @@ struct LMSInfo { bool PseudoMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of PseudoMaskedStorePass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + LMSInfo msInfo[] = { LMSInfo("__pseudo_masked_store_8", "__masked_store_blend_8", "__masked_store_8"), @@ -2161,6 +2240,13 @@ PseudoMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { goto 
restart; } + if (g->debugPrint) { + fprintf(stderr, "End of PseudoMaskedStorePass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -2246,6 +2332,13 @@ struct ScatterImpInfo { bool GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of GSToLoadStorePass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + GatherImpInfo gInfo[] = { GatherImpInfo("__pseudo_gather_base_offsets32_8", "__load_and_broadcast_8", "__masked_load_8", 1), @@ -2428,6 +2521,13 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { } } + if (g->debugPrint) { + fprintf(stderr, "End of GSToLoadStorePass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -3360,6 +3460,13 @@ lInstructionMayWriteToMemory(llvm::Instruction *inst) { bool GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of GatherCoalescePass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + llvm::Function *gatherFuncs[] = { m->module->getFunction("__pseudo_gather_base_offsets32_32"), m->module->getFunction("__pseudo_gather_base_offsets64_32"), @@ -3494,6 +3601,13 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) { } } + if (g->debugPrint) { + fprintf(stderr, "End of GatherCoalescePass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -3539,6 +3653,13 @@ struct LowerGSInfo { bool PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of PseudoGSToGSPass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + LowerGSInfo lgsInfo[] = { 
LowerGSInfo("__pseudo_gather_base_offsets32_8", "__gather_base_offsets32_i8", true), LowerGSInfo("__pseudo_gather_base_offsets32_16", "__gather_base_offsets32_i16", true), @@ -3622,6 +3743,13 @@ PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } + if (g->debugPrint) { + fprintf(stderr, "End of PseudoGSToGSPass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } @@ -3665,6 +3793,13 @@ char IsCompileTimeConstantPass::ID = 0; bool IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) { + if (g->debugPrint) { + fprintf(stderr, "Start of IsCompileTimeConstantPass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + llvm::Function *funcs[] = { m->module->getFunction("__is_compile_time_constant_mask"), m->module->getFunction("__is_compile_time_constant_uniform_int32"), @@ -3722,6 +3857,13 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) { } } + if (g->debugPrint) { + fprintf(stderr, "End of IsCompileTimeConstantPass\n"); + fprintf(stderr, "---------------\n"); + bb.dump(); + fprintf(stderr, "---------------\n\n"); + } + return modifiedAny; } From 7c91b01125bf99b9a908a89d33c653f82a2af39b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 18 Apr 2012 11:34:28 -0700 Subject: [PATCH 079/173] Handle more forms of constant vectors in lGetMask(). Various optimization passes depend on turning a compile-time constant mask into a bit vector; it turns out that in LLVM 3.1, constant vectors of ints/floats are represented with llvm::ConstantDataVector, but constant vectors of bools use llvm::ConstantVector (which is what LLVM 3.0 uses for all constant vectors). Now lGetMask() always does the llvm::ConstantVector path, to cover this case. This improves generated C++ code by eliminating things like select with an all on/off mask, turning movmask calls with constants into constant values, etc.
--- opt.cpp | 87 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/opt.cpp b/opt.cpp index f83b961e..a5158375 100644 --- a/opt.cpp +++ b/opt.cpp @@ -586,6 +586,40 @@ IntrinsicsOpt::IntrinsicsOpt() } +/** Given a vector of constant values (int, float, or bool) representing an + execution mask, convert it to a bitvector where the 0th bit corresponds + to the first vector value and so forth. +*/ +static int +lConstElementsToMask(const llvm::SmallVector &elements) { + Assert(elements.size() <= 32); + + int mask = 0; + for (unsigned int i = 0; i < elements.size(); ++i) { + llvm::APInt intMaskValue; + // SSE has the "interesting" approach of encoding blending + // masks as . + llvm::ConstantFP *cf = llvm::dyn_cast(elements[i]); + if (cf != NULL) { + llvm::APFloat apf = cf->getValueAPF(); + intMaskValue = apf.bitcastToAPInt(); + } + else { + // Otherwise get it as an int + llvm::ConstantInt *ci = llvm::dyn_cast(elements[i]); + Assert(ci != NULL); // vs return -1 if NULL? + intMaskValue = ci->getValue(); + } + // Is the high-bit set? If so, OR in the appropriate bit in + // the result mask + if (intMaskValue.countLeadingOnes() > 0) + mask |= (1 << i); + } + return mask; +} + + /** Given an llvm::Value represinting a vector mask, see if the value is a constant. If so, return the integer mask found by taking the high bits of the mask values in turn and concatenating them into a single integer. 
@@ -600,41 +634,30 @@ lGetMask(llvm::Value *factor) { Assert(g->target.vectorWidth < 32); #ifdef LLVM_3_1svn - llvm::ConstantDataVector *cv = llvm::dyn_cast(factor); -#else - llvm::ConstantVector *cv = llvm::dyn_cast(factor); -#endif - if (cv) { - int mask = 0; + llvm::ConstantDataVector *cdv = llvm::dyn_cast(factor); + if (cdv != NULL) { llvm::SmallVector elements; -#ifdef LLVM_3_1svn - for (int i = 0; i < (int)cv->getNumElements(); ++i) - elements.push_back(cv->getElementAsConstant(i)); + for (int i = 0; i < (int)cdv->getNumElements(); ++i) + elements.push_back(cdv->getElementAsConstant(i)); + return lConstElementsToMask(elements); + } +#endif + + llvm::ConstantVector *cv = llvm::dyn_cast(factor); + if (cv != NULL) { + llvm::SmallVector elements; + #ifdef LLVM_3_1svn + for (int i = 0; i < (int)cv->getNumOperands(); ++i) { + llvm::Constant *c = + llvm::dyn_cast(cv->getOperand(i)); + if (c == NULL) + return NULL; + elements.push_back(c); + } #else cv->getVectorElements(elements); #endif - - for (unsigned int i = 0; i < elements.size(); ++i) { - llvm::APInt intMaskValue; - // SSE has the "interesting" approach of encoding blending - // masks as . - llvm::ConstantFP *cf = llvm::dyn_cast(elements[i]); - if (cf) { - llvm::APFloat apf = cf->getValueAPF(); - intMaskValue = apf.bitcastToAPInt(); - } - else { - // Otherwise get it as an int - llvm::ConstantInt *ci = llvm::dyn_cast(elements[i]); - Assert(ci != NULL); // vs return -1 if NULL? - intMaskValue = ci->getValue(); - } - // Is the high-bit set? If so, OR in the appropriate bit in - // the result mask - if (intMaskValue.countLeadingOnes() > 0) - mask |= (1 << i); - } - return mask; + return lConstElementsToMask(elements); } else if (llvm::isa(factor)) return 0; @@ -1149,7 +1172,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets, // Indexing into global arrays can lead to this form, with // ConstantVectors.. 
llvm::SmallVector elements; -#ifdef LLVM_3_1svn + #ifdef LLVM_3_1svn for (int i = 0; i < (int)cv->getNumOperands(); ++i) { llvm::Constant *c = llvm::dyn_cast(cv->getOperand(i)); From 9fedb1674e89ec78897a25acdce7acb857c9ecd5 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 18 Apr 2012 15:46:18 -0700 Subject: [PATCH 080/173] Improve basic block dumping from optimization passes. Now done via a macro, which is cleaner. It's also now possible to specify a single function to watch, which is useful for debugging. --- opt.cpp | 163 +++++++++++++++----------------------------------------- 1 file changed, 43 insertions(+), 120 deletions(-) diff --git a/opt.cpp b/opt.cpp index a5158375..dcb6ad87 100644 --- a/opt.cpp +++ b/opt.cpp @@ -100,6 +100,28 @@ static llvm::Pass *CreateMaskedLoadOptPass(); static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry); static llvm::Pass *CreateMakeInternalFuncsStaticPass(); +#define DEBUG_START_PASS(NAME) \ + if (g->debugPrint && \ + (getenv("FUNC") == NULL || \ + !strcmp(bb.getParent()->getName().str().c_str(), getenv("FUNC")))) { \ + fprintf(stderr, "Start of " NAME "\n"); \ + fprintf(stderr, "---------------\n"); \ + bb.dump(); \ + fprintf(stderr, "---------------\n\n"); \ + } else /* eat semicolon */ + +#define DEBUG_END_PASS(NAME) \ + if (g->debugPrint && \ + (getenv("FUNC") == NULL || \ + !strcmp(bb.getParent()->getName().str().c_str(), getenv("FUNC")))) { \ + fprintf(stderr, "End of " NAME " %s\n", modifiedAny ? 
"** CHANGES **" : ""); \ + fprintf(stderr, "---------------\n"); \ + bb.dump(); \ + fprintf(stderr, "---------------\n\n"); \ + } else /* eat semicolon */ + + + /////////////////////////////////////////////////////////////////////////// @@ -703,12 +725,7 @@ lIsUndef(llvm::Value *value) { bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of IntrinsicsOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("IntrinsicsOpt"); llvm::Function *avxMaskedLoad32 = llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskload_ps_256); @@ -859,12 +876,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { } } - if (g->debugPrint) { - fprintf(stderr, "End of IntrinsicsOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("IntrinsicsOpt"); return modifiedAny; } @@ -921,12 +933,7 @@ char VSelMovmskOpt::ID = 0; bool VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of VSelMovmskOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("VSelMovmaskOpt"); bool modifiedAny = false; @@ -978,12 +985,7 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { } } - if (g->debugPrint) { - fprintf(stderr, "End of VSelMovMskOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("VSelMovMskOpt"); return modifiedAny; } @@ -1718,12 +1720,7 @@ struct GSInfo { bool DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of DetectGSBaseOffsets\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("DetectGSBaseOffsets"); GSInfo gsFuncs[] = { 
GSInfo("__pseudo_gather32_8", "__pseudo_gather_base_offsets32_8", @@ -1864,12 +1861,7 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } - if (g->debugPrint) { - fprintf(stderr, "End of DetectGSBaseOffsets\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("DetectGSBaseOffsets"); return modifiedAny; } @@ -1915,12 +1907,7 @@ struct MSInfo { bool MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of MaskedStoreOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("MaskedStoreOpt"); MSInfo msInfo[] = { MSInfo("__pseudo_masked_store_8", 1), @@ -1996,12 +1983,7 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { } } - if (g->debugPrint) { - fprintf(stderr, "End of MaskedStoreOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("MaskedStoreOpt"); return modifiedAny; } @@ -2043,12 +2025,7 @@ struct MLInfo { bool MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of MaskedLoadOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("MaskedLoadOpt"); MLInfo mlInfo[] = { MLInfo("__masked_load_8", 1), @@ -2110,12 +2087,7 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { } } - if (g->debugPrint) { - fprintf(stderr, "End of MaskedLoadOpt\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("MaskedLoadOpt"); return modifiedAny; } @@ -2204,12 +2176,7 @@ struct LMSInfo { bool PseudoMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of PseudoMaskedStorePass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - 
fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("PseudoMaskedStorePass"); LMSInfo msInfo[] = { LMSInfo("__pseudo_masked_store_8", "__masked_store_blend_8", @@ -2263,12 +2230,7 @@ PseudoMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } - if (g->debugPrint) { - fprintf(stderr, "End of PseudoMaskedStorePass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("PseudoMaskedStorePass"); return modifiedAny; } @@ -2355,12 +2317,7 @@ struct ScatterImpInfo { bool GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of GSToLoadStorePass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("GSToLoadStorePass"); GatherImpInfo gInfo[] = { GatherImpInfo("__pseudo_gather_base_offsets32_8", "__load_and_broadcast_8", @@ -2544,12 +2501,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { } } - if (g->debugPrint) { - fprintf(stderr, "End of GSToLoadStorePass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("GSToLoadStorePass"); return modifiedAny; } @@ -3483,12 +3435,7 @@ lInstructionMayWriteToMemory(llvm::Instruction *inst) { bool GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of GatherCoalescePass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("GatherCoalescePass"); llvm::Function *gatherFuncs[] = { m->module->getFunction("__pseudo_gather_base_offsets32_32"), @@ -3624,12 +3571,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) { } } - if (g->debugPrint) { - fprintf(stderr, "End of GatherCoalescePass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + 
DEBUG_END_PASS("GatherCoalescePass"); return modifiedAny; } @@ -3676,12 +3618,7 @@ struct LowerGSInfo { bool PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of PseudoGSToGSPass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("PseudoGSToGSPass"); LowerGSInfo lgsInfo[] = { LowerGSInfo("__pseudo_gather_base_offsets32_8", "__gather_base_offsets32_i8", true), @@ -3766,12 +3703,7 @@ PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } - if (g->debugPrint) { - fprintf(stderr, "End of PseudoGSToGSPass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("PseudoGSToGSPass"); return modifiedAny; } @@ -3814,14 +3746,10 @@ public: char IsCompileTimeConstantPass::ID = 0; + bool IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) { - if (g->debugPrint) { - fprintf(stderr, "Start of IsCompileTimeConstantPass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_START_PASS("IsCompileTimeConstantPass"); llvm::Function *funcs[] = { m->module->getFunction("__is_compile_time_constant_mask"), @@ -3880,12 +3808,7 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) { } } - if (g->debugPrint) { - fprintf(stderr, "End of IsCompileTimeConstantPass\n"); - fprintf(stderr, "---------------\n"); - bb.dump(); - fprintf(stderr, "---------------\n\n"); - } + DEBUG_END_PASS("IsCompileTimeConstantPass"); return modifiedAny; } From a2bb899a6ba8fe919013ea72e30fc5b57ed92ce2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 09:34:54 -0700 Subject: [PATCH 081/173] Opt debug printing improvement Now, just match the prefix of the provided function name of interest, which allows us to not worry about managing details. 
--- opt.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/opt.cpp b/opt.cpp index dcb6ad87..5a7d1ee6 100644 --- a/opt.cpp +++ b/opt.cpp @@ -103,7 +103,8 @@ static llvm::Pass *CreateMakeInternalFuncsStaticPass(); #define DEBUG_START_PASS(NAME) \ if (g->debugPrint && \ (getenv("FUNC") == NULL || \ - !strcmp(bb.getParent()->getName().str().c_str(), getenv("FUNC")))) { \ + !strncmp(bb.getParent()->getName().str().c_str(), getenv("FUNC"), \ + strlen(getenv("FUNC"))))) { \ fprintf(stderr, "Start of " NAME "\n"); \ fprintf(stderr, "---------------\n"); \ bb.dump(); \ @@ -113,7 +114,8 @@ static llvm::Pass *CreateMakeInternalFuncsStaticPass(); #define DEBUG_END_PASS(NAME) \ if (g->debugPrint && \ (getenv("FUNC") == NULL || \ - !strcmp(bb.getParent()->getName().str().c_str(), getenv("FUNC")))) { \ + !strncmp(bb.getParent()->getName().str().c_str(), getenv("FUNC"), \ + strlen(getenv("FUNC"))))) { \ fprintf(stderr, "End of " NAME " %s\n", modifiedAny ? "** CHANGES **" : ""); \ fprintf(stderr, "---------------\n"); \ bb.dump(); \ From 326c45fa171654930760995cc2bc2f719eb116b6 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 09:45:04 -0700 Subject: [PATCH 082/173] Fix bugs in LLVMExtractFirstVectorElement(). When we're manually scalarizing the extraction of the first element of a vector value, we need to be careful about handling constant values and about where new instructions are inserted. The old code was sloppy about this, which in turn led to invalid IR in some cases. For example, the two bugs below were essentially due to generating an extractelement inst from a zeroinitializer value and then inserting it in the wrong bblock such that a phi node that used that value was malformed. Fixes issues #240 and #229. 
--- llvmutil.cpp | 69 +++++++++++++++++++++++++++++++++++++--------------- llvmutil.h | 3 +-- opt.cpp | 6 ++--- 3 files changed, 52 insertions(+), 26 deletions(-) diff --git a/llvmutil.cpp b/llvmutil.cpp index 5febaadf..d438af45 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -1390,19 +1390,38 @@ LLVMDumpValue(llvm::Value *v) { static llvm::Value * -lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, +lExtractFirstVectorElement(llvm::Value *v, std::map &phiMap) { - // If it's not an instruction (i.e. is a constant), then we can just - // emit an extractelement instruction and let the regular optimizer do - // the rest. - if (llvm::isa(v) == false) - return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt", - insertBefore); - llvm::VectorType *vt = llvm::dyn_cast(v->getType()); Assert(vt != NULL); + // First, handle various constant types; do the extraction manually, as + // appropriate. + if (llvm::isa(v) == true) { + Assert(vt->getElementType()->isIntegerTy()); + return llvm::ConstantInt::get(vt->getElementType(), 0); + } + if (llvm::ConstantVector *cv = llvm::dyn_cast(v)) { +#ifdef LLVM_3_1svn + return cv->getOperand(0); +#else + llvm::SmallVector elements; + cv->getVectorElements(elements); + return elements[0]; +#endif // LLVM_3_1 + } +#ifdef LLVM_3_1svn + if (llvm::ConstantDataVector *cdv = + llvm::dyn_cast(v)) + return cdv->getElementAsConstant(0); +#endif // LLVM_3_1 + + // Otherwise, all that we should have at this point is an instruction + // of some sort + Assert(llvm::isa(v) == false); + Assert(llvm::isa(v) == true); + std::string newName = v->getName().str() + std::string(".elt0"); // Rewrite regular binary operators and casts to the scalarized @@ -1410,20 +1429,24 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, llvm::BinaryOperator *bop = llvm::dyn_cast(v); if (bop != NULL) { llvm::Value *v0 = lExtractFirstVectorElement(bop->getOperand(0), - insertBefore, phiMap); + phiMap); llvm::Value 
*v1 = lExtractFirstVectorElement(bop->getOperand(1), - insertBefore, phiMap); + phiMap); + // Note that the new binary operator is inserted immediately before + // the previous vector one return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1, - newName, insertBefore); + newName, bop); } llvm::CastInst *cast = llvm::dyn_cast(v); if (cast != NULL) { llvm::Value *v = lExtractFirstVectorElement(cast->getOperand(0), - insertBefore, phiMap); + phiMap); + // Similarly, the equivalent scalar cast instruction goes right + // before the vector cast return llvm::CastInst::Create(cast->getOpcode(), v, vt->getElementType(), newName, - insertBefore); + cast); } llvm::PHINode *phi = llvm::dyn_cast(v); @@ -1438,8 +1461,7 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, // return the pointer and not get stuck in an infinite loop. // // The insertion point for the new phi node also has to be the - // start of the bblock of the original phi node, which isn't - // necessarily the same bblock as insertBefore is in! + // start of the bblock of the original phi node. llvm::Instruction *phiInsertPos = phi->getParent()->begin(); llvm::PHINode *scalarPhi = llvm::PHINode::Create(vt->getElementType(), @@ -1449,7 +1471,7 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) { llvm::Value *v = lExtractFirstVectorElement(phi->getIncomingValue(i), - insertBefore, phiMap); + phiMap); scalarPhi->addIncoming(v, phi->getIncomingBlock(i)); } @@ -1466,15 +1488,22 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore, } // Worst case, for everything else, just do a regular extract element - return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt", - insertBefore); + // instruction, which we insert immediately after the instruction we + // have here. 
+ llvm::Instruction *insertAfter = llvm::dyn_cast(v); + Assert(insertAfter != NULL); + llvm::Instruction *ee = + llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt", + (llvm::Instruction *)NULL); + ee->insertAfter(insertAfter); + return ee; } llvm::Value * -LLVMExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore) { +LLVMExtractFirstVectorElement(llvm::Value *v) { std::map phiMap; - llvm::Value *ret = lExtractFirstVectorElement(v, insertBefore, phiMap); + llvm::Value *ret = lExtractFirstVectorElement(v, phiMap); return ret; } diff --git a/llvmutil.h b/llvmutil.h index de50ae70..1990236d 100644 --- a/llvmutil.h +++ b/llvmutil.h @@ -274,8 +274,7 @@ extern void LLVMDumpValue(llvm::Value *v); worth of values just to extract the first element, in cases where only the first element's value is needed. */ -extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v, - llvm::Instruction *insertBefore); +extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v); /** This function takes two vectors, expected to be the same length, and returns a new vector of twice the length that represents concatenating diff --git a/opt.cpp b/opt.cpp index 5a7d1ee6..8e6162bd 100644 --- a/opt.cpp +++ b/opt.cpp @@ -2295,8 +2295,7 @@ struct GatherImpInfo { static llvm::Value * lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets, llvm::Instruction *insertBefore) { - llvm::Value *firstOffset = LLVMExtractFirstVectorElement(offsets, - insertBefore); + llvm::Value *firstOffset = LLVMExtractFirstVectorElement(offsets); return lGEPInst(base, firstOffset, "ptr", insertBefore); } @@ -3290,8 +3289,7 @@ lComputeBasePtr(llvm::CallInst *gatherInst, llvm::Instruction *insertBefore) { // All of the variable offsets values should be the same, due to // checking for this in GatherCoalescePass::runOnBasicBlock(). Thus, // extract the first value and use that as a scalar. 
- llvm::Value *variable = LLVMExtractFirstVectorElement(variableOffsets, - insertBefore); + llvm::Value *variable = LLVMExtractFirstVectorElement(variableOffsets); if (variable->getType() == LLVMTypes::Int64Type) offsetScale = new llvm::ZExtInst(offsetScale, LLVMTypes::Int64Type, "scale_to64", insertBefore); From 49f1a5c2b38644405192ae31b236ed3c2f2d416b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 10:32:55 -0700 Subject: [PATCH 083/173] Add print() statements to tests to indicate failure details. These tests all fail with generic-16/c++ output currently; however, the output indicates that it's just small floating-point differences. (Though the question remains, why are those differences popping up?) --- tests/half-3.ispc | 2 ++ tests/transcendentals-0-2.ispc | 7 ++++++- tests/transcendentals-1-3.ispc | 8 +++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/half-3.ispc b/tests/half-3.ispc index 47de0eee..2c7b4096 100644 --- a/tests/half-3.ispc +++ b/tests/half-3.ispc @@ -10,6 +10,8 @@ export void f_v(uniform float RET[]) { h = float_to_half(f); int mismatches = (f == f && i != h); + if (any(mismatches != 0)) + print("mismatch: orig int16 % -> float % -> half %\n", i, f, h); errors += reduce_add(mismatches); } diff --git a/tests/transcendentals-0-2.ispc b/tests/transcendentals-0-2.ispc index ed75d71c..43ebca6a 100644 --- a/tests/transcendentals-0-2.ispc +++ b/tests/transcendentals-0-2.ispc @@ -13,7 +13,12 @@ static float float4(uniform float a, uniform float b, uniform float c, export uniform int width() { return programCount; } -bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-5; } +bool ok(float x, float ref) { + bool r = (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-5; + if (any(r == false)) + print("mismatch got %, expected %\n", x, ref); + return r; +} export void f_v(uniform float RET[]) { float v = 
float4((-9.424777984619141),(4.000000000000000),(10.000000000000000),(-10.000000000000000)); diff --git a/tests/transcendentals-1-3.ispc b/tests/transcendentals-1-3.ispc index cefa3547..f9cdc5fe 100644 --- a/tests/transcendentals-1-3.ispc +++ b/tests/transcendentals-1-3.ispc @@ -13,7 +13,13 @@ static float float4(uniform float a, uniform float b, uniform float c, export uniform int width() { return programCount; } -bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-5; } +bool ok(float x, float ref) { + bool r = (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-5; + if (any(r == false)) + print("mismatch got %, expected %\n", x, ref); + return r; +} + export void f_v(uniform float RET[]) { float v = float4((14.300000190734863),(-6.699999809265137),(-21.200000762939453),(9.000000000000000)); From 34d81fa522887343db9d2dfb969c84ca6fb2cbee Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 10:33:33 -0700 Subject: [PATCH 084/173] Fix bugs in tests. These two tests were walking past the end of the aFOO[] array, which in turn was leading to failures with the generic-16/c++ output path. --- tests/foreach-mask-1.ispc | 6 ++++-- tests/foreach-mask.ispc | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/foreach-mask-1.ispc b/tests/foreach-mask-1.ispc index 2f462b48..ee4b1b1e 100644 --- a/tests/foreach-mask-1.ispc +++ b/tests/foreach-mask-1.ispc @@ -10,8 +10,10 @@ export void f_f(uniform float RET[], uniform float aFOO[]) { // make sure we reset the func mask in the foreach loop... if ((int)aFOO[programIndex] & 1) - foreach (i = 0 ... programCount+3) - val[i] += aFOO[i] - 1; + foreach (i = 0 ... 
programCount+3) { + int ic = min(i, programCount-1); + val[i] += aFOO[ic] - 1 + i-ic; + } RET[programIndex] = val[3+programIndex]; } diff --git a/tests/foreach-mask.ispc b/tests/foreach-mask.ispc index 0d01b16a..f6000a71 100644 --- a/tests/foreach-mask.ispc +++ b/tests/foreach-mask.ispc @@ -5,8 +5,10 @@ export uniform int width() { return programCount; } // make sure we reset the func mask in the foreach loop... void update(uniform float val[], const uniform float a[]) { - foreach (i = 0 ... programCount+3) - val[i] += a[i] - 1; + foreach (i = 0 ... programCount+3) { + int ic = min(i, programCount-1); + val[i] += a[ic] - 1 + i-ic; + } } export void f_f(uniform float RET[], uniform float aFOO[]) { From cc26b66e99f2829d7a544c68284b6411c46cae3e Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 11:23:20 -0700 Subject: [PATCH 085/173] Improve source position reporting for scatters. Now, we only highlight the memory write--not both sides of the assignment expression. --- expr.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/expr.cpp b/expr.cpp index 81680874..6d6ad8bb 100644 --- a/expr.cpp +++ b/expr.cpp @@ -2663,6 +2663,7 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type, } // And store the result back to the lvalue. + ctx->SetDebugPos(arg0->pos); lStoreAssignResult(newValue, lv, resultType, lvalueType, ctx, baseSym); return newValue; @@ -2707,7 +2708,7 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; } - ctx->SetDebugPos(pos); + ctx->SetDebugPos(lvalue->pos); lStoreAssignResult(value, ptr, valueType, ptrType, ctx, baseSym); From e4b3d03da53ea8bc177f046ec311285697155f0b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 11:36:28 -0700 Subject: [PATCH 086/173] When available, use ANSI escapes to colorize diagnostic output. Issue #245. 
--- run_tests.py | 4 ++ util.cpp | 118 +++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 108 insertions(+), 14 deletions(-) diff --git a/run_tests.py b/run_tests.py index 724e1037..ce5e98f1 100755 --- a/run_tests.py +++ b/run_tests.py @@ -17,6 +17,10 @@ import shlex import platform import tempfile +# disable fancy error/warning printing with ANSI colors, so grepping for error +# messages doesn't get confused +os.environ["TERM"] = "dumb" + # This script is affected by http://bugs.python.org/issue5261 on OSX 10.5 Leopard # git history has a workaround for that issue. diff --git a/util.cpp b/util.cpp index 7057755b..92c1bac1 100644 --- a/util.cpp +++ b/util.cpp @@ -90,6 +90,49 @@ lTerminalWidth() { } +static bool +lHaveANSIColors() { + static bool r = (getenv("TERM") != NULL && + strcmp(getenv("TERM"), "dumb") != 0); + return r; +} + + +static const char * +lStartBold() { + if (lHaveANSIColors()) + return "\e[1m"; + else + return ""; +} + + +static const char * +lStartRed() { + if (lHaveANSIColors()) + return "\e[31m"; + else + return ""; +} + + +static const char * +lStartBlue() { + if (lHaveANSIColors()) + return "\e[34m"; + else + return ""; +} + + +static const char * +lResetColor() { + if (lHaveANSIColors()) + return "\e[0m"; + else + return ""; +} + /** Given a pointer into a string, find the end of the current word and return a pointer to its last character. */ @@ -140,17 +183,43 @@ lPrintFileLineContext(SourcePos p) { fclose(f); } + +/** Counts the number of characters into the buf at which the numColons + colon character is found. Skips over ANSI escape sequences and doesn't + include their characters in the final count. 
+ */ +static int +lFindIndent(int numColons, const char *buf) { + int indent = 0; + while (*buf != '\0') { + if (*buf == '\e') { + while (*buf != '\0' && *buf != 'm') + ++buf; + if (*buf == 'm') + ++buf; + } + else { + if (*buf == ':') { + if (--numColons == 0) + break; + } + ++indent; + ++buf; + } + } + return indent + 2; +} + + /** Print the given string to the given FILE, assuming the given output column width. Break words as needed to avoid words spilling past the last column. */ static void -lPrintWithWordBreaks(const char *buf, int columnWidth, FILE *out) { +lPrintWithWordBreaks(const char *buf, int indent, int columnWidth, FILE *out) { #ifdef ISPC_IS_WINDOWS fputs(buf, out); #else int column = 0; - Assert(strchr(buf, ':') != NULL); - int indent = strchr(buf, ':') - buf + 2; int width = std::max(40, columnWidth - 2); // Collect everything into a string and print it all at once at the end @@ -160,6 +229,15 @@ lPrintWithWordBreaks(const char *buf, int columnWidth, FILE *out) { const char *msgPos = buf; while (true) { + if (*msgPos == '\e') { + // handle ANSI color escape: copy it to the output buffer + // without charging for the characters it uses + do { + outStr.push_back(*msgPos++); + } while (*msgPos != '\0' && *msgPos != 'm'); + continue; + } + while (*msgPos != '\0' && isspace(*msgPos)) ++msgPos; if (*msgPos == '\0') @@ -171,8 +249,8 @@ lPrintWithWordBreaks(const char *buf, int columnWidth, FILE *out) { column = indent; outStr.push_back('\n'); // Indent to the same column as the ":" at the start of the - // message, unless doing so would be too far in. - for (int i = 0; i < std::min(16, indent); ++i) + // message. + for (int i = 0; i < indent; ++i) outStr.push_back(' '); } @@ -225,26 +303,37 @@ asprintf(char **sptr, const char *fmt, ...) 
@param args Arguments with values for format string % entries */ static void -lPrint(const char *type, SourcePos p, const char *fmt, va_list args) { +lPrint(const char *type, bool isError, SourcePos p, const char *fmt, + va_list args) { char *errorBuf, *formattedBuf; if (vasprintf(&errorBuf, fmt, args) == -1) { fprintf(stderr, "vasprintf() unable to allocate memory!\n"); abort(); } + + int indent = 0; if (p.first_line == 0) { // We don't have a valid SourcePos, so create a message without it - if (asprintf(&formattedBuf, "%s: %s\n", type, errorBuf) == -1) { + if (asprintf(&formattedBuf, "%s%s%s%s%s: %s%s\n", lStartBold(), + isError ? lStartRed() : lStartBlue(), type, + lResetColor(), lStartBold(), errorBuf, + lResetColor()) == -1) { fprintf(stderr, "asprintf() unable to allocate memory!\n"); exit(1); } + indent = lFindIndent(1, formattedBuf); } else { // Create an error message that includes the file and line number - if (asprintf(&formattedBuf, "%s:%d:%d: %s: %s\n", p.name, - p.first_line, p.first_column, type, errorBuf) == -1) { + if (asprintf(&formattedBuf, "%s%s:%d:%d: %s%s%s%s: %s%s\n", + lStartBold(), p.name, p.first_line, p.first_column, + isError ? lStartRed() : lStartBlue(), type, + lResetColor(), lStartBold(), errorBuf, + lResetColor()) == -1) { fprintf(stderr, "asprintf() unable to allocate memory!\n"); exit(1); } + indent = lFindIndent(3, formattedBuf); } // Now that we've done all that work, see if we've already printed the @@ -255,7 +344,7 @@ lPrint(const char *type, SourcePos p, const char *fmt, va_list args) { return; printed.insert(formattedBuf); - lPrintWithWordBreaks(formattedBuf, lTerminalWidth(), stderr); + lPrintWithWordBreaks(formattedBuf, indent, lTerminalWidth(), stderr); lPrintFileLineContext(p); free(errorBuf); @@ -271,7 +360,7 @@ Error(SourcePos p, const char *fmt, ...) 
{ va_list args; va_start(args, fmt); - lPrint("Error", p, fmt, args); + lPrint("Error", true, p, fmt, args); va_end(args); } @@ -283,7 +372,7 @@ Debug(SourcePos p, const char *fmt, ...) { va_list args; va_start(args, fmt); - lPrint("Debug", p, fmt, args); + lPrint("Debug", false, p, fmt, args); va_end(args); } @@ -298,7 +387,8 @@ Warning(SourcePos p, const char *fmt, ...) { va_list args; va_start(args, fmt); - lPrint(g->warningsAsErrors ? "Error" : "Warning", p, fmt, args); + lPrint(g->warningsAsErrors ? "Error" : "Warning", g->warningsAsErrors, + p, fmt, args); va_end(args); } @@ -311,7 +401,7 @@ PerformanceWarning(SourcePos p, const char *fmt, ...) { va_list args; va_start(args, fmt); - lPrint("Performance Warning", p, fmt, args); + lPrint("Performance Warning", false, p, fmt, args); va_end(args); } From 12c754c92b7fe9d3e005c3ff278fb0655c83242d Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 13:11:15 -0700 Subject: [PATCH 087/173] Improved handling of splatted constant vectors in C++ backend. Now, when we're printing out a constant vector value, we check to see if it's a splat and call out to one of the __splat_* functions in the generated code if so. --- cbackend.cpp | 92 ++++++++++++++++++++++---------- examples/intrinsics/generic-16.h | 5 ++ examples/intrinsics/sse4.h | 4 ++ 3 files changed, 73 insertions(+), 28 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index 0c582ce0..ebe96c45 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -1096,6 +1096,26 @@ bool CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) { return false; } + +// FIXME: generalize this/make it not so hard-coded? 
+static const char *lGetSmearFunc(Type *matchType) { + switch (matchType->getTypeID()) { + case Type::FloatTyID: return "__smear_float"; + case Type::DoubleTyID: return "__smear_double"; + case Type::IntegerTyID: { + switch (cast(matchType)->getBitWidth()) { + case 1: return "__smear_i1"; + case 8: return "__smear_i8"; + case 16: return "__smear_i16"; + case 32: return "__smear_i32"; + case 64: return "__smear_i64"; + } + } + default: return NULL; + } +} + + // printConstant - The LLVM Constant to C Constant converter. void CWriter::printConstant(Constant *CPV, bool Static) { if (const ConstantExpr *CE = dyn_cast(CPV)) { @@ -1435,30 +1455,61 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << ")"; break; } - case Type::VectorTyID: - printType(Out, CPV->getType()); - Out << "("; + case Type::VectorTyID: { + VectorType *VT = dyn_cast(CPV->getType()); + const char *smearFunc = lGetSmearFunc(VT->getElementType()); - if (ConstantVector *CV = dyn_cast(CPV)) { - printConstantVector(CV, Static); + if (isa(CPV)) { + assert(smearFunc != NULL); + + Constant *CZ = Constant::getNullValue(VT->getElementType()); + Out << smearFunc << "("; + printConstant(CZ, Static); + Out << ")"; + } + else if (ConstantVector *CV = dyn_cast(CPV)) { + llvm::Constant *splatValue = CV->getSplatValue(); + if (splatValue != NULL && smearFunc != NULL) { + Out << smearFunc << "("; + printConstant(splatValue, Static); + Out << ")"; + } + else { + printType(Out, CPV->getType()); + Out << "("; + printConstantVector(CV, Static); + Out << ")"; + } + } #ifdef LLVM_3_1svn - } else if (ConstantDataSequential *CDS = - dyn_cast(CPV)) { - printConstantDataSequential(CDS, Static); + else if (ConstantDataVector *CDV = dyn_cast(CPV)) { + llvm::Constant *splatValue = CDV->getSplatValue(); + if (splatValue != NULL && smearFunc != NULL) { + Out << smearFunc << "("; + printConstant(splatValue, Static); + Out << ")"; + } + else { + printType(Out, CPV->getType()); + Out << "("; + 
printConstantDataSequential(CDV, Static); + Out << ")"; + } #endif } else { - assert(isa(CPV) || isa(CPV)); - VectorType *VT = cast(CPV->getType()); + assert(isa(CPV)); Constant *CZ = Constant::getNullValue(VT->getElementType()); + printType(Out, CPV->getType()); + Out << "("; printConstant(CZ, Static); for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) { Out << ", "; printConstant(CZ, Static); } + Out << ")"; } - Out << ")"; break; - + } case Type::StructTyID: if (!Static) { // call init func... @@ -4327,23 +4378,8 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { assert(toMatch != NULL); { - // FIXME: generalize this/make it not so hard-coded? Type *matchType = toMatch->getType(); - const char *smearFuncName = NULL; - - switch (matchType->getTypeID()) { - case Type::FloatTyID: smearFuncName = "__smear_float"; break; - case Type::DoubleTyID: smearFuncName = "__smear_double"; break; - case Type::IntegerTyID: { - switch (cast(matchType)->getBitWidth()) { - case 8: smearFuncName = "__smear_i8"; break; - case 16: smearFuncName = "__smear_i16"; break; - case 32: smearFuncName = "__smear_i32"; break; - case 64: smearFuncName = "__smear_i64"; break; - } - } - default: break; - } + const char *smearFuncName = lGetSmearFunc(matchType); if (smearFuncName != NULL) { Function *smearFunc = module->getFunction(smearFuncName); diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 861db2a4..d6a5c121 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -374,6 +374,11 @@ static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v, int align) { *ptr = v.v; } +static FORCEINLINE __vec16_i1 __smear_i1(int v) { + return __vec16_i1(v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v); +} + /////////////////////////////////////////////////////////////////////////// // int8 diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index c6299893..48a67719 100644 --- a/examples/intrinsics/sse4.h 
+++ b/examples/intrinsics/sse4.h @@ -266,6 +266,10 @@ static FORCEINLINE void __store(__vec4_i1 *p, __vec4_i1 value, int align) { _mm_storeu_ps((float *)(&p->v), value.v); } +static FORCEINLINE __vec4_i1 __smear_i1(int v) { + return __vec4_i1(v, v, v, v); +} + /////////////////////////////////////////////////////////////////////////// // int8 From cb9f50ef636529f9670104232c5b83216632d0c8 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 13:11:47 -0700 Subject: [PATCH 088/173] C++ backend: mangle variable names less. This makes the generated code a little easier to connect with the original program. --- cbackend.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cbackend.cpp b/cbackend.cpp index ebe96c45..becf3f48 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -1702,7 +1702,12 @@ std::string CWriter::GetValueName(const Value *Operand) { VarName += ch; } - return VarName + "_llvm_cbe"; + if (isa(Operand)) + VarName += "_label"; + else + VarName += "_"; + + return VarName; } /// writeInstComputationInline - Emit the computation for the specified From 71bdc67a45e83b3f82092e3895a7399f23f96a70 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 16:24:40 -0700 Subject: [PATCH 089/173] Add LLVMGetName() utility routines. Infrastructure for issue #244. 
--- llvmutil.cpp | 21 +++++++++++++++++++++ llvmutil.h | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/llvmutil.cpp b/llvmutil.cpp index d438af45..f7fa041f 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -1552,3 +1552,24 @@ LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[], return new llvm::ShuffleVectorInst(v1, v2, vec, "shuffle", insertBefore); } + + +const char * +LLVMGetName(llvm::Value *v, const char *s) { + if (v == NULL) return s; + std::string ret = v->getName(); + ret += s; + return strdup(ret.c_str()); +} + + +const char * +LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2) { + std::string r = op; + r += "_"; + r += v1->getName().str(); + r += "_"; + r += v2->getName().str(); + return strdup(r.c_str()); +} + diff --git a/llvmutil.h b/llvmutil.h index 1990236d..ba8bc16d 100644 --- a/llvmutil.h +++ b/llvmutil.h @@ -290,4 +290,10 @@ extern llvm::Value *LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[], int shufSize, llvm::Instruction *insertBefore); +/** Utility routines to concat strings with the names of existing values to + create meaningful new names for instruction values. +*/ +extern const char *LLVMGetName(llvm::Value *v, const char *); +extern const char *LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2); + #endif // ISPC_LLVMUTIL_H From 32815e628d1ff442a907d548ab5e5f9ffd667a46 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Apr 2012 16:36:46 -0700 Subject: [PATCH 090/173] Improve naming of llvm Instructions created. We now try harder to keep the names of instructions related to the initial names of variables they're derived from and so forth. This is useful for making both LLVM IR as well as generated C++ code easier to correlate back to the original ispc source code. Issue #244. 
--- cbackend.cpp | 3 +- ctx.cpp | 174 +++++++++++++++++++++++--------------- expr.cpp | 233 +++++++++++++++++++++++++++++++-------------------- opt.cpp | 53 +++++++----- 4 files changed, 279 insertions(+), 184 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index becf3f48..7adefb40 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -4402,7 +4402,8 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { Value *args[1] = { toMatch }; ArrayRef argArray(&args[0], &args[1]); Instruction *smearCall = - CallInst::Create(smearFunc, argArray, "smear", (Instruction *)NULL); + CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"), + (Instruction *)NULL); ReplaceInstWithInst(iter, smearCall); diff --git a/ctx.cpp b/ctx.cpp index c76ec1b8..de61ffcb 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1236,7 +1236,7 @@ llvm::Value * FunctionEmitContext::Any(llvm::Value *mask) { llvm::Value *mmval = LaneMask(mask); return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, mmval, - LLVMInt32(0), "any_mm_cmp"); + LLVMInt32(0), LLVMGetName(mask, "_any")); } @@ -1244,7 +1244,8 @@ llvm::Value * FunctionEmitContext::All(llvm::Value *mask) { llvm::Value *mmval = LaneMask(mask); return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval, - LLVMInt32((1<target.vectorWidth)-1), "all_mm_cmp"); + LLVMInt32((1<target.vectorWidth)-1), + LLVMGetName(mask, "_all")); } @@ -1252,7 +1253,7 @@ llvm::Value * FunctionEmitContext::None(llvm::Value *mask) { llvm::Value *mmval = LaneMask(mask); return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval, - LLVMInt32(0), "none_mm_cmp"); + LLVMInt32(0), LLVMGetName(mask, "_none")); } @@ -1270,7 +1271,7 @@ FunctionEmitContext::LaneMask(llvm::Value *v) { // We can actually call either one, since both are i32s as far as // LLVM's type system is concerned... 
llvm::Function *fmm = mm[0]->function; - return CallInst(fmm, NULL, v, "val_movmsk"); + return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk")); } @@ -1288,7 +1289,7 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { llvm::Value *mm1 = LaneMask(v1); llvm::Value *mm2 = LaneMask(v2); return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2, - "v1==v2"); + LLVMGetName("equal", v1, v2)); #endif } @@ -1337,7 +1338,7 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { for (unsigned int i = 0; i < at->getNumElements(); ++i) { llvm::Value *elt = ExtractInst(b, i); llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType, - "val_to_boolvec32"); + LLVMGetName(elt, "_to_boolvec32")); ret = InsertInst(ret, sext, i); } return ret; @@ -1664,16 +1665,17 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { llvm::Value * -FunctionEmitContext::BitCastInst(llvm::Value *value, - llvm::Type *type, +FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); return NULL; } - llvm::Instruction *inst = - new llvm::BitCastInst(value, type, name ? name : "bitcast", bblock); + if (name == NULL) + name = LLVMGetName(value, "_bitcast"); + + llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock); AddDebugPos(inst); return inst; } @@ -1690,23 +1692,26 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { // no-op for varying pointers; they're already vectors of ints return value; + if (name == NULL) + name = LLVMGetName(value, "_ptr2int"); llvm::Type *type = LLVMTypes::PointerIntType; - llvm::Instruction *inst = - new llvm::PtrToIntInst(value, type, name ? 
name : "ptr2int", bblock); + llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock); AddDebugPos(inst); return inst; } llvm::Value * -FunctionEmitContext::PtrToIntInst(llvm::Value *value, - llvm::Type *toType, +FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); return NULL; } + if (name == NULL) + name = LLVMGetName(value, "_ptr2int"); + llvm::Type *fromType = value->getType(); if (llvm::isa(fromType)) { // varying pointer @@ -1714,30 +1719,31 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, // already the right type--done return value; else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) - return TruncInst(value, toType, "ptr_to_int"); + return TruncInst(value, toType, name); else { Assert(fromType->getScalarSizeInBits() < toType->getScalarSizeInBits()); - return ZExtInst(value, toType, "ptr_to_int"); + return ZExtInst(value, toType, name); } } - llvm::Instruction *inst = - new llvm::PtrToIntInst(value, toType, name ? 
name : "ptr2int", bblock); + llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock); AddDebugPos(inst); return inst; } llvm::Value * -FunctionEmitContext::IntToPtrInst(llvm::Value *value, - llvm::Type *toType, +FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, const char *name) { if (value == NULL) { Assert(m->errorCount > 0); return NULL; } + if (name == NULL) + name = LLVMGetName(value, "_int2ptr"); + llvm::Type *fromType = value->getType(); if (llvm::isa(fromType)) { // varying pointer @@ -1745,16 +1751,16 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value, // done return value; else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) - return TruncInst(value, toType, "int_to_ptr"); + return TruncInst(value, toType, name); else { Assert(fromType->getScalarSizeInBits() < toType->getScalarSizeInBits()); - return ZExtInst(value, toType, "int_to_ptr"); + return ZExtInst(value, toType, name); } } - llvm::Instruction *inst = - new llvm::IntToPtrInst(value, toType, name ? name : "int2ptr", bblock); + llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name, + bblock); AddDebugPos(inst); return inst; } @@ -1768,10 +1774,12 @@ FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, return NULL; } + if (name == NULL) + name = LLVMGetName(value, "_trunc"); + // TODO: we should probably handle the array case as in // e.g. BitCastInst(), but we don't currently need that functionality - llvm::Instruction *inst = - new llvm::TruncInst(value, type, name ? name : "trunc", bblock); + llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock); AddDebugPos(inst); return inst; } @@ -1785,10 +1793,13 @@ FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, return NULL; } + if (name == NULL) + name = LLVMGetName(value, "_cast"); + // TODO: we should probably handle the array case as in // e.g. 
BitCastInst(), but we don't currently need that functionality - llvm::Instruction *inst = - llvm::CastInst::Create(op, value, type, name ? name : "cast", bblock); + llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name, + bblock); AddDebugPos(inst); return inst; } @@ -1802,10 +1813,12 @@ FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, return NULL; } + if (name == NULL) + name = LLVMGetName(value, "_cast"); + // TODO: we should probably handle the array case as in // e.g. BitCastInst(), but we don't currently need that functionality - llvm::Instruction *inst = - llvm::CastInst::CreateFPCast(value, type, name ? name : "fpcast", bblock); + llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock); AddDebugPos(inst); return inst; } @@ -1819,10 +1832,12 @@ FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, return NULL; } + if (name == NULL) + name = LLVMGetName(value, "_sext"); + // TODO: we should probably handle the array case as in // e.g. BitCastInst(), but we don't currently need that functionality - llvm::Instruction *inst = - new llvm::SExtInst(value, type, name ? name : "sext", bblock); + llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock); AddDebugPos(inst); return inst; } @@ -1836,10 +1851,12 @@ FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, return NULL; } + if (name == NULL) + name = LLVMGetName(value, "_zext"); + // TODO: we should probably handle the array case as in // e.g. BitCastInst(), but we don't currently need that functionality - llvm::Instruction *inst = - new llvm::ZExtInst(value, type, name ? name : "zext", bblock); + llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock); AddDebugPos(inst); return inst; } @@ -1867,50 +1884,52 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, // 64-bit type. 
if ((g->target.is32Bit || g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int64Type) - index = TruncInst(index, LLVMTypes::Int32Type, "trunc_index"); + index = TruncInst(index, LLVMTypes::Int32Type); else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int32Type) - index = SExtInst(index, LLVMTypes::Int64Type, "sext_index"); + index = SExtInst(index, LLVMTypes::Int64Type); // do a scalar multiply to get the offset as index * scale and then // smear the result out to be a vector; this is more efficient than // first promoting both the scale and the index to vectors and then // multiplying. offset = BinaryOperator(llvm::Instruction::Mul, scale, index); - offset = SmearUniform(offset, "offset_smear"); + offset = SmearUniform(offset); } else { // Similarly, truncate or sign extend the index to be a 32 or 64 // bit vector type if ((g->target.is32Bit || g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int64VectorType) - index = TruncInst(index, LLVMTypes::Int32VectorType, "trunc_index"); + index = TruncInst(index, LLVMTypes::Int32VectorType); else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) && index->getType() == LLVMTypes::Int32VectorType) - index = SExtInst(index, LLVMTypes::Int64VectorType, "sext_index"); + index = SExtInst(index, LLVMTypes::Int64VectorType); - scale = SmearUniform(scale, "scale_smear"); + scale = SmearUniform(scale); // offset = index * scale - offset = BinaryOperator(llvm::Instruction::Mul, scale, index, "offset"); + offset = BinaryOperator(llvm::Instruction::Mul, scale, index, + LLVMGetName("mul", scale, index)); } // For 64-bit targets, if we've been doing our offset calculations in // 32 bits, we still have to convert to a 64-bit value before we // actually add the offset to the pointer. 
if (g->target.is32Bit == false && g->opt.force32BitAddressing == true) - offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64"); + offset = SExtInst(offset, LLVMTypes::Int64VectorType, + LLVMGetName(offset, "_to_64")); // Smear out the pointer to be varying; either the base pointer or the // index must be varying for this method to be called. bool baseIsUniform = (llvm::isa(basePtr->getType())); Assert(baseIsUniform == false || indexIsVarying == true); - llvm::Value *varyingPtr = baseIsUniform ? - SmearUniform(basePtr, "ptr_smear") : basePtr; + llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr; // newPtr = ptr + offset - return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, "new_ptr"); + return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, + LLVMGetName(basePtr, "_offset")); } @@ -1999,8 +2018,8 @@ FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) { llvm::StructType::get(*g->ctx, eltTypes); llvm::Value *ret = llvm::UndefValue::get(st); - ret = InsertInst(ret, ptr, 0); - ret = InsertInst(ret, offset, 1); + ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr")); + ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset")); return ret; } @@ -2267,6 +2286,9 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { llvm::dyn_cast(ptr->getType()); Assert(pt != NULL); + if (name == NULL) + name = LLVMGetName(ptr, "_load"); + // FIXME: it's not clear to me that we generate unaligned vector loads // of varying stuff out of the front-end any more. (Only by the // optimization passes that lower gathers to vector loads, I think..) @@ -2274,7 +2296,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { int align = 0; if (llvm::isa(pt->getElementType())) align = 1; - llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? 
name : "load", + llvm::Instruction *inst = new llvm::LoadInst(ptr, name, false /* not volatile */, align, bblock); AddDebugPos(inst); @@ -2291,8 +2313,8 @@ lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType **ptrType) { Assert(dynamic_cast(*ptrType) != NULL); - llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, "slice_ptr"); - llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, "slice_offset"); + llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr")); + llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset")); // slicePtr should be a pointer to an soa-width wide array of the // final atomic/enum/pointer type @@ -2313,7 +2335,7 @@ lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, // And finally index based on the slice offset return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType, - "final_slice_gep"); + LLVMGetName(slicePtr, "_final_gep")); } @@ -2365,6 +2387,9 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, Assert(ptrRefType != NULL && mask != NULL); + if (name == NULL) + name = LLVMGetName(ptr, "_load"); + const PointerType *ptrType; if (dynamic_cast(ptrRefType) != NULL) ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); @@ -2394,7 +2419,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, // it's totally unaligned. (This shouldn't make any difference // vs the proper alignment in practice.) align = 1; - llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? 
name : "load", + llvm::Instruction *inst = new llvm::LoadInst(ptr, name, false /* not volatile */, align, bblock); AddDebugPos(inst); @@ -2487,7 +2512,7 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, if (disableGSWarningCount == 0) addGSMetadata(call, currentPos); - return BitCastInst(call, llvmReturnType, "gather_bitcast"); + return BitCastInst(call, llvmReturnType, LLVMGetName(call, "_gather_bitcast")); } @@ -2652,9 +2677,9 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, Type::Equal(valueType, AtomicType::VaryingUInt64)) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_64"); ptr = BitCastInst(ptr, LLVMTypes::Int64VectorPointerType, - "ptr_to_int64vecptr"); + LLVMGetName(ptr, "_to_int64vecptr")); value = BitCastInst(value, LLVMTypes::Int64VectorType, - "value_to_int64"); + LLVMGetName(value, "_to_int64")); } else if (Type::Equal(valueType, AtomicType::VaryingFloat) || Type::Equal(valueType, AtomicType::VaryingBool) || @@ -2663,22 +2688,22 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, dynamic_cast(valueType) != NULL) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32"); ptr = BitCastInst(ptr, LLVMTypes::Int32VectorPointerType, - "ptr_to_int32vecptr"); + LLVMGetName(ptr, "_to_int32vecptr")); if (Type::Equal(valueType, AtomicType::VaryingFloat)) value = BitCastInst(value, LLVMTypes::Int32VectorType, - "value_to_int32"); + LLVMGetName(value, "_to_int32")); } else if (Type::Equal(valueType, AtomicType::VaryingInt16) || Type::Equal(valueType, AtomicType::VaryingUInt16)) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_16"); ptr = BitCastInst(ptr, LLVMTypes::Int16VectorPointerType, - "ptr_to_int16vecptr"); + LLVMGetName(ptr, "_to_int16vecptr")); } else if (Type::Equal(valueType, AtomicType::VaryingInt8) || Type::Equal(valueType, AtomicType::VaryingUInt8)) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_8"); ptr = 
BitCastInst(ptr, LLVMTypes::Int8VectorPointerType, - "ptr_to_int8vecptr"); + LLVMGetName(ptr, "_to_int8vecptr")); } Assert(maskedStoreFunc != NULL); @@ -2964,13 +2989,17 @@ FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) { return NULL; } + if (name == NULL) { + char buf[32]; + sprintf(buf, "_extract_%d", elt); + name = LLVMGetName(v, buf); + } + llvm::Instruction *ei = NULL; if (llvm::isa(v->getType())) - ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), - name ? name : "extract", bblock); + ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock); else - ei = llvm::ExtractValueInst::Create(v, elt, name ? name : "extract", - bblock); + ei = llvm::ExtractValueInst::Create(v, elt, name, bblock); AddDebugPos(ei); return ei; } @@ -2984,13 +3013,18 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, return NULL; } + if (name == NULL) { + char buf[32]; + sprintf(buf, "_insert_%d", elt); + name = LLVMGetName(v, buf); + } + llvm::Instruction *ii = NULL; if (llvm::isa(v->getType())) ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), - name ? name : "insert", bblock); + name, bblock); else - ii = llvm::InsertValueInst::Create(v, eltVal, elt, - name ? name : "insert", bblock); + ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock); AddDebugPos(ii); return ii; } @@ -3014,9 +3048,11 @@ FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, return NULL; } - llvm::Instruction *inst = - llvm::SelectInst::Create(test, val0, val1, name ? 
name : "select", - bblock); + if (name == NULL) + name = LLVMGetName(test, "_select"); + + llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name, + bblock); AddDebugPos(inst); return inst; } diff --git a/expr.cpp b/expr.cpp index 6d6ad8bb..01339d03 100644 --- a/expr.cpp +++ b/expr.cpp @@ -68,6 +68,7 @@ #include #include + ///////////////////////////////////////////////////////////////////////////////////// // Expr @@ -1029,20 +1030,26 @@ lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos, llvm::Value *binop = NULL; int delta = (op == UnaryExpr::PreInc || op == UnaryExpr::PostInc) ? 1 : -1; + std::string opName = rvalue->getName().str(); + if (op == UnaryExpr::PreInc || op == UnaryExpr::PostInc) + opName += "_plus1"; + else + opName += "_minus1"; + if (dynamic_cast(type) != NULL) { const Type *incType = type->IsUniformType() ? AtomicType::UniformInt32 : AtomicType::VaryingInt32; llvm::Constant *dval = lLLVMConstantValue(incType, g->ctx, delta); - binop = ctx->GetElementPtrInst(rvalue, dval, type, "ptr_inc_or_dec"); + binop = ctx->GetElementPtrInst(rvalue, dval, type, opName.c_str()); } else { llvm::Constant *dval = lLLVMConstantValue(type, g->ctx, delta); if (type->IsFloatType()) binop = ctx->BinaryOperator(llvm::Instruction::FAdd, rvalue, - dval, "val_inc_or_dec"); + dval, opName.c_str()); else binop = ctx->BinaryOperator(llvm::Instruction::Add, rvalue, - dval, "val_inc_or_dec"); + dval, opName.c_str()); } // And store the result out to the lvalue @@ -1071,11 +1078,11 @@ lEmitNegate(Expr *arg, SourcePos pos, FunctionEmitContext *ctx) { ctx->SetDebugPos(pos); if (type->IsFloatType()) return ctx->BinaryOperator(llvm::Instruction::FSub, zero, argVal, - "fnegate"); + LLVMGetName(argVal, "_negate")); else { Assert(type->IsIntType()); return ctx->BinaryOperator(llvm::Instruction::Sub, zero, argVal, - "inegate"); + LLVMGetName(argVal, "_negate")); } } @@ -1103,11 +1110,11 @@ UnaryExpr::GetValue(FunctionEmitContext *ctx) const { return 
lEmitNegate(expr, pos, ctx); case LogicalNot: { llvm::Value *argVal = expr->GetValue(ctx); - return ctx->NotOperator(argVal, "logicalnot"); + return ctx->NotOperator(argVal, LLVMGetName(argVal, "_logicalnot")); } case BitNot: { llvm::Value *argVal = expr->GetValue(ctx); - return ctx->NotOperator(argVal, "bitnot"); + return ctx->NotOperator(argVal, LLVMGetName(argVal, "_bitnot")); } default: FATAL("logic error"); @@ -1506,17 +1513,22 @@ lEmitBinaryArith(BinaryExpr::Op op, llvm::Value *value0, llvm::Value *value1, bool isFloatOp = type0->IsFloatType(); bool isUnsignedOp = type0->IsUnsignedType(); + const char *opName = NULL; switch (op) { case BinaryExpr::Add: + opName = "add"; inst = isFloatOp ? llvm::Instruction::FAdd : llvm::Instruction::Add; break; case BinaryExpr::Sub: + opName = "sub"; inst = isFloatOp ? llvm::Instruction::FSub : llvm::Instruction::Sub; break; case BinaryExpr::Mul: + opName = "mul"; inst = isFloatOp ? llvm::Instruction::FMul : llvm::Instruction::Mul; break; case BinaryExpr::Div: + opName = "div"; if (type0->IsVaryingType() && !isFloatOp) PerformanceWarning(pos, "Division with varying integer types is " "very inefficient."); @@ -1524,6 +1536,7 @@ lEmitBinaryArith(BinaryExpr::Op op, llvm::Value *value0, llvm::Value *value1, (isUnsignedOp ? 
llvm::Instruction::UDiv : llvm::Instruction::SDiv); break; case BinaryExpr::Mod: + opName = "mod"; if (type0->IsVaryingType() && !isFloatOp) PerformanceWarning(pos, "Modulus operator with varying types is " "very inefficient."); @@ -1535,7 +1548,7 @@ lEmitBinaryArith(BinaryExpr::Op op, llvm::Value *value0, llvm::Value *value1, return NULL; } - return ctx->BinaryOperator(inst, value0, value1, "binop"); + return ctx->BinaryOperator(inst, value0, value1, LLVMGetName(opName, value0, value1)); } } @@ -1550,27 +1563,34 @@ lEmitBinaryCmp(BinaryExpr::Op op, llvm::Value *e0Val, llvm::Value *e1Val, bool isUnsignedOp = type->IsUnsignedType(); llvm::CmpInst::Predicate pred; + const char *opName = NULL; switch (op) { case BinaryExpr::Lt: + opName = "less"; pred = isFloatOp ? llvm::CmpInst::FCMP_OLT : (isUnsignedOp ? llvm::CmpInst::ICMP_ULT : llvm::CmpInst::ICMP_SLT); break; case BinaryExpr::Gt: + opName = "greater"; pred = isFloatOp ? llvm::CmpInst::FCMP_OGT : (isUnsignedOp ? llvm::CmpInst::ICMP_UGT : llvm::CmpInst::ICMP_SGT); break; case BinaryExpr::Le: + opName = "lessequal"; pred = isFloatOp ? llvm::CmpInst::FCMP_OLE : (isUnsignedOp ? llvm::CmpInst::ICMP_ULE : llvm::CmpInst::ICMP_SLE); break; case BinaryExpr::Ge: + opName = "greaterequal"; pred = isFloatOp ? llvm::CmpInst::FCMP_OGE : (isUnsignedOp ? llvm::CmpInst::ICMP_UGE : llvm::CmpInst::ICMP_SGE); break; case BinaryExpr::Equal: + opName = "equal"; pred = isFloatOp ? llvm::CmpInst::FCMP_OEQ : llvm::CmpInst::ICMP_EQ; break; case BinaryExpr::NotEqual: + opName = "notequal"; pred = isFloatOp ? llvm::CmpInst::FCMP_ONE : llvm::CmpInst::ICMP_NE; break; default: @@ -1580,7 +1600,8 @@ lEmitBinaryCmp(BinaryExpr::Op op, llvm::Value *e0Val, llvm::Value *e1Val, llvm::Value *cmp = ctx->CmpInst(isFloatOp ? 
llvm::Instruction::FCmp : llvm::Instruction::ICmp, - pred, e0Val, e1Val, "bincmp"); + pred, e0Val, e1Val, + LLVMGetName(opName, e0Val, e1Val)); // This is a little ugly: CmpInst returns i1 values, but we use vectors // of i32s for varying bool values; type convert the result here if // needed. @@ -2618,7 +2639,7 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type, llvm::Value *rvalue = arg1->GetValue(ctx); ctx->SetDebugPos(pos); llvm::Value *mask = lMaskForSymbol(baseSym, ctx); - llvm::Value *oldLHS = ctx->LoadInst(lv, mask, lvalueType, "opassign_load"); + llvm::Value *oldLHS = ctx->LoadInst(lv, mask, lvalueType); // Map the operator to the corresponding BinaryExpr::Op operator BinaryExpr::Op basicop; @@ -3955,7 +3976,7 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { } ctx->SetDebugPos(pos); - return ctx->LoadInst(ptr, mask, lvalueType, "index"); + return ctx->LoadInst(ptr, mask, lvalueType); } @@ -4026,7 +4047,7 @@ lConvertToSlicePointer(FunctionEmitContext *ctx, llvm::Value *ptr, // offsets llvm::Value *result = llvm::Constant::getNullValue(sliceStructType); // And replace the pointer in the struct with the given pointer - return ctx->InsertInst(result, ptr, 0); + return ctx->InsertInst(result, ptr, 0, LLVMGetName(ptr, "_slice")); } @@ -4117,7 +4138,8 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { &baseExprType); llvm::Value *ptr = ctx->GetElementPtrInst(basePtrValue, indexValue, - baseExprType, "ptr_offset"); + baseExprType, + LLVMGetName(basePtrValue, "_offset")); return lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); } @@ -4153,7 +4175,7 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { // And do the actual indexing calculation.. 
llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, LLVMInt32(0), indexValue, - basePtrType); + basePtrType, LLVMGetName(basePtr, "_offset")); return lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); } @@ -4643,8 +4665,8 @@ VectorMemberExpr::GetValue(FunctionEmitContext *ctx) const { // Allocate temporary memory to tore the result llvm::Value *resultPtr = ctx->AllocaInst(memberType->LLVMType(g->ctx), - "vector_tmp"); - + "vector_tmp"); + // FIXME: we should be able to use the internal mask here according // to the same logic where it's used elsewhere llvm::Value *elementMask = ctx->GetFullMask(); @@ -4655,17 +4677,19 @@ VectorMemberExpr::GetValue(FunctionEmitContext *ctx) const { ctx->SetDebugPos(pos); for (size_t i = 0; i < identifier.size(); ++i) { + char idStr[2] = { identifier[i], '\0' }; llvm::Value *elementPtr = ctx->AddElementOffset(basePtr, indices[i], - basePtrType); + basePtrType, + LLVMGetName(basePtr, idStr)); llvm::Value *elementValue = - ctx->LoadInst(elementPtr, elementMask, elementPtrType, - "vec_element"); + ctx->LoadInst(elementPtr, elementMask, elementPtrType); - llvm::Value *ptmp = ctx->AddElementOffset(resultPtr, i, NULL); + const char *resultName = LLVMGetName(resultPtr, idStr); + llvm::Value *ptmp = ctx->AddElementOffset(resultPtr, i, NULL, resultName); ctx->StoreInst(elementValue, ptmp); } - return ctx->LoadInst(resultPtr, "swizzle_vec"); + return ctx->LoadInst(resultPtr, LLVMGetName(basePtr, "_swizzle")); } } @@ -4799,7 +4823,9 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { } ctx->SetDebugPos(pos); - return ctx->LoadInst(lvalue, mask, lvalueType, "structelement"); + std::string suffix = std::string("_") + identifier; + return ctx->LoadInst(lvalue, mask, lvalueType, + LLVMGetName(lvalue, suffix.c_str())); } @@ -4841,7 +4867,8 @@ MemberExpr::GetLValue(FunctionEmitContext *ctx) const { expr->GetLValueType(); ctx->SetDebugPos(pos); llvm::Value *ptr = ctx->AddElementOffset(basePtr, elementNumber, - exprLValueType); + 
exprLValueType, + basePtr->getName().str().c_str()); ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); @@ -5814,6 +5841,23 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, SourcePos pos) { llvm::Value *cast = NULL; + std::string opName = exprVal->getName().str(); + switch (toType->basicType) { + case AtomicType::TYPE_BOOL: opName += "_to_bool"; break; + case AtomicType::TYPE_INT8: opName += "_to_int8"; break; + case AtomicType::TYPE_UINT8: opName += "_to_uint8"; break; + case AtomicType::TYPE_INT16: opName += "_to_int16"; break; + case AtomicType::TYPE_UINT16: opName += "_to_uint16"; break; + case AtomicType::TYPE_INT32: opName += "_to_int32"; break; + case AtomicType::TYPE_UINT32: opName += "_to_uint32"; break; + case AtomicType::TYPE_INT64: opName += "_to_int64"; break; + case AtomicType::TYPE_UINT64: opName += "_to_uint64"; break; + case AtomicType::TYPE_FLOAT: opName += "_to_float"; break; + case AtomicType::TYPE_DOUBLE: opName += "_to_double"; break; + default: FATAL("Unimplemented"); + } + const char *cOpName = opName.c_str(); + switch (toType->basicType) { case AtomicType::TYPE_FLOAT: { llvm::Type *targetType = @@ -5825,17 +5869,17 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) // If we have a bool vector of i32 elements, first truncate // down to a single bit - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); // And then do an unisgned int->float cast cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int - exprVal, targetType, "bool2float"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: case AtomicType::TYPE_INT64: cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int to float - exprVal, targetType, "int2float"); + exprVal, targetType, cOpName); break; case 
AtomicType::TYPE_UINT8: case AtomicType::TYPE_UINT16: @@ -5845,14 +5889,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, PerformanceWarning(pos, "Conversion from unsigned int to float is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to float - exprVal, targetType, "uint2float"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: // No-op cast. cast = exprVal; break; case AtomicType::TYPE_DOUBLE: - cast = ctx->FPCastInst(exprVal, targetType, "double2float"); + cast = ctx->FPCastInst(exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -5868,26 +5912,26 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) // truncate i32 bool vector values to i1s - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to double - exprVal, targetType, "bool2double"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: case AtomicType::TYPE_INT64: cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int - exprVal, targetType, "int2double"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_UINT8: case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: case AtomicType::TYPE_UINT64: cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int - exprVal, targetType, "uint2double"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: - cast = ctx->FPCastInst(exprVal, targetType, "float2double"); + cast = ctx->FPCastInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: cast = exprVal; @@ -5905,8 +5949,8 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case 
AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2int"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_UINT8: @@ -5918,15 +5962,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_UINT32: case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: - cast = ctx->TruncInst(exprVal, targetType, "int64_to_int8"); + cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "float2int"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "double2int"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -5941,8 +5985,8 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_UINT8: @@ -5954,21 +5998,21 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_UINT32: case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: - cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint8"); + cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: if 
(fromType->IsVaryingType()) PerformanceWarning(pos, "Conversion from float to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int - exprVal, targetType, "float2uint"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: if (fromType->IsVaryingType()) PerformanceWarning(pos, "Conversion from double to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int - exprVal, targetType, "double2uint"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -5983,14 +6027,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2int"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: - cast = ctx->SExtInst(exprVal, targetType, "int2int16"); + cast = ctx->SExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_UINT8: - cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16"); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT16: case AtomicType::TYPE_UINT16: @@ -5998,17 +6042,17 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; case AtomicType::TYPE_FLOAT: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "float2int"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: - cast = ctx->TruncInst(exprVal, targetType, "int64_to_int16"); + cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: cast = 
ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "double2int"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -6023,14 +6067,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2uint16"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: - cast = ctx->SExtInst(exprVal, targetType, "uint2uint16"); + cast = ctx->SExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_UINT8: - cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16"); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT16: case AtomicType::TYPE_UINT16: @@ -6041,20 +6085,20 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, PerformanceWarning(pos, "Conversion from float to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int - exprVal, targetType, "float2uint"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: - cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint16"); + cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: if (fromType->IsVaryingType()) PerformanceWarning(pos, "Conversion from double to unsigned int is slow. 
" "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int - exprVal, targetType, "double2uint"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -6069,16 +6113,16 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2int"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_INT16: - cast = ctx->SExtInst(exprVal, targetType, "int2int32"); + cast = ctx->SExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_UINT8: case AtomicType::TYPE_UINT16: - cast = ctx->ZExtInst(exprVal, targetType, "uint2uint32"); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: @@ -6086,15 +6130,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; case AtomicType::TYPE_FLOAT: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "float2int"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: - cast = ctx->TruncInst(exprVal, targetType, "int64_to_int32"); + cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "double2int"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -6109,16 +6153,16 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, 
LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_INT16: - cast = ctx->SExtInst(exprVal, targetType, "uint2uint"); + cast = ctx->SExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_UINT8: case AtomicType::TYPE_UINT16: - cast = ctx->ZExtInst(exprVal, targetType, "uint2uint"); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: @@ -6129,18 +6173,18 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, PerformanceWarning(pos, "Conversion from float to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int - exprVal, targetType, "float2uint"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: - cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint32"); + cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: if (fromType->IsVaryingType()) PerformanceWarning(pos, "Conversion from double to unsigned int is slow. 
" "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int - exprVal, targetType, "double2uint"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -6155,22 +6199,22 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2int64"); + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: - cast = ctx->SExtInst(exprVal, targetType, "int_to_int64"); + cast = ctx->SExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_UINT8: case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: - cast = ctx->ZExtInst(exprVal, targetType, "uint_to_int64"); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "float2int64"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: @@ -6178,7 +6222,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; case AtomicType::TYPE_DOUBLE: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "double2int64"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -6193,25 +6237,25 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) - exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); + exprVal = 
ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT8: case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: - cast = ctx->SExtInst(exprVal, targetType, "int_to_uint64"); + cast = ctx->SExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_UINT8: case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: - cast = ctx->ZExtInst(exprVal, targetType, "uint_to_uint64"); + cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: if (fromType->IsVaryingType()) PerformanceWarning(pos, "Conversion from float to unsigned int64 is slow. " "Use \"int64\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int - exprVal, targetType, "float2uint"); + exprVal, targetType, cOpName); break; case AtomicType::TYPE_INT64: case AtomicType::TYPE_UINT64: @@ -6222,7 +6266,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, PerformanceWarning(pos, "Conversion from double to unsigned int64 is slow. " "Use \"int64\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int - exprVal, targetType, "double2uint"); + exprVal, targetType, cOpName); break; default: FATAL("unimplemented"); @@ -6239,7 +6283,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt8(0) : (llvm::Value *)LLVMInt8Vector((int8_t)0); cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, - exprVal, zero, "cmpi0"); + exprVal, zero, cOpName); break; } case AtomicType::TYPE_INT16: @@ -6247,7 +6291,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, llvm::Value *zero = fromType->IsUniformType() ? 
(llvm::Value *)LLVMInt16(0) : (llvm::Value *)LLVMInt16Vector((int16_t)0); cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, - exprVal, zero, "cmpi0"); + exprVal, zero, cOpName); break; } case AtomicType::TYPE_INT32: @@ -6255,14 +6299,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt32(0) : (llvm::Value *)LLVMInt32Vector(0); cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, - exprVal, zero, "cmpi0"); + exprVal, zero, cOpName); break; } case AtomicType::TYPE_FLOAT: { llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMFloat(0.f) : (llvm::Value *)LLVMFloatVector(0.f); cast = ctx->CmpInst(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_ONE, - exprVal, zero, "cmpf0"); + exprVal, zero, cOpName); break; } case AtomicType::TYPE_INT64: @@ -6270,14 +6314,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt64(0) : (llvm::Value *)LLVMInt64Vector((int64_t)0); cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, - exprVal, zero, "cmpi0"); + exprVal, zero, cOpName); break; } case AtomicType::TYPE_DOUBLE: { llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMDouble(0.) : (llvm::Value *)LLVMDoubleVector(0.); cast = ctx->CmpInst(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_ONE, - exprVal, zero, "cmpd0"); + exprVal, zero, cOpName); break; } default: @@ -6291,7 +6335,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, // turn into a vector below, the way it does for everyone // else... 
cast = ctx->SExtInst(cast, LLVMTypes::BoolVectorType->getElementType(), - "i1bool_to_i32bool"); + LLVMGetName(cast, "to_i32bool")); } } else @@ -7022,7 +7066,7 @@ DerefExpr::GetValue(FunctionEmitContext *ctx) const { ctx->GetFullMask(); ctx->SetDebugPos(pos); - return ctx->LoadInst(ptr, mask, type, "deref_load"); + return ctx->LoadInst(ptr, mask, type); } @@ -7395,7 +7439,9 @@ SymbolExpr::GetValue(FunctionEmitContext *ctx) const { if (!symbol || !symbol->storagePtr) return NULL; ctx->SetDebugPos(pos); - return ctx->LoadInst(symbol->storagePtr, symbol->name.c_str()); + + std::string loadName = symbol->name + std::string("_load"); + return ctx->LoadInst(symbol->storagePtr, loadName.c_str()); } @@ -8112,7 +8158,8 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const { // pointer of the return type and to run the code for initializers, // if present. llvm::Type *ptrType = retType->LLVMType(g->ctx); - ptrValue = ctx->BitCastInst(ptrValue, ptrType, "cast_new_ptr"); + ptrValue = ctx->BitCastInst(ptrValue, ptrType, + LLVMGetName(ptrValue, "_cast_ptr")); if (initExpr != NULL) InitSymbol(ptrValue, allocType, initExpr, ctx, pos); diff --git a/opt.cpp b/opt.cpp index 8e6162bd..9bae2bf1 100644 --- a/opt.cpp +++ b/opt.cpp @@ -829,14 +829,16 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { llvm::Type *returnType = callInst->getType(); Assert(llvm::isa(returnType)); // cast the i8 * to the appropriate type + const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast"); llvm::Value *castPtr = new llvm::BitCastInst(callInst->getArgOperand(0), llvm::PointerType::get(returnType, 0), - "ptr2vec", callInst); + name, callInst); lCopyMetadata(castPtr, callInst); int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 
4 : 8; + name = LLVMGetName(callInst->getArgOperand(0), "_load"); llvm::Instruction *loadInst = - new llvm::LoadInst(castPtr, "load", false /* not volatile */, + new llvm::LoadInst(castPtr, name, false /* not volatile */, align, (llvm::Instruction *)NULL); lCopyMetadata(loadInst, callInst); llvm::ReplaceInstWithInst(callInst, loadInst); @@ -859,10 +861,12 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { // all lanes storing, so replace with a regular store llvm::Value *rvalue = callInst->getArgOperand(2); llvm::Type *storeType = rvalue->getType(); + const char *name = LLVMGetName(callInst->getArgOperand(0), + "_ptrcast"); llvm::Value *castPtr = new llvm::BitCastInst(callInst->getArgOperand(0), llvm::PointerType::get(storeType, 0), - "ptr2vec", callInst); + name, callInst); lCopyMetadata(castPtr, callInst); llvm::StoreInst *storeInst = @@ -1291,12 +1295,13 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, *constOffset = NULL; else *constOffset = new llvm::SExtInst(co, sext->getType(), - "const_offset_sext", insertBefore); + LLVMGetName(co, "_sext"), + insertBefore); if (vo == NULL) *variableOffset = NULL; else *variableOffset = new llvm::SExtInst(vo, sext->getType(), - "variable_offset_sext", + LLVMGetName(vo, "_sext"), insertBefore); return; } @@ -1320,7 +1325,8 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, else *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, c0, c1, - "const_op", insertBefore); + LLVMGetName("add", c0, c1), + insertBefore); if (v0 == NULL || llvm::isa(v0)) *variableOffset = v1; @@ -1329,7 +1335,8 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, else *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, v0, v1, - "variable_op", insertBefore); + LLVMGetName("add", v0, v1), + insertBefore); return; } else if (bop->getOpcode() == llvm::Instruction::Mul) { @@ -1343,26 +1350,27 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value 
**constOffset, if (c0 != NULL && c1 != NULL) *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Mul, c0, c1, - "const_mul", insertBefore); + LLVMGetName("mul", c0, c1), + insertBefore); else *constOffset = NULL; llvm::Value *va = NULL, *vb = NULL, *vc = NULL; if (v0 != NULL && c1 != NULL) va = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, c1, - "va_mul", insertBefore); + LLVMGetName("mul", v0, c1), insertBefore); if (c0 != NULL && v1 != NULL) vb = llvm::BinaryOperator::Create(llvm::Instruction::Mul, c0, v1, - "vb_mul", insertBefore); + LLVMGetName("mul", c0, v1), insertBefore); if (v0 != NULL && v1 != NULL) vc = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, v1, - "vc_mul", insertBefore); + LLVMGetName("mul", v0, v1), insertBefore); llvm::Value *vab = NULL; if (va != NULL && vb != NULL) vab = llvm::BinaryOperator::Create(llvm::Instruction::Add, va, vb, - "vab_add", insertBefore); + LLVMGetName("add", va, vb), insertBefore); else if (va != NULL) vab = va; else @@ -1371,7 +1379,7 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, if (vab != NULL && vc != NULL) *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, vab, vc, - "vabc_add", insertBefore); + LLVMGetName("add", vab, vc), insertBefore); else if (vab != NULL) *variableOffset = vab; else @@ -1443,7 +1451,7 @@ lExtract248Scale(llvm::Value *splatOperand, int splatValue, *result = llvm::BinaryOperator::Create(llvm::Instruction::Mul, splatDiv, otherOperand, - "add", insertBefore); + "mul", insertBefore); return LLVMInt32(scale); } } @@ -1673,7 +1681,8 @@ lOffsets32BitSafe(llvm::Value **variableOffsetPtr, // do the more general check with lVectorIs32BitInts(). 
variableOffset = new llvm::TruncInst(variableOffset, LLVMTypes::Int32VectorType, - "trunc_variable_offset", insertBefore); + LLVMGetName(variableOffset, "_trunc"), + insertBefore); else return false; } @@ -1683,7 +1692,7 @@ lOffsets32BitSafe(llvm::Value **variableOffsetPtr, // Truncate them so we have a 32-bit vector type for them. constOffset = new llvm::TruncInst(constOffset, LLVMTypes::Int32VectorType, - "trunc_const_offset", insertBefore); + LLVMGetName(constOffset, "_trunc"), insertBefore); } else { // FIXME: otherwise we just assume that all constant offsets @@ -1696,7 +1705,7 @@ lOffsets32BitSafe(llvm::Value **variableOffsetPtr, // enough for us in some cases if we call it from here. constOffset = new llvm::TruncInst(constOffset, LLVMTypes::Int32VectorType, - "trunc_const_offset", insertBefore); + LLVMGetName(constOffset, "_trunc"), insertBefore); } } @@ -1819,7 +1828,7 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) { // Cast the base pointer to a void *, since that's what the // __pseudo_*_base_offsets_* functions want. basePtr = new llvm::IntToPtrInst(basePtr, LLVMTypes::VoidPointerType, - "base2void", callInst); + LLVMGetName(basePtr, "_2void"), callInst); lCopyMetadata(basePtr, callInst); llvm::Function *gatherScatterFunc = info->baseOffsetsFunc; @@ -1842,7 +1851,8 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) { // way we can then call ReplaceInstWithInst(). 
llvm::Instruction *newCall = lCallInst(gatherScatterFunc, basePtr, variableOffset, offsetScale, - constOffset, mask, "newgather", NULL); + constOffset, mask, callInst->getName().str().c_str(), + NULL); lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); } @@ -2443,7 +2453,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { Debug(pos, "Transformed gather to scalar load and broadcast!"); llvm::Instruction *newCall = lCallInst(gatherInfo->loadBroadcastFunc, ptr, mask, - "load_braodcast"); + LLVMGetName(callInst, "_broadcast")); lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); @@ -2481,7 +2491,8 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { if (gatherInfo != NULL) { Debug(pos, "Transformed gather to unaligned vector load!"); llvm::Instruction *newCall = - lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load"); + lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, + LLVMGetName(ptr, "_masked_load")); lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); } From db8b08131fc81f4d18621d8a16626072340b3be2 Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Fri, 20 Apr 2012 12:17:09 -0400 Subject: [PATCH 091/173] Fixed compile error which shows up on LLVM 3.0 --- cbackend.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cbackend.cpp b/cbackend.cpp index 7adefb40..0522fd85 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -1495,8 +1495,9 @@ void CWriter::printConstant(Constant *CPV, bool Static) { printConstantDataSequential(CDV, Static); Out << ")"; } + } #endif - } else { + else { assert(isa(CPV)); Constant *CZ = Constant::getNullValue(VT->getElementType()); printType(Out, CPV->getType()); From 4dfc596d38065f0856c0338bc9e739c19c56929a Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 20 Apr 2012 10:50:39 -0700 Subject: [PATCH 092/173] Fix MSVC warnings. 
--- llvmutil.cpp | 2 +- parse.yy | 2 +- util.cpp | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvmutil.cpp b/llvmutil.cpp index f7fa041f..e21a0b12 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -880,7 +880,7 @@ lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength, // the addConstants[], mod baseValue. If we round that up to the // next power of 2, we'll have a value that will be no greater than // baseValue and sometimes less. - int maxMod = addConstants[0] % baseValue; + int maxMod = int(addConstants[0] % baseValue); for (int i = 1; i < vectorLength; ++i) maxMod = std::max(maxMod, int(addConstants[i] % baseValue)); int requiredAlignment = lRoundUpPow2(maxMod); diff --git a/parse.yy b/parse.yy index 30144a67..c4db2fa9 100644 --- a/parse.yy +++ b/parse.yy @@ -549,7 +549,7 @@ rate_qualified_type_specifier if ($2 == NULL) $$ = NULL; else { - int soaWidth = $1; + int soaWidth = (int)$1; const StructType *st = dynamic_cast($2); if (st == NULL) { Error(@1, "\"soa\" qualifier is illegal with non-struct type \"%s\".", diff --git a/util.cpp b/util.cpp index 92c1bac1..18c6d58e 100644 --- a/util.cpp +++ b/util.cpp @@ -101,7 +101,7 @@ lHaveANSIColors() { static const char * lStartBold() { if (lHaveANSIColors()) - return "\e[1m"; + return "\033[1m"; else return ""; } @@ -110,7 +110,7 @@ lStartBold() { static const char * lStartRed() { if (lHaveANSIColors()) - return "\e[31m"; + return "\033[31m"; else return ""; } @@ -119,7 +119,7 @@ lStartRed() { static const char * lStartBlue() { if (lHaveANSIColors()) - return "\e[34m"; + return "\033[34m"; else return ""; } @@ -128,7 +128,7 @@ lStartBlue() { static const char * lResetColor() { if (lHaveANSIColors()) - return "\e[0m"; + return "\033[0m"; else return ""; } @@ -192,7 +192,7 @@ static int lFindIndent(int numColons, const char *buf) { int indent = 0; while (*buf != '\0') { - if (*buf == '\e') { + if (*buf == '\033') { while (*buf != '\0' && *buf != 'm') ++buf; if (*buf == 
'm') @@ -229,7 +229,7 @@ lPrintWithWordBreaks(const char *buf, int indent, int columnWidth, FILE *out) { const char *msgPos = buf; while (true) { - if (*msgPos == '\e') { + if (*msgPos == '\033') { // handle ANSI color escape: copy it to the output buffer // without charging for the characters it uses do { From 040421942f57aedcca73fcda19ba9c59344e16b3 Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Fri, 20 Apr 2012 14:42:14 -0400 Subject: [PATCH 093/173] Goto statements with a bad label produces error message. Now it also produces a short list of suggestions based on string distance. --- ctx.cpp | 13 +++++++++++++ ctx.h | 4 ++++ stmt.cpp | 22 +++++++++++++++++++--- tests_errors/goto-5.ispc | 6 ++++++ tests_errors/goto-6.ispc | 9 +++++++++ 5 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 tests_errors/goto-5.ispc create mode 100644 tests_errors/goto-6.ispc diff --git a/ctx.cpp b/ctx.cpp index de61ffcb..464b8831 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1155,6 +1155,19 @@ FunctionEmitContext::GetLabeledBasicBlock(const std::string &label) { return NULL; } +std::vector +FunctionEmitContext::GetLabels() { + // Initialize vector to the right size + std::vector labels(labelMap.size()); + + // Iterate through labelMap and grab only the keys + std::map::iterator iter; + for (iter=labelMap.begin(); iter != labelMap.end(); iter++) + labels.push_back(iter->first); + + return labels; +} + void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { diff --git a/ctx.h b/ctx.h index 6c3f2887..bdb21ddc 100644 --- a/ctx.h +++ b/ctx.h @@ -248,6 +248,10 @@ public: new basic block that it starts. */ llvm::BasicBlock *GetLabeledBasicBlock(const std::string &label); + /** Returns a vector of all labels in the context. 
This is + simply the key set of the labelMap */ + std::vector GetLabels(); + /** Called to generate code for 'return' statement; value is the expression in the return statement (if non-NULL), and doCoherenceCheck indicates whether instructions should be generated diff --git a/stmt.cpp b/stmt.cpp index 4f8c0f12..e2b533f3 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2281,10 +2281,26 @@ GotoStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label); if (bb == NULL) { - // TODO: use the string distance stuff to suggest alternatives if - // there are some with names close to the label name we have here.. - Error(identifierPos, "No label named \"%s\" found in current function.", + /* Label wasn't found. Emit an error */ + Error(identifierPos, + "No label named \"%s\" found in current function.", label.c_str()); + + /* Look for suggestions that are close */ + std::vector labels = ctx->GetLabels(); + std::vector matches = MatchStrings(label, labels); + if (! matches.empty()) { + /* Print up to 5 matches. Don't want to spew too much */ + std::string match_output("Did you mean\n"); + for (unsigned int i=0; i Date: Fri, 20 Apr 2012 11:53:43 -0700 Subject: [PATCH 094/173] Fix malformed program crash. --- expr.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/expr.cpp b/expr.cpp index 01339d03..0b992ddc 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3711,7 +3711,10 @@ ExprList::GetConstant(const Type *type) const { // expression to the type we need, then let the regular type // conversion machinery handle it. 
expr = TypeConvertExpr(exprs[i], elementType, "initializer list"); - Assert(expr != NULL); + if (expr == NULL) { + Assert(m->errorCount > 0); + return NULL; + } // Re-establish const-ness if possible expr = ::Optimize(expr); } From c5f66535642835091ac5c92c18b5fcaeda5cea41 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 20 Apr 2012 11:54:12 -0700 Subject: [PATCH 095/173] Bump version number to 1.2.2 --- ispc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ispc.h b/ispc.h index d93788ec..32515298 100644 --- a/ispc.h +++ b/ispc.h @@ -38,7 +38,7 @@ #ifndef ISPC_H #define ISPC_H -#define ISPC_VERSION "1.2.2dev" +#define ISPC_VERSION "1.2.2" #if !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn) #error "Only LLVM 3.0, and the 3.1 development branch are supported" From 016b502d46f767257abc2c91db35f37f761b5415 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 20 Apr 2012 14:25:40 -0700 Subject: [PATCH 096/173] Update release notes for 1.2.2, bump version number in doxygen --- docs/ReleaseNotes.txt | 34 ++++++++++++++++++++++++++++++++++ doxygen.cfg | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/ReleaseNotes.txt b/docs/ReleaseNotes.txt index f99066ac..e381c017 100644 --- a/docs/ReleaseNotes.txt +++ b/docs/ReleaseNotes.txt @@ -1,3 +1,37 @@ +=== v1.2.2 === (20 April 2012) + +This release includes a number of small additions to functionality and a +number of bugfixes. New functionality includes: + +* It's now possible to forward declare structures as in C/C++: "struct + Foo;". After such a declaration, structs with pointers to "Foo" and + functions that take pointers or references to Foo structs can be declared + without the entire definition of Foo being available. + +* New built-in types size_t, ptrdiff_t, and [u]intptr_t are now available, + corresponding to the equivalent types in C. + +* The standard library now provides atomic_swap*() and + atomic_compare_exchange*() functions for void * types. 
+ +* The C++ backend has seen a number of improvements to the quality and + readability of generated code. + +A number of bugs have been fixed in this release as well. The most +significant are: + +* Fixed a bug where nested loops could cause a compiler crash in some + circumstances (issues #240, and #229) + +* Gathers could access invalid memory (and cause the program to crash) in + some circumstances (#235) + +* References to temporary values are now handled properly when passed to a + function that takes a reference typed parameter. + +* A case where incorrect code could be generated for compile-time-constant + initializers has been fixed (#234). + === v1.2.1 === (6 April 2012) This release contains only minor new functionality and is mostly for many diff --git a/doxygen.cfg b/doxygen.cfg index 1659fbdd..f02dac70 100644 --- a/doxygen.cfg +++ b/doxygen.cfg @@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 1.2.1 +PROJECT_NUMBER = 1.2.2 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. 
From 03b2b8ae8f8faac1c3d9488a973742f97535a689 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 20 Apr 2012 14:31:46 -0700 Subject: [PATCH 097/173] Bump version number to 1.2.3dev --- ispc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ispc.h b/ispc.h index 32515298..c4c5c405 100644 --- a/ispc.h +++ b/ispc.h @@ -38,7 +38,7 @@ #ifndef ISPC_H #define ISPC_H -#define ISPC_VERSION "1.2.2" +#define ISPC_VERSION "1.2.3dev" #if !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn) #error "Only LLVM 3.0, and the 3.1 development branch are supported" From 138c7acf228cdbf293a4bf3bbedbe3f89df21f0b Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Sat, 21 Apr 2012 01:44:10 -0400 Subject: [PATCH 098/173] Error() and Warning() functions for reporting compiler errors/warnings now respects newlines as part of valid error messages. --- lex.ll | 2 +- stmt.cpp | 24 ++++++++++-------------- util.cpp | 18 ++++++++++++++++-- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/lex.ll b/lex.ll index 96c19d1d..026b1b48 100644 --- a/lex.ll +++ b/lex.ll @@ -704,7 +704,7 @@ lEscapeChar(char *str, char *pChar, SourcePos *pos) str = tail - 1; break; default: - Error(*pos, "Bad character escape sequence: '%s'\n.", str); + Error(*pos, "Bad character escape sequence: '%s'.", str); break; } } diff --git a/stmt.cpp b/stmt.cpp index e2b533f3..0c8ed0c8 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2281,26 +2281,22 @@ GotoStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label); if (bb == NULL) { - /* Label wasn't found. Emit an error */ - Error(identifierPos, - "No label named \"%s\" found in current function.", - label.c_str()); - - /* Look for suggestions that are close */ + /* Label wasn't found. Look for suggestions that are close */ std::vector labels = ctx->GetLabels(); std::vector matches = MatchStrings(label, labels); + std::string match_output; if (! matches.empty()) { /* Print up to 5 matches. 
Don't want to spew too much */ - std::string match_output("Did you mean\n"); + match_output += "\nDid you mean:"; for (unsigned int i=0; i Date: Mon, 23 Apr 2012 16:00:07 -0700 Subject: [PATCH 099/173] Fix 32-bit samples on Mac OS X. On Mac OS X and Linux rdtsc() didn't save and restore 32-bit registers. This patch fixes issue #87. --- examples/timing.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/timing.h b/examples/timing.h index f61fbce8..7d746d45 100644 --- a/examples/timing.h +++ b/examples/timing.h @@ -43,9 +43,15 @@ extern "C" { #endif /* __cplusplus */ __inline__ uint64_t rdtsc() { uint32_t low, high; +#ifdef __x86_64 __asm__ __volatile__ ( "xorl %%eax,%%eax \n cpuid" ::: "%rax", "%rbx", "%rcx", "%rdx" ); +#else + __asm__ __volatile__ ( + "xorl %%eax,%%eax \n cpuid" + ::: "%eax", "%ebx", "%ecx", "%edx" ); +#endif __asm__ __volatile__ ( "rdtsc" : "=a" (low), "=d" (high)); return (uint64_t)high << 32 | low; From 8547101c4bb331ff7adc3f617d6cb4e1d273b57d Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 05:53:42 -1000 Subject: [PATCH 100/173] Debugging info: produce more descriptive producer string --- module.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index d082255f..bb339e4d 100644 --- a/module.cpp +++ b/module.cpp @@ -144,10 +144,13 @@ Module::Module(const char *fn) { std::string directory, name; GetDirectoryAndFileName(g->currentDirectory, filename, &directory, &name); + char producerString[512]; + sprintf(producerString, "ispc version %s (build %s on %s)", + ISPC_VERSION, BUILD_VERSION, BUILD_DATE); diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */ name, /* filename */ directory, /* directory */ - "ispc", /* producer */ + producerString, /* producer */ g->opt.level > 0 /* is optimized */, "-g", /* command line args */ 0 /* run time version */); From 7167442d6e4f46c56007a671590ac6f999603412 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 
05:55:47 -1000 Subject: [PATCH 101/173] Debugging info: include parameter number for function params. --- ctx.cpp | 13 ++++++++++--- ctx.h | 2 +- func.cpp | 4 ++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 464b8831..95ef805d 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1482,19 +1482,26 @@ FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) { void -FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym) { +FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) { if (m->diBuilder == NULL) return; llvm::DIScope scope = diFunction; + llvm::DIType diType = sym->type->GetDIType(scope); + Assert(diType.Verify()); + int flags = 0; + llvm::DIVariable var = m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable, scope, sym->name, sym->pos.GetDIFile(), sym->pos.first_line, - sym->type->GetDIType(scope), - true /* preserve through opts */); + diType, + true /* preserve through opts */, + flags, + argNum+1); + Assert(var.Verify()); llvm::Instruction *declareInst = m->diBuilder->insertDeclare(sym->storagePtr, var, bblock); AddDebugPos(declareInst, &sym->pos, &scope); diff --git a/ctx.h b/ctx.h index bdb21ddc..e161e366 100644 --- a/ctx.h +++ b/ctx.h @@ -342,7 +342,7 @@ public: /** Emits debugging information for the function parameter represented by sym. 
*/ - void EmitFunctionParameterDebugInfo(Symbol *sym); + void EmitFunctionParameterDebugInfo(Symbol *sym, int parameterNum); /** @} */ /** @name IR instruction emission diff --git a/func.cpp b/func.cpp index 29dd9ecf..9fd31c7d 100644 --- a/func.cpp +++ b/func.cpp @@ -182,7 +182,7 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const // memory llvm::Value *ptrval = ctx->LoadInst(ptr, sym->name.c_str()); ctx->StoreInst(ptrval, sym->storagePtr); - ctx->EmitFunctionParameterDebugInfo(sym); + ctx->EmitFunctionParameterDebugInfo(sym, i); } @@ -262,7 +262,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, // to store the its value there. sym->storagePtr = ctx->AllocaInst(argIter->getType(), sym->name.c_str()); ctx->StoreInst(argIter, sym->storagePtr); - ctx->EmitFunctionParameterDebugInfo(sym); + ctx->EmitFunctionParameterDebugInfo(sym, i); } // If the number of actual function arguments is equal to the From 12706cd37f3957318b719b825cbd4831f110eba1 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 05:57:07 -1000 Subject: [PATCH 102/173] Debugging optimization pass updates Don't run mem2reg with -O0 anymore, but do run the intrinsics opt pass, which allows some CFG simplification due to the mask being all on, etc. --- opt.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/opt.cpp b/opt.cpp index 9bae2bf1..644a83dc 100644 --- a/opt.cpp +++ b/opt.cpp @@ -69,7 +69,6 @@ #include #include #include -#include #include #include #ifdef ISPC_IS_LINUX @@ -293,12 +292,12 @@ Optimize(llvm::Module *module, int optLevel) { // run absolutely no optimizations, since the front-end needs us to // take the various __pseudo_* functions it has emitted and turn // them into something that can actually execute. 
- optPM.add(llvm::createPromoteMemoryToRegisterPass()); optPM.add(CreateDetectGSBaseOffsetsPass()); if (g->opt.disableHandlePseudoMemoryOps == false) { optPM.add(CreatePseudoGSToGSPass()); optPM.add(CreatePseudoMaskedStorePass()); } + optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateIsCompileTimeConstantPass(true)); optPM.add(llvm::createFunctionInliningPass()); optPM.add(CreateMakeInternalFuncsStaticPass()); From d5cc2ad6431e75e37f494dd4d2bdd633a0fa3561 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 06:17:05 -1000 Subject: [PATCH 103/173] Call Verify() methods of various debugging llvm::DI* types after creation. --- ctx.cpp | 35 ++++++++++++++++++++++------------- ispc.cpp | 4 +++- module.cpp | 14 ++++++++------ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 95ef805d..c805bcf1 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -313,27 +313,32 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, flags, g->opt.level > 0, llvmFunction); + Assert(diFile.Verify()); /* And start a scope representing the initial function scope */ StartScope(); llvm::DIFile file = funcStartPos.GetDIFile(); Symbol *programIndexSymbol = m->symbolTable->LookupVariable("programIndex"); Assert(programIndexSymbol && programIndexSymbol->storagePtr); - m->diBuilder->createGlobalVariable(programIndexSymbol->name, - file, - funcStartPos.first_line, - programIndexSymbol->type->GetDIType(file), - true /* static */, - programIndexSymbol->storagePtr); + llvm::DIGlobalVariable var = + m->diBuilder->createGlobalVariable(programIndexSymbol->name, + file, + funcStartPos.first_line, + programIndexSymbol->type->GetDIType(file), + true /* static */, + programIndexSymbol->storagePtr); + Assert(var.Verify()); Symbol *programCountSymbol = m->symbolTable->LookupVariable("programCount"); Assert(programCountSymbol); - m->diBuilder->createGlobalVariable(programCountSymbol->name, - file, - funcStartPos.first_line, - 
programCountSymbol->type->GetDIType(file), - true /* static */, - programCountSymbol->storagePtr); + var = + m->diBuilder->createGlobalVariable(programCountSymbol->name, + file, + funcStartPos.first_line, + programCountSymbol->type->GetDIType(file), + true /* static */, + programCountSymbol->storagePtr); + Assert(var.Verify()); } } @@ -1440,6 +1445,7 @@ FunctionEmitContext::StartScope() { m->diBuilder->createLexicalBlock(parentScope, diFile, currentPos.first_line, currentPos.first_column); + Assert(lexicalBlock.Verify()); debugScopes.push_back(lexicalBlock); } } @@ -1467,14 +1473,17 @@ FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) { return; llvm::DIScope scope = GetDIScope(); + llvm::DIType diType = sym->type->GetDIType(scope); + Assert(diType.Verify()); llvm::DIVariable var = m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable, scope, sym->name, sym->pos.GetDIFile(), sym->pos.first_line, - sym->type->GetDIType(scope), + diType, true /* preserve through opts */); + Assert(var.Verify()); llvm::Instruction *declareInst = m->diBuilder->insertDeclare(sym->storagePtr, var, bblock); AddDebugPos(declareInst, &sym->pos, &scope); diff --git a/ispc.cpp b/ispc.cpp index dce3ed77..2fcf27ff 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -644,7 +644,9 @@ llvm::DIFile SourcePos::GetDIFile() const { std::string directory, filename; GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename); - return m->diBuilder->createFile(filename, directory); + llvm::DIFile ret = m->diBuilder->createFile(filename, directory); + Assert(ret.Verify()); + return ret; } diff --git a/module.cpp b/module.cpp index bb339e4d..f61182ca 100644 --- a/module.cpp +++ b/module.cpp @@ -395,12 +395,14 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE if (diBuilder) { llvm::DIFile file = pos.GetDIFile(); - diBuilder->createGlobalVariable(name, - file, - pos.first_line, - sym->type->GetDIType(file), - (sym->storageClass == SC_STATIC), - 
sym->storagePtr); + llvm::DIGlobalVariable var = + diBuilder->createGlobalVariable(name, + file, + pos.first_line, + sym->type->GetDIType(file), + (sym->storageClass == SC_STATIC), + sym->storagePtr); + Assert(var.Verify()); } } From 260d7298c3a50fdff3b9ef39ab16062ee8d75599 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 08:35:22 -1000 Subject: [PATCH 104/173] Strip unused debugging metadata after done with compilation. Debugging information for functions that are inlined or static and not used still hangs around after compilation; now we go through the debugging info and remove the entries for any DISubprograms that don't have their original functions left in the Module after optimization. --- module.cpp | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index f61182ca..740068c8 100644 --- a/module.cpp +++ b/module.cpp @@ -106,6 +106,102 @@ lDeclareSizeAndPtrIntTypes(SymbolTable *symbolTable) { } +/** After compilation completes, there's often a lot of extra debugging + metadata left around that isn't needed any more--for example, for + static functions that weren't actually used, function information for + functions that were inlined, etc. This function takes a llvm::Module + and tries to strip out all of this extra stuff. + */ +static void +lStripUnusedDebugInfo(llvm::Module *module) { + if (g->generateDebuggingSymbols == false) + return; + + // loop over the compile units that contributed to the final module + if (llvm::NamedMDNode *cuNodes = module->getNamedMetadata("llvm.dbg.cu")) { + for (unsigned i = 0, ie = cuNodes->getNumOperands(); i != ie; ++i) { + llvm::MDNode *cuNode = cuNodes->getOperand(i); + llvm::DICompileUnit cu(cuNode); + llvm::DIArray subprograms = cu.getSubprograms(); + std::vector usedSubprograms; + + if (subprograms.getNumElements() == 0) + continue; + + // And now loop over the subprograms inside each compile unit. 
+ for (unsigned j = 0, je = subprograms.getNumElements(); j != je; ++j) { + llvm::MDNode *spNode = + llvm::dyn_cast(subprograms->getOperand(j)); + Assert(spNode != NULL); + llvm::DISubprogram sp(spNode); + + // Get the name of the subprogram. Start with the mangled + // name; if that's empty then we have an export'ed + // function, so grab the unmangled name in that case. + std::string name = sp.getLinkageName(); + if (name == "") + name = sp.getName(); + + // Does the llvm::Function for this function survive in the + // module? + if (module->getFunction(name) != NULL) + usedSubprograms.push_back(sp); + } + + Debug(SourcePos(), "%d / %d functions left in module with debug " + "info.", (int)usedSubprograms.size(), + (int)subprograms.getNumElements()); + + // We'd now like to replace the array of subprograms in the + // compile unit with only the ones that actually have function + // definitions present. Unfortunately, llvm::DICompileUnit + // doesn't provide a method to set the subprograms. Therefore, + // we end up needing to directly stuff a new array into the + // appropriate slot (number 12) in the MDNode for the compile + // unit. + // + // Because this is all so hard-coded and would break if the + // debugging metadata organization on the LLVM side changed, + // here is a bunch of asserting to make sure that element 12 of + // the compile unit's MDNode has the subprograms array.... + llvm::MDNode *nodeSPMD = + llvm::dyn_cast(cuNode->getOperand(12)); + Assert(nodeSPMD != NULL); + llvm::MDNode *nodeSPMDArray = + llvm::dyn_cast(nodeSPMD->getOperand(0)); + llvm::DIArray nodeSPs(nodeSPMDArray); + Assert(nodeSPs.getNumElements() == subprograms.getNumElements()); + for (int i = 0; i < (int)nodeSPs.getNumElements(); ++i) + Assert(nodeSPs.getElement(i) == subprograms.getElement(i)); + + // And now we can go and stuff it into the node with some + // confidence... 
+ llvm::DIArray usedSubprogramsArray = + m->diBuilder->getOrCreateArray(llvm::ArrayRef(usedSubprograms)); + cuNode->replaceOperandWith(12, usedSubprogramsArray); + } + } + + // Also, erase a bunch of named metadata detritus; for each function + // there is sometimes named metadata llvm.dbg.lv.{funcname} that + // doesn't seem to be otherwise needed. + std::vector toErase; + llvm::Module::named_metadata_iterator iter = module->named_metadata_begin(); + for (; iter != module->named_metadata_end(); ++iter) { + if (!strncmp(iter->getName().str().c_str(), "llvm.dbg.lv", 11)) + toErase.push_back(iter); + } + for (int i = 0; i < (int)toErase.size(); ++i) + module->eraseNamedMetadata(toErase[i]); + + // Wrap up by running the LLVM pass to remove anything left that's + // unused. + llvm::PassManager pm; + pm.add(llvm::createStripDeadDebugInfoPass()); + pm.run(*module); +} + + /////////////////////////////////////////////////////////////////////////// // Module @@ -711,9 +807,12 @@ Module::AddFunctionDefinition(const std::string &name, const FunctionType *type, bool Module::writeOutput(OutputType outputType, const char *outFileName, const char *includeFileName) { - if (diBuilder != NULL && outputType != Header) + if (diBuilder != NULL && outputType != Header) { diBuilder->finalize(); + lStripUnusedDebugInfo(module); + } + // First, issue a warning if the output file suffix and the type of // file being created seem to mismatch. This can help catch missing // command-line arguments specifying the output file type. From 0baa2b484d97bb61cfddc7e4df83a9c03667b557 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 08:41:28 -1000 Subject: [PATCH 105/173] Fix multiple bugs related to DIBuilder::createFunction() call. The DIType passed to this method should correspond to the FunctionType of the function, not its return type. The first parameter should be the DIScope for the compile unit, not the DIFile. 
We previously had the unmangled function name and the mangled function name interchanged. The argument corresponding to "first line number of the function" was missing, which in turn led to subsequent arguments being off, and thus providing bogus values vs. what was supposed to be passed. Rename FunctionEmitContext::diFunction to diSubprogram, to better reflect its type. --- ctx.cpp | 50 +++++++++++++++++++++++++++++++++++--------------- ctx.h | 2 +- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index c805bcf1..1b61fdac 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -297,23 +297,43 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, } if (m->diBuilder) { + currentPos = funSym->pos; + /* If debugging is enabled, tell the debug information emission code about this new function */ diFile = funcStartPos.GetDIFile(); - llvm::DIType retType = function->GetReturnType()->GetDIType(diFile); - int flags = llvm::DIDescriptor::FlagPrototyped; // ?? 
- diFunction = m->diBuilder->createFunction(diFile, /* scope */ - llvmFunction->getName(), // mangled - funSym->name, - diFile, - funcStartPos.first_line, - retType, - funSym->storageClass == SC_STATIC, - true, /* is definition */ - flags, - g->opt.level > 0, - llvmFunction); Assert(diFile.Verify()); + + llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU()); + Assert(scope.Verify()); + + const FunctionType *functionType = function->GetType(); + llvm::DIType diSubprogramType; + if (functionType == NULL) + Assert(m->errorCount > 0); + else { + diSubprogramType = functionType->GetDIType(scope); + Assert(diSubprogramType.Verify()); + } + + std::string mangledName = llvmFunction->getName(); + if (mangledName == funSym->name) + mangledName = ""; + + bool isStatic = (funSym->storageClass == SC_STATIC); + bool isOptimized = (g->opt.level > 0); + int firstLine = funcStartPos.first_line; + int flags = (llvm::DIDescriptor::FlagPrototyped); + + diSubprogram = + m->diBuilder->createFunction(diFile /* scope */, funSym->name, + mangledName, diFile, + firstLine, diSubprogramType, + isStatic, true, /* is defn */ + firstLine, flags, + isOptimized, llvmFunction); + Assert(diSubprogram.Verify()); + /* And start a scope representing the initial function scope */ StartScope(); @@ -1439,7 +1459,7 @@ FunctionEmitContext::StartScope() { if (debugScopes.size() > 0) parentScope = debugScopes.back(); else - parentScope = diFunction; + parentScope = diSubprogram; llvm::DILexicalBlock lexicalBlock = m->diBuilder->createLexicalBlock(parentScope, diFile, @@ -1495,7 +1515,7 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) { if (m->diBuilder == NULL) return; - llvm::DIScope scope = diFunction; + llvm::DIScope scope = diSubprogram; llvm::DIType diType = sym->type->GetDIType(scope); Assert(diType.Verify()); int flags = 0; diff --git a/ctx.h b/ctx.h index e161e366..304f8af1 100644 --- a/ctx.h +++ b/ctx.h @@ -641,7 +641,7 @@ private: /** DISubprogram corresponding to 
this function (used for debugging info). */ - llvm::DISubprogram diFunction; + llvm::DISubprogram diSubprogram; /** These correspond to the current set of nested scopes in the function. */ From da690acce5bd3ea9c6c3ef70ce7dc87f72d69c4f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 14:27:33 -1000 Subject: [PATCH 106/173] Fix build with LLVM 3.0 --- ctx.cpp | 5 ++++- module.cpp | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ctx.cpp b/ctx.cpp index 1b61fdac..9d9f04e1 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -330,7 +330,10 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, mangledName, diFile, firstLine, diSubprogramType, isStatic, true, /* is defn */ - firstLine, flags, +#ifndef LLVM_3_0 + firstLine, +#endif // !LLVM_3_0 + flags, isOptimized, llvmFunction); Assert(diSubprogram.Verify()); diff --git a/module.cpp b/module.cpp index 740068c8..815a24cd 100644 --- a/module.cpp +++ b/module.cpp @@ -117,6 +117,7 @@ lStripUnusedDebugInfo(llvm::Module *module) { if (g->generateDebuggingSymbols == false) return; +#ifndef LLVM_3_0 // loop over the compile units that contributed to the final module if (llvm::NamedMDNode *cuNodes = module->getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, ie = cuNodes->getNumOperands(); i != ie; ++i) { @@ -193,6 +194,7 @@ lStripUnusedDebugInfo(llvm::Module *module) { } for (int i = 0; i < (int)toErase.size(); ++i) module->eraseNamedMetadata(toErase[i]); +#endif // !LLVM_3_0 // Wrap up by running the LLVM pass to remove anything left that's // unused. From 3e74d1c544648bf5ee9774a1c8afcd7b84b28bdd Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 25 Apr 2012 17:15:20 -1000 Subject: [PATCH 107/173] Fix documentation bug with typedef. 
--- docs/ispc.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 9edd7325..a6cc24b7 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -1505,13 +1505,17 @@ Defining New Names For Types The ``typedef`` keyword can be used to name types: :: + + typedef int64 BigInt; + typedef float Float3[3]; - typedef Float3 float[3]; +Following C's syntax, the code above defines ``BigInt`` to have ``int64`` +type and ``Float3`` to have ``float[3]`` type. -``typedef`` doesn't create a new type: it just provides an alternative name -for an existing type. Thus, in the above example, it is legal to pass a -value with ``float[3]`` type to a function that has been declared to take a -``Float3`` parameter. +Also as in C, ``typedef`` doesn't create a new type: it just provides an +alternative name for an existing type. Thus, in the above example, it is +legal to pass a value with ``float[3]`` type to a function that has been +declared to take a ``Float3`` parameter. Pointer Types From 0e2b315ded39979c2a53650facc7105effc5e4d9 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 27 Apr 2012 09:35:37 -1000 Subject: [PATCH 108/173] Add FAQ about foreach code generation. (i.e. "why's there that extra stuff at the end and what can I do about it if it's not necessary?) Issue #231. --- docs/faq.rst | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index a3517bea..ff959085 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -34,6 +34,7 @@ distribution. 
+ `How can a gang of program instances generate variable amounts of output efficiently?`_ + `Is it possible to use ispc for explicit vector programming?`_ + `How can I debug my ispc programs using Valgrind?`_ + + `foreach statements generate more complex assembly than I'd expect; what's going on?`_ Understanding ispc's Output =========================== @@ -693,3 +694,79 @@ you can use ``--target=sse4`` when compiling to run with ``valgrind``. Note that ``valgrind`` does not yet support programs that use the AVX instruction set. +foreach statements generate more complex assembly than I'd expect; what's going on? +----------------------------------------------------------------------------------- + +Given a simple ``foreach`` loop like the following: + +:: + + void foo(uniform float a[], uniform int count) { + foreach (i = 0 ... count) + a[i] *= 2; + } + + +the ``ispc`` compiler generates approximately 40 instructions--why isn't +the generated code simpler? + +There are two main components to the code: one handles +``programCount``-sized chunks of elements of the array, and the other +handles any excess elements at the end of the array that don't completely +fill a gang. The code for the main loop is essentially what one would +expect: a vector of values are loaded from the array, the multiply is done, +and the result is stored. + +:: + + LBB0_2: ## %foreach_full_body + movslq %edx, %rdx + vmovups (%rdi,%rdx), %ymm1 + vmulps %ymm0, %ymm1, %ymm1 + vmovups %ymm1, (%rdi,%rdx) + addl $32, %edx + addl $8, %eax + cmpl %ecx, %eax + jl LBB0_2 + + +Then, there is a sequence of instructions that handles any additional +elements at the end of the array. (These instructions don't execute if +there aren't any left-over values to process, but they do lengthen the +amount of generated code.) 
+ +:: + + ## BB#4: ## %partial_inner_only + vmovd %eax, %xmm0 + vinsertf128 $1, %xmm0, %ymm0, %ymm0 + vpermilps $0, %ymm0, %ymm0 ## ymm0 = ymm0[0,0,0,0,4,4,4,4] + vextractf128 $1, %ymm0, %xmm3 + vmovd %esi, %xmm2 + vmovaps LCPI0_1(%rip), %ymm1 + vextractf128 $1, %ymm1, %xmm4 + vpaddd %xmm4, %xmm3, %xmm3 + # .... + vmulps LCPI0_0(%rip), %ymm1, %ymm1 + vmaskmovps %ymm1, %ymm0, (%rdi,%rax) + + +If you know that the number of elements to be processed will always be an +exact multiple of the 8, 16, etc., then adding a simple assignment to +``count`` like the one below gives the compiler enough information to be +able to eliminate the code for the additional array elements. + +:: + + void foo(uniform float a[], uniform int count) { + // This assignment doesn't change the value of count + // if it's a multiple of 16, but it gives the compiler + // insight into this fact, allowing for simpler code to + // be generated for the foreach loop. + count = (count & ~(16-1)); + foreach (i = 0 ... count) + a[i] *= 2; + } + +With this new version of ``foo()``, only the code for the first loop above +is generated. From 0c5d7ff8f2d3447710ae6a5578745385a2c3bf68 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 27 Apr 2012 10:03:19 -1000 Subject: [PATCH 109/173] Add rygorous's float->srgb8 conversion routine to the stdlib. Issue #230 --- docs/ispc.rst | 17 +++++++ stdlib.ispc | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) diff --git a/docs/ispc.rst b/docs/ispc.rst index a6cc24b7..a5ada9d9 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -147,6 +147,7 @@ Contents: * `Converting Between Array-of-Structures and Structure-of-Arrays Layout`_ * `Conversions To and From Half-Precision Floats`_ + * `Converting to sRGB8`_ + `Systems Programming Support`_ @@ -3691,6 +3692,22 @@ precise. 
uniform int16 float_to_half_fast(uniform float f) +Converting to sRGB8 +------------------- + +The sRGB color space is used in many applications in graphics and imaging; +see the `Wikipedia page on sRGB`_ for more information. The ``ispc`` +standard library provides two functions for converting floating-point color +values to 8-bit values in the sRGB space. + +.. _Wikipedia page on sRGB: http://en.wikipedia.org/wiki/SRGB + +:: + + int float_to_srgb8(float v) + uniform int float_to_srgb8(uniform float v) + + Systems Programming Support --------------------------- diff --git a/stdlib.ispc b/stdlib.ispc index fd0df7ce..25871616 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -3829,6 +3829,133 @@ static inline int16 float_to_half_fast(float f) { } } +/////////////////////////////////////////////////////////////////////////// +// float -> srgb8 + +// https://gist.github.com/2246678, from Fabian "rygorous" Giesen. +// +// The basic ideas are still the same, only this time, we squeeze +// everything into the table, even the linear part of the range; since we +// are approximating the function as piecewise linear anyway, this is +// fairly easy. +// +// In the exact version of the conversion, any value that produces an +// output float less than 0.5 will be rounded to an integer of +// zero. Inverting the linear part of the transform, we get: +// +// log2(0.5 / (255 * 12.92)) =~ -12.686 +// +// which in turn means that any value smaller than about 2^(-12.686) will +// return 0. What this means is that we can adapt the clamping code to +// just clamp to [2^(-13), 1-eps] and we're covered. This means our table +// needs to cover a range of 13 different exponents from -13 to -1. +// +// The table lookup, storage and interpolation works exactly the same way +// as in the code above.
+// +// Max error for the whole function (integer-rounded result minus "exact" +// value, as computed in floats using the official formula): 0.544403 at +// 0x3e9f8000 + +__declspec(safe) +static inline int +float_to_srgb8(float in) +{ + static const uniform unsigned int table[104] = { + 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, + 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d, + 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, + 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a, + 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, + 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033, + 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, + 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067, + 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, + 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5, + 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, + 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2, + 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, + 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143, + 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, + 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af, + 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, + 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240, + 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, + 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300, + 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, + 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401, + 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, + 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559, + 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, + 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723, + }; + + static const uniform unsigned int almost_one = 0x3f7fffff; + + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
+ in = max(in, floatbits(0x39000000u)); // 0x39000000 is 2^(-13); a lower bound of 0 would make the table index below wrap around + in = min(in, floatbits(almost_one)); + + // Do the table lookup and unpack bias, scale + unsigned int tab = table[(intbits(in) - 0x39000000u) >> 20]; + unsigned int bias = (tab >> 16) << 9; + unsigned int scale = tab & 0xffff; + + // Grab next-highest mantissa bits and perform linear interpolation + unsigned int t = (intbits(in) >> 12) & 0xff; + return (bias + scale*t) >> 16; +} + + +__declspec(safe) +static inline uniform int +float_to_srgb8(uniform float in) +{ + static const uniform unsigned int table[104] = { + 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, + 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d, + 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, + 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a, + 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, + 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033, + 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, + 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067, + 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, + 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5, + 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, + 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2, + 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, + 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143, + 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, + 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af, + 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, + 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240, + 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, + 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300, + 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, + 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401, + 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, + 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559, + 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, + 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723, + }; + + static const uniform unsigned int almost_one = 0x3f7fffff; + + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
+ in = max(in, floatbits(0x39000000u)); // 0x39000000 is 2^(-13); a lower bound of 0 would make the table index below wrap around + in = min(in, floatbits(almost_one)); + + // Do the table lookup and unpack bias, scale + uniform unsigned int tab = table[(intbits(in) - 0x39000000u) >> 20]; + uniform unsigned int bias = (tab >> 16) << 9; + uniform unsigned int scale = tab & 0xffff; + + // Grab next-highest mantissa bits and perform linear interpolation + uniform unsigned int t = (intbits(in) >> 12) & 0xff; + return (bias + scale*t) >> 16; +} + /////////////////////////////////////////////////////////////////////////// // RNG stuff From 27b62781ccf8bf3e06aec3b998cb8d0117975e6b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sat, 28 Apr 2012 13:06:29 -1000 Subject: [PATCH 110/173] Fix bug in lStripUnusedDebugInfo(). This was causing an assert to hit in llvm's DwarfDebug.cpp. --- module.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/module.cpp b/module.cpp index 815a24cd..436791e6 100644 --- a/module.cpp +++ b/module.cpp @@ -177,9 +177,11 @@ lStripUnusedDebugInfo(llvm::Module *module) { // And now we can go and stuff it into the node with some // confidence... - llvm::DIArray usedSubprogramsArray = + llvm::Value *usedSubprogramsArray = m->diBuilder->getOrCreateArray(llvm::ArrayRef(usedSubprograms)); - cuNode->replaceOperandWith(12, usedSubprogramsArray); + llvm::MDNode *replNode = + llvm::MDNode::get(*g->ctx, llvm::ArrayRef(usedSubprogramsArray)); + cuNode->replaceOperandWith(12, replNode); } } From a1a43cdfe07219f7185d9c809545600ef50c58d5 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sat, 28 Apr 2012 13:47:31 -1000 Subject: [PATCH 111/173] Fix bug so that programIndex (et al.) are available in the debugger. It's now possible to successfully print out the value of programIndex, programCount, etc., in the debugger. The issue was that they were defined as having InternalLinkage, which meant that DCE removed them at the end of compilation.
Now they're declared to have WeakODRLinkage, which ensures that one copy survives (but there aren't multiply-defined symbols when compiling multiple files.) --- builtins.cpp | 60 +++++++++++++++++++++++++++++++++++++++++----------- ctx.cpp | 23 -------------------- func.cpp | 2 +- 3 files changed, 49 insertions(+), 36 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index 405c8290..1682db9a 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -637,16 +637,36 @@ AddBitcodeToModule(const unsigned char *bitcode, int length, static void lDefineConstantInt(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable) { - Symbol *pw = + Symbol *sym = new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(), SC_STATIC); - pw->constValue = new ConstExpr(pw->type, val, SourcePos()); + sym->constValue = new ConstExpr(sym->type, val, SourcePos()); llvm::Type *ltype = LLVMTypes::Int32Type; llvm::Constant *linit = LLVMInt32(val); - pw->storagePtr = new llvm::GlobalVariable(*module, ltype, true, - llvm::GlobalValue::InternalLinkage, - linit, pw->name.c_str()); - symbolTable->AddVariable(pw); + // Use WeakODRLinkage rather than InternalLinkage so that a definition + // survives even if it's not used in the module, so that the symbol is + // there in the debugger. + sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, + llvm::GlobalValue::WeakODRLinkage, + linit, name); + symbolTable->AddVariable(sym); + + if (m->diBuilder != NULL) { + llvm::DIFile file; + llvm::DIType diType = sym->type->GetDIType(file); + Assert(diType.Verify()); + // FIXME? DWARF says that this (and programIndex below) should + // have the DW_AT_artificial attribute. It's not clear if this + // matters for anything though.
+ llvm::DIGlobalVariable var = + m->diBuilder->createGlobalVariable(name, + file, + 0 /* line */, + diType, + true /* static */, + sym->storagePtr); + Assert(var.Verify()); + } } @@ -672,21 +692,37 @@ lDefineConstantIntFunc(const char *name, int val, llvm::Module *module, static void lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) { - Symbol *pidx = + Symbol *sym = new Symbol("programIndex", SourcePos(), AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC); int pi[ISPC_MAX_NVEC]; for (int i = 0; i < g->target.vectorWidth; ++i) pi[i] = i; - pidx->constValue = new ConstExpr(pidx->type, pi, SourcePos()); + sym->constValue = new ConstExpr(sym->type, pi, SourcePos()); llvm::Type *ltype = LLVMTypes::Int32VectorType; llvm::Constant *linit = LLVMInt32Vector(pi); - pidx->storagePtr = new llvm::GlobalVariable(*module, ltype, true, - llvm::GlobalValue::InternalLinkage, linit, - pidx->name.c_str()); - symbolTable->AddVariable(pidx); + // See comment in lDefineConstantInt() for why WeakODRLinkage is used here + sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true, + llvm::GlobalValue::WeakODRLinkage, + linit, + sym->name.c_str()); + symbolTable->AddVariable(sym); + + if (m->diBuilder != NULL) { + llvm::DIFile file; + llvm::DIType diType = sym->type->GetDIType(file); + Assert(diType.Verify()); + llvm::DIGlobalVariable var = + m->diBuilder->createGlobalVariable(sym->name.c_str(), + file, + 0 /* line */, + diType, + false /* static */, + sym->storagePtr); + Assert(var.Verify()); + } } diff --git a/ctx.cpp b/ctx.cpp index 9d9f04e1..1d0c834b 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -339,29 +339,6 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, /* And start a scope representing the initial function scope */ StartScope(); - - llvm::DIFile file = funcStartPos.GetDIFile(); - Symbol *programIndexSymbol = m->symbolTable->LookupVariable("programIndex"); - Assert(programIndexSymbol && programIndexSymbol->storagePtr); - 
llvm::DIGlobalVariable var = - m->diBuilder->createGlobalVariable(programIndexSymbol->name, - file, - funcStartPos.first_line, - programIndexSymbol->type->GetDIType(file), - true /* static */, - programIndexSymbol->storagePtr); - Assert(var.Verify()); - - Symbol *programCountSymbol = m->symbolTable->LookupVariable("programCount"); - Assert(programCountSymbol); - var = - m->diBuilder->createGlobalVariable(programCountSymbol->name, - file, - funcStartPos.first_line, - programCountSymbol->type->GetDIType(file), - true /* static */, - programCountSymbol->storagePtr); - Assert(var.Verify()); } } diff --git a/func.cpp b/func.cpp index 9fd31c7d..be6cf958 100644 --- a/func.cpp +++ b/func.cpp @@ -198,7 +198,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, // value maskSymbol->storagePtr = ctx->GetFullMaskPointer(); - // add debugging info for __mask, programIndex, ... + // add debugging info for __mask maskSymbol->pos = firstStmtPos; ctx->EmitVariableDebugInfo(maskSymbol); From c4b1d79c5c25345ea6c696ff56c88e8612f08ac5 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sat, 28 Apr 2012 20:28:39 -0700 Subject: [PATCH 112/173] When a function is defined, set its symbol's position to the code position. Before, if the function was declared before being defined, then the symbol's SourcePos would be left set to the position of the declaration. This ended up getting the debugging symbols mixed up in this case, which was undesirable. 
--- module.cpp | 4 +++- parse.yy | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/module.cpp b/module.cpp index 436791e6..17df7b86 100644 --- a/module.cpp +++ b/module.cpp @@ -792,11 +792,13 @@ void Module::AddFunctionDefinition(const std::string &name, const FunctionType *type, Stmt *code) { Symbol *sym = symbolTable->LookupFunction(name.c_str(), type); - if (sym == NULL) { + if (sym == NULL || code == NULL) { Assert(m->errorCount > 0); return; } + sym->pos = code->pos; + // FIXME: because we encode the parameter names in the function type, // we need to override the function type here in case the function had // earlier been declared with anonymous parameter names but is now diff --git a/parse.yy b/parse.yy index c4db2fa9..f7a468ad 100644 --- a/parse.yy +++ b/parse.yy @@ -1862,8 +1862,11 @@ function_definition dynamic_cast($2->type); if (funcType == NULL) Assert(m->errorCount > 0); - else - m->AddFunctionDefinition($2->name, funcType, $4); + else { + Stmt *code = $4; + if (code == NULL) code = new StmtList(@4); + m->AddFunctionDefinition($2->name, funcType, code); + } } m->symbolTable->PopScope(); // push in lAddFunctionParams(); } From ee1fe3aa9fb1aa9f9d2ca21ee70d3f7e6438f341 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 3 May 2012 08:25:25 -0700 Subject: [PATCH 113/173] Update build to handle existence of LLVM 3.2 dev branch. We now compile with LLVM 3.0, 3.1, and 3.2svn. 
--- Makefile | 12 +++---- cbackend.cpp | 44 ++++++++++++------------- ctx.cpp | 12 +++---- ispc.cpp | 91 +++++++++++++--------------------------------------- ispc.h | 4 +-- llvmutil.cpp | 22 ++++++------- main.cpp | 4 ++- module.cpp | 11 ++----- opt.cpp | 20 ++++++------ 9 files changed, 85 insertions(+), 135 deletions(-) diff --git a/Makefile b/Makefile index e9422564..ca55a734 100644 --- a/Makefile +++ b/Makefile @@ -19,10 +19,12 @@ else endif ARCH_TYPE = $(shell arch) -ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn) +ifeq ($(shell $(LLVM_CONFIG) --version), 3.0) + LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs) +else LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \ -lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \ - -lLLVMBitWriter -lLLVMTableGen \ + -lLLVMBitWriter -lLLVMTableGen \ -lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG \ -lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info \ -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler -lLLVMMCParser \ @@ -30,15 +32,13 @@ ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn) -lLLVMipa -lLLVMAnalysis -lLLVMMCJIT -lLLVMRuntimeDyld \ -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore \ -lLLVMSupport -else - LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs) endif CLANG=clang CLANG_LIBS = -lclangFrontend -lclangDriver \ -lclangSerialization -lclangParse -lclangSema \ -lclangAnalysis -lclangAST -lclangLex -lclangBasic -ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn) +ifneq ($(shell $(LLVM_CONFIG) --version), 3.0) CLANG_LIBS += -lclangEdit endif @@ -54,7 +54,7 @@ ifeq ($(ARCH_OS2),Msys) endif LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags) -LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed s/\\./_/) +LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//) LLVM_VERSION_DEF=-D$(LLVM_VERSION) BUILD_DATE=$(shell date +%Y%m%d) diff --git a/cbackend.cpp b/cbackend.cpp index 0522fd85..e932dff2 100644 --- a/cbackend.cpp +++ b/cbackend.cpp 
@@ -361,7 +361,7 @@ namespace { bool printConstExprCast(const ConstantExpr *CE, bool Static); void printConstantArray(ConstantArray *CPA, bool Static); void printConstantVector(ConstantVector *CV, bool Static); -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 void printConstantDataSequential(ConstantDataSequential *CDS, bool Static); #endif @@ -438,11 +438,11 @@ namespace { void visitInvokeInst(InvokeInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); } -#if !defined(LLVM_3_1) && !defined(LLVM_3_1svn) +#ifdef LLVM_3_0 void visitUnwindInst(UnwindInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); } -#endif // !LLVM_3_1svn +#endif // LLVM_3_0 void visitResumeInst(ResumeInst &I) { llvm_unreachable("DwarfEHPrepare pass didn't work!"); } @@ -802,7 +802,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty, } void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { -#ifndef LLVM_3_1svn +#ifdef LLVM_3_0 Type *ETy = CPA->getType()->getElementType(); // MMP: this looks like a bug: both sides of the || are the same bool isString = ETy == Type::getInt8Ty(CPA->getContext()); @@ -855,7 +855,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { Out << "\""; return; } -#endif // !LLVM_3_1 +#endif // LLVM_3_0 printConstant(cast(CPA->getOperand(0)), Static); for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { @@ -872,7 +872,7 @@ void CWriter::printConstantVector(ConstantVector *CP, bool Static) { } } -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, bool Static) { // As a special case, print the array as a string if it is an array of @@ -929,9 +929,9 @@ void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, } } } -#endif // LLVM_3_1svn +#endif // !LLVM_3_0 -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 static inline std::string ftostr(const APFloat& V) { std::string Buf; if (&V.getSemantics() == &APFloat::IEEEdouble) { @@ -943,7 +943,7 @@ static inline 
std::string ftostr(const APFloat& V) { } return ""; // error } -#endif // LLVM_3_1svn +#endif // !LLVM_3_0 // isFPCSafeToPrint - Returns true if we may assume that CFP may be written out // textually as a double (rather than as a reference to a stack-allocated @@ -1432,11 +1432,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) { } if (ConstantArray *CA = dyn_cast(CPV)) { printConstantArray(CA, Static); -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 } else if (ConstantDataSequential *CDS = dyn_cast(CPV)) { printConstantDataSequential(CDS, Static); -#endif // LLVM_3_1svn +#endif // !LLVM_3_0 } else { assert(isa(CPV) || isa(CPV)); if (AT->getNumElements()) { @@ -1481,7 +1481,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << ")"; } } -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 else if (ConstantDataVector *CDV = dyn_cast(CPV)) { llvm::Constant *splatValue = CDV->getSplatValue(); if (splatValue != NULL && smearFunc != NULL) { @@ -1496,7 +1496,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << ")"; } } -#endif +#endif // !LLVM_3_0 else { assert(isa(CPV)); Constant *CZ = Constant::getNullValue(VT->getElementType()); @@ -2898,17 +2898,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) { printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); Out << ";\n"; -#ifdef LLVM_3_1svn - for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - ConstantInt* CaseVal = i.getCaseValue(); - BasicBlock* Succ = i.getCaseSuccessor(); -#else +#ifdef LLVM_3_0 // Skip the first item since that's the default case. 
unsigned NumCases = SI.getNumCases(); for (unsigned i = 1; i < NumCases; ++i) { ConstantInt* CaseVal = SI.getCaseValue(i); BasicBlock* Succ = SI.getSuccessor(i); -#endif // LLVM_3_1svn +#else + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { + ConstantInt* CaseVal = i.getCaseValue(); + BasicBlock* Succ = i.getCaseSuccessor(); +#endif // !LLVM_3_0 Out << " case "; writeOperand(CaseVal); Out << ":\n"; @@ -3806,10 +3806,10 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { const MCAsmInfo *TargetAsm; std::string Triple = TheModule->getTargetTriple(); if (Triple.empty()) -#if defined(LLVM_3_1) || defined(LLVM_3_1svn) - Triple = llvm::sys::getDefaultTargetTriple(); -#else +#ifdef LLVM_3_0 Triple = llvm::sys::getHostTriple(); +#else + Triple = llvm::sys::getDefaultTargetTriple(); #endif std::string E; diff --git a/ctx.cpp b/ctx.cpp index 1d0c834b..5d9ed434 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1314,10 +1314,10 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { llvm::Value * FunctionEmitContext::GetStringPtr(const std::string &str) { -#ifdef LLVM_3_1svn - llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str); -#else +#ifdef LLVM_3_0 llvm::Constant *lstr = llvm::ConstantArray::get(*g->ctx, str); +#else + llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str); #endif llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage; llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(), @@ -1368,10 +1368,10 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { static llvm::Value * lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) { -#ifdef LLVM_3_1svn - llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s); -#else +#ifdef LLVM_3_0 llvm::Constant *sConstant = llvm::ConstantArray::get(*g->ctx, s); +#else + llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s); #endif 
llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(), true /* const */, diff --git a/ispc.cpp b/ispc.cpp index 2fcf27ff..bd832825 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -54,14 +54,8 @@ #include #include #include -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) - #include - #include -#else - #include - #include - #include -#endif +#include +#include #include Globals *g; @@ -114,10 +108,7 @@ lGetSystemISA() { static const char *supportedCPUs[] = { - "atom", "penryn", "core2", "corei7", -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) - "corei7-avx" -#endif + "atom", "penryn", "core2", "corei7", "corei7-avx" }; @@ -128,14 +119,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, if (cpu != NULL) { // If a CPU was specified explicitly, try to pick the best // possible ISA based on that. -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) if (!strcmp(cpu, "sandybridge") || !strcmp(cpu, "corei7-avx")) isa = "avx"; - else -#endif - if (!strcmp(cpu, "corei7") || - !strcmp(cpu, "penryn")) + else if (!strcmp(cpu, "corei7") || + !strcmp(cpu, "penryn")) isa = "sse4"; else isa = "sse2"; @@ -277,7 +265,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->allOffMaskIsSafe = false; t->maskBitCount = 32; } -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) else if (!strcasecmp(isa, "avx")) { t->isa = Target::AVX; t->nativeVectorWidth = 8; @@ -296,8 +283,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->allOffMaskIsSafe = false; t->maskBitCount = 32; } -#endif // LLVM 3.0+ -#if defined(LLVM_3_1svn) +#ifndef LLVM_3_0 else if (!strcasecmp(isa, "avx2")) { t->isa = Target::AVX2; t->nativeVectorWidth = 8; @@ -316,7 +302,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->allOffMaskIsSafe = false; t->maskBitCount = 32; } -#endif // LLVM 3.1 +#endif // !LLVM_3_0 else { 
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n", isa, SupportedTargetISAs()); @@ -354,13 +340,10 @@ Target::SupportedTargetArchs() { const char * Target::SupportedTargetISAs() { - return "sse2, sse2-x2, sse4, sse4-x2" -#ifndef LLVM_2_9 - ", avx, avx-x2" -#endif // !LLVM_2_9 -#ifdef LLVM_3_1svn + return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2" +#ifndef LLVM_3_0 ", avx2, avx2-x2" -#endif // LLVM_3_1svn +#endif // !LLVM_3_0 ", generic-4, generic-8, generic-16, generic-1"; } @@ -369,10 +352,10 @@ std::string Target::GetTripleString() const { llvm::Triple triple; // Start with the host triple as the default -#if defined(LLVM_3_1) || defined(LLVM_3_1svn) - triple.setTriple(llvm::sys::getDefaultTargetTriple()); -#else +#ifdef LLVM_3_0 triple.setTriple(llvm::sys::getHostTriple()); +#else + triple.setTriple(llvm::sys::getDefaultTargetTriple()); #endif // And override the arch in the host triple based on what the user @@ -398,37 +381,17 @@ Target::GetTargetMachine() const { llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ : llvm::Reloc::Default; -#if defined(LLVM_3_1svn) - std::string featuresString = attributes; - llvm::TargetOptions options; -#if 0 - // This was breaking e.g. round() on SSE2, where the code we want to - // run wants to do: - // x += 0x1.0p23f; - // x -= 0x1.0p23f; - // But then LLVM was optimizing this away... 
- if (g->opt.fastMath == true) - options.UnsafeFPMath = 1; -#endif - llvm::TargetMachine *targetMachine = - target->createTargetMachine(triple, cpu, featuresString, options, - relocModel); -#elif defined(LLVM_3_0) +#ifdef LLVM_3_0 std::string featuresString = attributes; llvm::TargetMachine *targetMachine = target->createTargetMachine(triple, cpu, featuresString, relocModel); -#else // LLVM 2.9 -#ifdef ISPC_IS_APPLE - relocModel = llvm::Reloc::PIC_; -#endif // ISPC_IS_APPLE - std::string featuresString = cpu + std::string(",") + attributes; +#else + std::string featuresString = attributes; + llvm::TargetOptions options; llvm::TargetMachine *targetMachine = - target->createTargetMachine(triple, featuresString); -#ifndef ISPC_IS_WINDOWS - targetMachine->setRelocationModel(relocModel); -#endif // !ISPC_IS_WINDOWS -#endif // LLVM_2_9 - + target->createTargetMachine(triple, cpu, featuresString, options, + relocModel); +#endif // !LLVM_3_0 Assert(targetMachine != NULL); targetMachine->setAsmVerbosityDefault(true); @@ -498,16 +461,11 @@ Target::SizeOf(llvm::Type *type, llvm::Value *index[1] = { LLVMInt32(1) }; llvm::PointerType *ptrType = llvm::PointerType::get(type, 0); llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType); -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&index[0], &index[1]); llvm::Instruction *gep = llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep", insertAtEnd); -#else - llvm::Instruction *gep = - llvm::GetElementPtrInst::Create(voidPtr, &index[0], &index[1], - "sizeof_gep", insertAtEnd); -#endif + if (is32Bit || g->opt.force32BitAddressing) return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "sizeof_int", insertAtEnd); @@ -536,16 +494,11 @@ Target::StructOffset(llvm::Type *type, int element, llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) }; llvm::PointerType *ptrType = llvm::PointerType::get(type, 0); llvm::Value *voidPtr = 
llvm::ConstantPointerNull::get(ptrType); -#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) llvm::ArrayRef arrayRef(&indices[0], &indices[2]); llvm::Instruction *gep = llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep", insertAtEnd); -#else - llvm::Instruction *gep = - llvm::GetElementPtrInst::Create(voidPtr, &indices[0], &indices[2], - "offset_gep", insertAtEnd); -#endif + if (is32Bit || g->opt.force32BitAddressing) return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type, "offset_int", insertAtEnd); diff --git a/ispc.h b/ispc.h index c4c5c405..d0d0c3f7 100644 --- a/ispc.h +++ b/ispc.h @@ -40,8 +40,8 @@ #define ISPC_VERSION "1.2.3dev" -#if !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn) -#error "Only LLVM 3.0, and the 3.1 development branch are supported" +#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2) +#error "Only LLVM 3.0, 3.1, and the 3.2 development branch are supported" #endif #if defined(_WIN32) || defined(_WIN64) diff --git a/llvmutil.cpp b/llvmutil.cpp index e21a0b12..cc8ac5af 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -657,7 +657,7 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) { // Deal with the fact that LLVM3.1 and previous versions have different // representations for vectors of constant ints... 
-#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::ConstantDataVector *cv = llvm::dyn_cast(v); if (cv == NULL) return false; @@ -678,7 +678,7 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) { ret[i] = ci->getSExtValue(); } return true; -#endif // LLVM_3_1svn +#endif // !LLVM_3_0 } @@ -947,7 +947,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength, if (cv != NULL) return (cv->getSplatValue() != NULL); -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::ConstantDataVector *cdv = llvm::dyn_cast(v); if (cdv != NULL) return (cdv->getSplatValue() != NULL); @@ -1102,7 +1102,7 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride, */ static bool lVectorIsLinearConstantInts( -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::ConstantDataVector *cv, #else llvm::ConstantVector *cv, @@ -1111,7 +1111,7 @@ lVectorIsLinearConstantInts( int stride) { // Flatten the vector out into the elements array llvm::SmallVector elements; -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 for (int i = 0; i < (int)cv->getNumElements(); ++i) elements.push_back(cv->getElementAsConstant(i)); #else @@ -1152,7 +1152,7 @@ lCheckMulForLinear(llvm::Value *op0, llvm::Value *op1, int vectorLength, int stride, std::vector &seenPhis) { // Is the first operand a constant integer value splatted across all of // the lanes? -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::ConstantDataVector *cv = llvm::dyn_cast(op0); #else llvm::ConstantVector *cv = llvm::dyn_cast(op0); @@ -1226,7 +1226,7 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride, std::vector &seenPhis) { // First try the easy case: if the values are all just constant // integers and have the expected stride between them, then we're done. 
-#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::ConstantDataVector *cv = llvm::dyn_cast(v); #else llvm::ConstantVector *cv = llvm::dyn_cast(v); @@ -1403,19 +1403,19 @@ lExtractFirstVectorElement(llvm::Value *v, return llvm::ConstantInt::get(vt->getElementType(), 0); } if (llvm::ConstantVector *cv = llvm::dyn_cast(v)) { -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 return cv->getOperand(0); #else llvm::SmallVector elements; cv->getVectorElements(elements); return elements[0]; -#endif // LLVM_3_1 +#endif // !LLVM_3_0 } -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 if (llvm::ConstantDataVector *cdv = llvm::dyn_cast(v)) return cdv->getElementAsConstant(0); -#endif // LLVM_3_1 +#endif // !LLVM_3_0 // Otherwise, all that we should have at this point is an instruction // of some sort diff --git a/main.cpp b/main.cpp index a98a35bb..417c1c3c 100644 --- a/main.cpp +++ b/main.cpp @@ -62,8 +62,10 @@ lPrintVersion() { ISPC_VERSION, BUILD_VERSION, BUILD_DATE, #if defined(LLVM_3_0) "3.0" -#elif defined(LLVM_3_1) || defined(LLVM_3_1svn) +#elif defined(LLVM_3_1) "3.1" +#elif defined(LLVM_3_2) + "3.2" #else #error "Unhandled LLVM version" #endif diff --git a/module.cpp b/module.cpp index 17df7b86..41eeb412 100644 --- a/module.cpp +++ b/module.cpp @@ -271,11 +271,6 @@ extern void yy_delete_buffer(YY_BUFFER_STATE); int Module::CompileFile() { -#ifndef LLVM_3_1svn - if (g->opt.fastMath == true) - llvm::UnsafeFPMath = true; -#endif // !LLVM_3_1svn - extern void ParserInit(); ParserInit(); @@ -1286,10 +1281,10 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre clang::TargetOptions &options = inst.getTargetOpts(); llvm::Triple triple(module->getTargetTriple()); if (triple.getTriple().empty()) { -#if defined(LLVM_3_1) || defined(LLVM_3_1svn) - triple.setTriple(llvm::sys::getDefaultTargetTriple()); -#else +#ifdef LLVM_3_0 triple.setTriple(llvm::sys::getHostTriple()); +#else + triple.setTriple(llvm::sys::getDefaultTargetTriple()); #endif } options.Triple = triple.getTriple(); 
diff --git a/opt.cpp b/opt.cpp index 644a83dc..34cdab0f 100644 --- a/opt.cpp +++ b/opt.cpp @@ -656,7 +656,7 @@ lGetMask(llvm::Value *factor) { "known and all bits on". */ Assert(g->target.vectorWidth < 32); -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::ConstantDataVector *cdv = llvm::dyn_cast(factor); if (cdv != NULL) { llvm::SmallVector elements; @@ -669,7 +669,7 @@ lGetMask(llvm::Value *factor) { llvm::ConstantVector *cv = llvm::dyn_cast(factor); if (cv != NULL) { llvm::SmallVector elements; - #ifdef LLVM_3_1svn +#ifndef LLVM_3_0 for (int i = 0; i < (int)cv->getNumOperands(); ++i) { llvm::Constant *c = llvm::dyn_cast(cv->getOperand(i)); @@ -945,7 +945,7 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { restart: for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { // vector select wasn't available before 3.1... -#if defined(LLVM_3_1svn) +#ifndef LLVM_3_0 llvm::SelectInst *selectInst = llvm::dyn_cast(&*iter); if (selectInst != NULL && selectInst->getType()->isVectorTy()) { llvm::Value *factor = selectInst->getOperand(0); @@ -966,7 +966,7 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } } -#endif // LLVM_3_1svn +#endif // !LLVM_3_0 llvm::CallInst *callInst = llvm::dyn_cast(&*iter); if (callInst == NULL) @@ -1179,7 +1179,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets, // Indexing into global arrays can lead to this form, with // ConstantVectors.. llvm::SmallVector elements; - #ifdef LLVM_3_1svn +#ifndef LLVM_3_0 for (int i = 0; i < (int)cv->getNumOperands(); ++i) { llvm::Constant *c = llvm::dyn_cast(cv->getOperand(i)); @@ -1274,7 +1274,7 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, llvm::Value **variableOffset, llvm::Instruction *insertBefore) { if (llvm::isa(vec) || -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::isa(vec) || #endif llvm::isa(vec)) { @@ -1399,11 +1399,11 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, *splat, if so). 
*/ static bool lIsIntegerSplat(llvm::Value *v, int *splat) { -#ifdef LLVM_3_1svn +#ifdef LLVM_3_0 + llvm::ConstantVector *cvec = llvm::dyn_cast(v); +#else llvm::ConstantDataVector *cvec = llvm::dyn_cast(v); -#else - llvm::ConstantVector *cvec = llvm::dyn_cast(v); #endif if (cvec == NULL) return false; @@ -1539,7 +1539,7 @@ lExtractUniforms(llvm::Value **vec, llvm::Instruction *insertBefore) { fprintf(stderr, "\n"); if (llvm::isa(*vec) || -#ifdef LLVM_3_1svn +#ifndef LLVM_3_0 llvm::isa(*vec) || #endif llvm::isa(*vec)) From d99bd279e8226add132a30157114fd7b35c7b8a2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 3 May 2012 11:11:06 -0700 Subject: [PATCH 114/173] Add generic-32 target. --- Makefile | 2 +- builtins.cpp | 7 + builtins/target-generic-32.ll | 33 +++ ispc.cpp | 11 +- ispc.h | 2 +- ispc.vcxproj | 14 ++ opt.cpp | 412 ++++++++++++++++++---------------- run_tests.py | 5 +- 8 files changed, 283 insertions(+), 203 deletions(-) create mode 100644 builtins/target-generic-32.ll diff --git a/Makefile b/Makefile index ca55a734..01746fa4 100644 --- a/Makefile +++ b/Makefile @@ -85,7 +85,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \ - generic-16 generic-1 + generic-16 generic-32 generic-1 BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ builtins/dispatch.ll BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ diff --git a/builtins.cpp b/builtins.cpp index 1682db9a..b94fa04f 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -847,6 +847,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod builtins_bitcode_generic_16_length, module, symbolTable); break; + case 32: + extern unsigned char builtins_bitcode_generic_32[]; + extern int 
builtins_bitcode_generic_32_length; + AddBitcodeToModule(builtins_bitcode_generic_32, + builtins_bitcode_generic_32_length, + module, symbolTable); + break; case 1: extern unsigned char builtins_bitcode_generic_1[]; extern int builtins_bitcode_generic_1_length; diff --git a/builtins/target-generic-32.ll b/builtins/target-generic-32.ll new file mode 100644 index 00000000..5f89bcdf --- /dev/null +++ b/builtins/target-generic-32.ll @@ -0,0 +1,33 @@ +;; Copyright (c) 2010-2012, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +define(`WIDTH',`32') +include(`target-generic-common.ll') diff --git a/ispc.cpp b/ispc.cpp index bd832825..3a2134d1 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -257,6 +257,14 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->allOffMaskIsSafe = true; t->maskBitCount = 1; } + else if (!strcasecmp(isa, "generic-32")) { + t->isa = Target::GENERIC; + t->nativeVectorWidth = 32; + t->vectorWidth = 32; + t->maskingIsFree = true; + t->allOffMaskIsSafe = true; + t->maskBitCount = 1; + } else if (!strcasecmp(isa, "generic-1")) { t->isa = Target::GENERIC; t->nativeVectorWidth = 1; @@ -313,6 +321,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, llvm::TargetMachine *targetMachine = t->GetTargetMachine(); const llvm::TargetData *targetData = targetMachine->getTargetData(); t->is32Bit = (targetData->getPointerSize() == 4); + Assert(t->vectorWidth <= ISPC_MAX_NVEC); } return !error; @@ -344,7 +353,7 @@ Target::SupportedTargetISAs() { #ifndef LLVM_3_0 ", avx2, avx2-x2" #endif // !LLVM_3_0 - ", generic-4, generic-8, generic-16, generic-1"; + ", generic-1, generic-4, generic-8, generic-16, generic-32"; } diff --git a/ispc.h b/ispc.h index d0d0c3f7..bb551a6d 100644 --- a/ispc.h +++ b/ispc.h @@ -71,7 +71,7 @@ /** @def ISPC_MAX_NVEC maximum vector size of any of the compliation targets. 
*/ -#define ISPC_MAX_NVEC 16 +#define ISPC_MAX_NVEC 32 // Forward declarations of a number of widely-used LLVM types namespace llvm { diff --git a/ispc.vcxproj b/ispc.vcxproj index 6971ce9a..34ef9373 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -29,6 +29,7 @@ + @@ -264,6 +265,19 @@ Building gen-bitcode-generic-16.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp + gen-bitcode-generic-32.cpp + builtins\util.m4;builtins\target-generic-common.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp + gen-bitcode-generic-32.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-32.cpp + Building gen-bitcode-generic-32.cpp + + Document diff --git a/opt.cpp b/opt.cpp index 34cdab0f..063be681 100644 --- a/opt.cpp +++ b/opt.cpp @@ -265,6 +265,124 @@ lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name, } +/** Given a vector of constant values (int, float, or bool) representing an + execution mask, convert it to a bitvector where the 0th bit corresponds + to the first vector value and so forth. +*/ +static uint32_t +lConstElementsToMask(const llvm::SmallVector &elements) { + Assert(elements.size() <= 32); + + uint32_t mask = 0; + for (unsigned int i = 0; i < elements.size(); ++i) { + llvm::APInt intMaskValue; + // SSE has the "interesting" approach of encoding blending + // masks as . + llvm::ConstantFP *cf = llvm::dyn_cast(elements[i]); + if (cf != NULL) { + llvm::APFloat apf = cf->getValueAPF(); + intMaskValue = apf.bitcastToAPInt(); + } + else { + // Otherwise get it as an int + llvm::ConstantInt *ci = llvm::dyn_cast(elements[i]); + Assert(ci != NULL); // vs return -1 if NULL? + intMaskValue = ci->getValue(); + } + // Is the high-bit set? 
If so, OR in the appropriate bit in + // the result mask + if (intMaskValue.countLeadingOnes() > 0) + mask |= (1 << i); + } + return mask; +} + + +/** Given an llvm::Value represinting a vector mask, see if the value is a + constant. If so, return true and set *bits to be the integer mask + found by taking the high bits of the mask values in turn and + concatenating them into a single integer. In other words, given the + 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, we have 0b1001 = 9. + */ +static bool +lGetMask(llvm::Value *factor, uint32_t *mask) { +#ifndef LLVM_3_0 + llvm::ConstantDataVector *cdv = llvm::dyn_cast(factor); + if (cdv != NULL) { + llvm::SmallVector elements; + for (int i = 0; i < (int)cdv->getNumElements(); ++i) + elements.push_back(cdv->getElementAsConstant(i)); + *mask = lConstElementsToMask(elements); + return true; + } +#endif + + llvm::ConstantVector *cv = llvm::dyn_cast(factor); + if (cv != NULL) { + llvm::SmallVector elements; +#ifndef LLVM_3_0 + for (int i = 0; i < (int)cv->getNumOperands(); ++i) { + llvm::Constant *c = + llvm::dyn_cast(cv->getOperand(i)); + if (c == NULL) + return NULL; + elements.push_back(c); + } +#else + cv->getVectorElements(elements); +#endif + *mask = lConstElementsToMask(elements); + return true; + } + else if (llvm::isa(factor)) { + *mask = 0; + return true; + } + else { +#if 0 + llvm::ConstantExpr *ce = llvm::dyn_cast(factor); + if (ce != NULL) { + llvm::TargetMachine *targetMachine = g->target.GetTargetMachine(); + const llvm::TargetData *td = targetMachine->getTargetData(); + llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td); + c->dump(); + factor = c; + } + // else we should be able to handle it above... + Assert(!llvm::isa(factor)); +#endif + return false; + } +} + + +enum MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN }; + +/** Determines if the given mask value is all on, all off, mixed, or + unknown at compile time. 
+*/ +static MaskStatus +lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) { + uint32_t bits; + if (lGetMask(mask, &bits) == false) + return UNKNOWN; + + if (bits == 0) + return ALL_OFF; + + if (vecWidth == -1) + vecWidth = g->target.vectorWidth; + Assert(vecWidth <= 32); + + for (int i = 0; i < vecWidth; ++i) { + if ((bits & (1ull << i)) == 0) + return MIXED; + } + return ALL_ON; +} + + /////////////////////////////////////////////////////////////////////////// void @@ -559,12 +677,12 @@ private: instruction for this optimization pass. */ struct BlendInstruction { - BlendInstruction(llvm::Function *f, int ao, int o0, int o1, int of) + BlendInstruction(llvm::Function *f, uint32_t ao, int o0, int o1, int of) : function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) { } /** Function pointer for the blend instruction */ llvm::Function *function; /** Mask value for an "all on" mask for this instruction */ - int allOnMask; + uint32_t allOnMask; /** The operand number in the llvm CallInst corresponds to the first operand to blend with. */ int op0; @@ -609,99 +727,6 @@ IntrinsicsOpt::IntrinsicsOpt() } -/** Given a vector of constant values (int, float, or bool) representing an - execution mask, convert it to a bitvector where the 0th bit corresponds - to the first vector value and so forth. -*/ -static int -lConstElementsToMask(const llvm::SmallVector &elements) { - Assert(elements.size() <= 32); - - int mask = 0; - for (unsigned int i = 0; i < elements.size(); ++i) { - llvm::APInt intMaskValue; - // SSE has the "interesting" approach of encoding blending - // masks as . - llvm::ConstantFP *cf = llvm::dyn_cast(elements[i]); - if (cf != NULL) { - llvm::APFloat apf = cf->getValueAPF(); - intMaskValue = apf.bitcastToAPInt(); - } - else { - // Otherwise get it as an int - llvm::ConstantInt *ci = llvm::dyn_cast(elements[i]); - Assert(ci != NULL); // vs return -1 if NULL? - intMaskValue = ci->getValue(); - } - // Is the high-bit set? 
If so, OR in the appropriate bit in - // the result mask - if (intMaskValue.countLeadingOnes() > 0) - mask |= (1 << i); - } - return mask; -} - - -/** Given an llvm::Value represinting a vector mask, see if the value is a - constant. If so, return the integer mask found by taking the high bits - of the mask values in turn and concatenating them into a single integer. - In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, - we have 0b1001 = 9. - */ -static int -lGetMask(llvm::Value *factor) { - /* FIXME: This will break if we ever do 32-wide compilation, in which case - it don't be possible to distinguish between -1 for "don't know" and - "known and all bits on". */ - Assert(g->target.vectorWidth < 32); - -#ifndef LLVM_3_0 - llvm::ConstantDataVector *cdv = llvm::dyn_cast(factor); - if (cdv != NULL) { - llvm::SmallVector elements; - for (int i = 0; i < (int)cdv->getNumElements(); ++i) - elements.push_back(cdv->getElementAsConstant(i)); - return lConstElementsToMask(elements); - } -#endif - - llvm::ConstantVector *cv = llvm::dyn_cast(factor); - if (cv != NULL) { - llvm::SmallVector elements; -#ifndef LLVM_3_0 - for (int i = 0; i < (int)cv->getNumOperands(); ++i) { - llvm::Constant *c = - llvm::dyn_cast(cv->getOperand(i)); - if (c == NULL) - return NULL; - elements.push_back(c); - } -#else - cv->getVectorElements(elements); -#endif - return lConstElementsToMask(elements); - } - else if (llvm::isa(factor)) - return 0; - else { -#if 0 - llvm::ConstantExpr *ce = llvm::dyn_cast(factor); - if (ce != NULL) { - llvm::TargetMachine *targetMachine = g->target.GetTargetMachine(); - const llvm::TargetData *td = targetMachine->getTargetData(); - llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td); - c->dump(); - factor = c; - } - // else we should be able to handle it above... - Assert(!llvm::isa(factor)); -#endif - return -1; - } -} - - /** Given an llvm::Value, return true if we can determine that it's an undefined value. 
This only makes a weak attempt at chasing this down, only detecting flat-out undef values, and bitcasts of undef values. @@ -779,26 +804,28 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } - int mask = lGetMask(factor); - llvm::Value *value = NULL; - if (mask == 0) - // Mask all off -> replace with the first blend value - value = v[0]; - else if (mask == blend->allOnMask) - // Mask all on -> replace with the second blend value - value = v[1]; + uint32_t mask; + if (lGetMask(factor, &mask) == true) { + llvm::Value *value = NULL; + if (mask == 0) + // Mask all off -> replace with the first blend value + value = v[0]; + else if (mask == blend->allOnMask) + // Mask all on -> replace with the second blend value + value = v[1]; - if (value != NULL) { - llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), - iter, value); - modifiedAny = true; - goto restart; + if (value != NULL) { + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, value); + modifiedAny = true; + goto restart; + } } } else if (matchesMaskInstruction(callInst->getCalledFunction())) { llvm::Value *factor = callInst->getArgOperand(0); - int mask = lGetMask(factor); - if (mask != -1) { + uint32_t mask; + if (lGetMask(factor, &mask) == true) { // If the vector-valued mask has a known value, replace it // with the corresponding integer mask from its elements // high bits. 
@@ -812,71 +839,75 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { else if (callInst->getCalledFunction() == avxMaskedLoad32 || callInst->getCalledFunction() == avxMaskedLoad64) { llvm::Value *factor = callInst->getArgOperand(1); - int mask = lGetMask(factor); - if (mask == 0) { - // nothing being loaded, replace with undef value - llvm::Type *returnType = callInst->getType(); - Assert(llvm::isa(returnType)); - llvm::Value *undefValue = llvm::UndefValue::get(returnType); - llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), - iter, undefValue); - modifiedAny = true; - goto restart; - } - else if (mask == 0xff) { - // all lanes active; replace with a regular load - llvm::Type *returnType = callInst->getType(); - Assert(llvm::isa(returnType)); - // cast the i8 * to the appropriate type - const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast"); - llvm::Value *castPtr = - new llvm::BitCastInst(callInst->getArgOperand(0), - llvm::PointerType::get(returnType, 0), - name, callInst); - lCopyMetadata(castPtr, callInst); - int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 
4 : 8; - name = LLVMGetName(callInst->getArgOperand(0), "_load"); - llvm::Instruction *loadInst = - new llvm::LoadInst(castPtr, name, false /* not volatile */, - align, (llvm::Instruction *)NULL); - lCopyMetadata(loadInst, callInst); - llvm::ReplaceInstWithInst(callInst, loadInst); - modifiedAny = true; - goto restart; + uint32_t mask; + if (lGetMask(factor, &mask) == true) { + if (mask == 0) { + // nothing being loaded, replace with undef value + llvm::Type *returnType = callInst->getType(); + Assert(llvm::isa(returnType)); + llvm::Value *undefValue = llvm::UndefValue::get(returnType); + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, undefValue); + modifiedAny = true; + goto restart; + } + else if (mask == 0xff) { + // all lanes active; replace with a regular load + llvm::Type *returnType = callInst->getType(); + Assert(llvm::isa(returnType)); + // cast the i8 * to the appropriate type + const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast"); + llvm::Value *castPtr = + new llvm::BitCastInst(callInst->getArgOperand(0), + llvm::PointerType::get(returnType, 0), + name, callInst); + lCopyMetadata(castPtr, callInst); + int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8; + name = LLVMGetName(callInst->getArgOperand(0), "_load"); + llvm::Instruction *loadInst = + new llvm::LoadInst(castPtr, name, false /* not volatile */, + align, (llvm::Instruction *)NULL); + lCopyMetadata(loadInst, callInst); + llvm::ReplaceInstWithInst(callInst, loadInst); + modifiedAny = true; + goto restart; + } } } else if (callInst->getCalledFunction() == avxMaskedStore32 || callInst->getCalledFunction() == avxMaskedStore64) { // NOTE: mask is the 2nd parameter, not the 3rd one!! 
llvm::Value *factor = callInst->getArgOperand(1); - int mask = lGetMask(factor); - if (mask == 0) { - // nothing actually being stored, just remove the inst - callInst->eraseFromParent(); - modifiedAny = true; - goto restart; - } - else if (mask == 0xff) { - // all lanes storing, so replace with a regular store - llvm::Value *rvalue = callInst->getArgOperand(2); - llvm::Type *storeType = rvalue->getType(); - const char *name = LLVMGetName(callInst->getArgOperand(0), - "_ptrcast"); - llvm::Value *castPtr = - new llvm::BitCastInst(callInst->getArgOperand(0), - llvm::PointerType::get(storeType, 0), - name, callInst); - lCopyMetadata(castPtr, callInst); + uint32_t mask; + if (lGetMask(factor, &mask) == true) { + if (mask == 0) { + // nothing actually being stored, just remove the inst + callInst->eraseFromParent(); + modifiedAny = true; + goto restart; + } + else if (mask == 0xff) { + // all lanes storing, so replace with a regular store + llvm::Value *rvalue = callInst->getArgOperand(2); + llvm::Type *storeType = rvalue->getType(); + const char *name = LLVMGetName(callInst->getArgOperand(0), + "_ptrcast"); + llvm::Value *castPtr = + new llvm::BitCastInst(callInst->getArgOperand(0), + llvm::PointerType::get(storeType, 0), + name, callInst); + lCopyMetadata(castPtr, callInst); - llvm::StoreInst *storeInst = - new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL); - int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8; - storeInst->setAlignment(align); - lCopyMetadata(storeInst, callInst); - llvm::ReplaceInstWithInst(callInst, storeInst); + llvm::StoreInst *storeInst = + new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL); + int align = callInst->getCalledFunction() == avxMaskedStore32 ? 
4 : 8; + storeInst->setAlignment(align); + lCopyMetadata(storeInst, callInst); + llvm::ReplaceInstWithInst(callInst, storeInst); - modifiedAny = true; - goto restart; + modifiedAny = true; + goto restart; + } } } } @@ -949,13 +980,13 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { llvm::SelectInst *selectInst = llvm::dyn_cast(&*iter); if (selectInst != NULL && selectInst->getType()->isVectorTy()) { llvm::Value *factor = selectInst->getOperand(0); - int mask = lGetMask(factor); - int allOnMask = (1 << g->target.vectorWidth) - 1; + + MaskStatus maskStatus = lGetMaskStatus(factor); llvm::Value *value = NULL; - if (mask == allOnMask) + if (maskStatus == ALL_ON) // Mask all on -> replace with the first select value value = selectInst->getOperand(1); - else if (mask == 0) + else if (maskStatus == ALL_OFF) // Mask all off -> replace with the second select value value = selectInst->getOperand(2); @@ -976,8 +1007,8 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { if (calledFunc == NULL || calledFunc != m->module->getFunction("__movmsk")) continue; - int mask = lGetMask(callInst->getArgOperand(0)); - if (mask != -1) { + uint32_t mask; + if (lGetMask(callInst->getArgOperand(0), &mask) == true) { #if 0 fprintf(stderr, "mask %d\n", mask); callInst->getArgOperand(0)->dump(); @@ -1964,10 +1995,8 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { llvm::Value *rvalue = callInst->getArgOperand(1); llvm::Value *mask = callInst->getArgOperand(2); - int allOnMask = (1 << g->target.vectorWidth) - 1; - - int maskAsInt = lGetMask(mask); - if (maskAsInt == 0) { + MaskStatus maskStatus = lGetMaskStatus(mask); + if (maskStatus == ALL_OFF) { // Zero mask - no-op, so remove the store completely. (This // may in turn lead to being able to optimize out instructions // that compute the rvalue...) 
@@ -1975,11 +2004,10 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { modifiedAny = true; goto restart; } - else if (maskAsInt == allOnMask) { + else if (maskStatus == ALL_ON) { // The mask is all on, so turn this into a regular store llvm::Type *rvalueType = rvalue->getType(); - llvm::Type *ptrType = - llvm::PointerType::get(rvalueType, 0); + llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0); lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst); lCopyMetadata(lvalue, callInst); @@ -2072,20 +2100,18 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { // Got one; grab the operands llvm::Value *ptr = callInst->getArgOperand(0); llvm::Value *mask = callInst->getArgOperand(1); - int allOnMask = (1 << g->target.vectorWidth) - 1; - int maskAsInt = lGetMask(mask); - if (maskAsInt == 0) { + MaskStatus maskStatus = lGetMaskStatus(mask); + if (maskStatus == ALL_OFF) { // Zero mask - no-op, so replace the load with an undef value llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, llvm::UndefValue::get(callInst->getType())); modifiedAny = true; goto restart; } - else if (maskAsInt == allOnMask) { + else if (maskStatus == ALL_ON) { // The mask is all on, so turn this into a regular load - llvm::Type *ptrType = - llvm::PointerType::get(callInst->getType(), 0); + llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0); ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load", callInst); llvm::Instruction *load = @@ -2558,18 +2584,6 @@ public: char GatherCoalescePass::ID = 0; -/* Returns true if the mask is known at compile time to be "all on". */ -static bool -lIsMaskAllOn(llvm::Value *mask) { - int m = lGetMask(mask); - if (m == -1) - return false; - - int allOnMask = (1 << g->target.vectorWidth) - 1; - return (m == allOnMask); -} - - /** Representation of a memory load that the gather coalescing code has decided to generate. 
*/ @@ -3497,7 +3511,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) { // Then and only then do we have a common base pointer with all // offsets from that constants (in which case we can potentially // coalesce). - if (lIsMaskAllOn(mask) == false) + if (lGetMaskStatus(mask) != ALL_ON) continue; if (!LLVMVectorValuesAllEqual(variableOffsets)) diff --git a/run_tests.py b/run_tests.py index ce5e98f1..79465267 100755 --- a/run_tests.py +++ b/run_tests.py @@ -33,7 +33,7 @@ parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics", default=None) parser.add_option('-t', '--target', dest='target', - help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)', + help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16, generic-32)', default="sse4") parser.add_option('-a', '--arch', dest='arch', help='Set architecture (x86, x86-64)', @@ -69,6 +69,9 @@ if is_generic_target and options.include_file == None: elif options.target == "generic-16": sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n") options.include_file = "examples/intrinsics/generic-16.h" + elif options.target == "generic-32": + sys.stderr.write("No generics #include specified and no default available for \"generic-32\" target.\n") + sys.exit(1) if options.compiler_exe == None: if is_windows: From 1ba8d7ef7432d05d9737501aef7ea543d7d48952 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 3 May 2012 11:11:21 -0700 Subject: [PATCH 115/173] Fix test that had undefined behavior. 
--- tests/array-pointer-duality-1.ispc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/array-pointer-duality-1.ispc b/tests/array-pointer-duality-1.ispc index 1550c294..4fa51cba 100644 --- a/tests/array-pointer-duality-1.ispc +++ b/tests/array-pointer-duality-1.ispc @@ -4,7 +4,7 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { uniform float a[programCount+4]; - for (unsigned int i = 0; i < programCount+4; ++i) + for (uniform int i = 0; i < programCount+4; ++i) a[i] = aFOO[min((int)i, programCount)]; RET[programIndex] = *(a + 2); From 7d7e99a92cceda0377e58f14ae612c00ac2372d8 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 3 May 2012 12:04:24 -0700 Subject: [PATCH 116/173] Update ISPC_MINOR_VERSION to 2 (This should have been done with the 1.2.0 release!) --- module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index 41eeb412..e80ac9f7 100644 --- a/module.cpp +++ b/module.cpp @@ -1334,7 +1334,7 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre opts.addMacroDef("ISPC_POINTER_SIZE=64"); opts.addMacroDef("ISPC_MAJOR_VERSION=1"); - opts.addMacroDef("ISPC_MINOR_VERSION=1"); + opts.addMacroDef("ISPC_MINOR_VERSION=2"); if (g->includeStdlib) { if (g->opt.disableAsserts) From 0c1b206185e13e39be330622e352edd7208fcbad Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 3 May 2012 13:46:56 -0700 Subject: [PATCH 117/173] Pass log/exp/pow transcendentals through to targets that support them. Currently, this is the generic targets. 
--- builtins.cpp | 8 ++- builtins/util.m4 | 7 ++ examples/intrinsics/generic-16.h | 115 +++++++++++++++++++++++++++++++ ispc.cpp | 9 +++ ispc.h | 8 +++ module.cpp | 5 ++ stdlib.ispc | 28 ++++++-- 7 files changed, 172 insertions(+), 8 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index b94fa04f..d9432ae9 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -886,10 +886,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod symbolTable); lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module, symbolTable); - lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module, - symbolTable); + lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, + module, symbolTable); - lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2), + lDefineConstantInt("__have_native_half", g->target.hasHalf, module, + symbolTable); + lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals, module, symbolTable); if (includeStdlibISPC) { diff --git a/builtins/util.m4 b/builtins/util.m4 index 501f2e47..042b2ef5 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1654,6 +1654,13 @@ declare void @__pseudo_scatter_base_offsets64_32(i8 * nocapture, , declare void @__pseudo_scatter_base_offsets64_64(i8 * nocapture, , i32, , , ) nounwind +declare float @__log_uniform_float(float) nounwind readnone +declare @__log_varying_float() nounwind readnone +declare float @__exp_uniform_float(float) nounwind readnone +declare @__exp_varying_float() nounwind readnone +declare float @__pow_uniform_float(float, float) nounwind readnone +declare @__pow_varying_float(, ) nounwind readnone + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; vector ops diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index d6a5c121..384a9ece 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -586,6 
+586,121 @@ ROTATE(__vec16_f, float, float) SHUFFLES(__vec16_f, float, float) LOAD_STORE(__vec16_f, float) +static FORCEINLINE float __exp_uniform_float(float v) { + return expf(v); +} + +static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = expf(v.v[i]); + return ret; +} + +static FORCEINLINE float __log_uniform_float(float v) { + return logf(v); +} + +static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = logf(v.v[i]); + return ret; +} + +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} + +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = powf(a.v[i], b.v[i]); + return ret; +} + +static FORCEINLINE int __intbits(float v) { + union { + float f; + int i; + } u; + u.f = v; + return u.i; +} + +static FORCEINLINE float __floatbits(int v) { + union { + float f; + int i; + } u; + u.i = v; + return u.f; +} + +static FORCEINLINE float __half_to_float_uniform(int16_t h) { + static const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift + + int32_t o = ((int32_t)(h & 0x7fff)) << 13; // exponent/mantissa bits + uint32_t exp = shifted_exp & o; // just the exponent + o += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp == shifted_exp) // Inf/NaN? + o += (128 - 16) << 23; // extra exp adjust + else if (exp == 0) { // Zero/Denormal? 
+ o += 1 << 23; // extra exp adjust + o = __intbits(__floatbits(o) - __floatbits(113 << 23)); // renormalize + } + + o |= ((int32_t)(h & 0x8000)) << 16; // sign bit + return __floatbits(o); +} + + +static FORCEINLINE __vec16_f __half_to_float_varying(__vec16_i16 v) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = __half_to_float_uniform(v.v[i]); + return ret; +} + + +static FORCEINLINE int16_t __float_to_half_uniform(float f) { + uint32_t sign_mask = 0x80000000u; + int32_t o; + + int32_t fint = __intbits(f); + int32_t sign = fint & sign_mask; + fint ^= sign; + + int32_t f32infty = 255 << 23; + o = (fint > f32infty) ? 0x7e00 : 0x7c00; + + // (De)normalized number or zero + // update fint unconditionally to save the blending; we don't need it + // anymore for the Inf/NaN case anyway. + const uint32_t round_mask = ~0xfffu; + const int32_t magic = 15 << 23; + const int32_t f16infty = 31 << 23; + + int32_t fint2 = __intbits(__floatbits(fint & round_mask) * __floatbits(magic)) - round_mask; + fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed + + if (fint < f32infty) + o = fint2 >> 13; // Take the bits! 
+ + return (o | (sign >> 16)); +} + + +static FORCEINLINE __vec16_i16 __float_to_half_varying(__vec16_f v) { + __vec16_i16 ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = __float_to_half_uniform(v.v[i]); + return ret; +} + + /////////////////////////////////////////////////////////////////////////// // double diff --git a/ispc.cpp b/ispc.cpp index 3a2134d1..9d1220d5 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -197,6 +197,9 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->arch = arch; } + // This is the case for most of them + t->hasHalf = t->hasTranscendentals = false; + if (!strcasecmp(isa, "sse2")) { t->isa = Target::SSE2; t->nativeVectorWidth = 4; @@ -256,6 +259,8 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = true; t->allOffMaskIsSafe = true; t->maskBitCount = 1; + t->hasHalf = true; + t->hasTranscendentals = true; } else if (!strcasecmp(isa, "generic-32")) { t->isa = Target::GENERIC; @@ -264,6 +269,8 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = true; t->allOffMaskIsSafe = true; t->maskBitCount = 1; + t->hasHalf = true; + t->hasTranscendentals = true; } else if (!strcasecmp(isa, "generic-1")) { t->isa = Target::GENERIC; @@ -300,6 +307,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = false; t->allOffMaskIsSafe = false; t->maskBitCount = 32; + t->hasHalf = true; } else if (!strcasecmp(isa, "avx2-x2")) { t->isa = Target::AVX2; @@ -309,6 +317,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = false; t->allOffMaskIsSafe = false; t->maskBitCount = 32; + t->hasHalf = true; } #endif // !LLVM_3_0 else { diff --git a/ispc.h b/ispc.h index bb551a6d..e2d9294d 100644 --- a/ispc.h +++ b/ispc.h @@ -249,6 +249,14 @@ struct Target { is 32 on SSE/AVX, since that matches the HW better, but it's 1 for the generic target. 
*/ int maskBitCount; + + /** Indicates whether the target has native support for float/half + conversions. */ + bool hasHalf; + + /** Indicates whether the target has support for transcendentals (beyond + sqrt, which we assume that all of them handle). */ + bool hasTranscendentals; }; diff --git a/module.cpp b/module.cpp index e80ac9f7..8bbb4acc 100644 --- a/module.cpp +++ b/module.cpp @@ -1333,6 +1333,11 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre else opts.addMacroDef("ISPC_POINTER_SIZE=64"); + if (g->target.hasHalf) + opts.addMacroDef("ISPC_TARGET_HAS_HALF"); + if (g->target.hasTranscendentals) + opts.addMacroDef("ISPC_TARGET_HAS_TRANSCENDENTALS"); + opts.addMacroDef("ISPC_MAJOR_VERSION=1"); opts.addMacroDef("ISPC_MINOR_VERSION=2"); diff --git a/stdlib.ispc b/stdlib.ispc index 25871616..9b2fe17d 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -2915,7 +2915,10 @@ static inline uniform float atan2(uniform float y, uniform float x) { __declspec(safe) static inline float exp(float x_full) { - if (__math_lib == __math_lib_svml) { + if (__have_native_transcendentals) { + return __exp_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_exp(x_full); } else if (__math_lib == __math_lib_system) { @@ -2994,7 +2997,10 @@ static inline float exp(float x_full) { __declspec(safe) static inline uniform float exp(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_transcendentals) { + return __exp_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_expf(x_full); } @@ -3116,7 +3122,10 @@ static inline void __range_reduce_log(uniform float input, uniform float * unifo __declspec(safe) static inline float log(float x_full) { - if (__math_lib == __math_lib_svml) { + if (__have_native_transcendentals) { + return __log_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { return 
__svml_log(x_full); } else if (__math_lib == __math_lib_system) { @@ -3204,7 +3213,10 @@ static inline float log(float x_full) { __declspec(safe) static inline uniform float log(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_transcendentals) { + return __log_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_logf(x_full); } @@ -3285,7 +3297,10 @@ static inline uniform float log(uniform float x_full) { __declspec(safe) static inline float pow(float a, float b) { - if (__math_lib == __math_lib_svml) { + if (__have_native_transcendentals) { + return __pow_varying_float(a, b); + } + else if (__math_lib == __math_lib_svml) { return __svml_pow(a, b); } else if (__math_lib == __math_lib_system) { @@ -3304,6 +3319,9 @@ static inline float pow(float a, float b) { __declspec(safe) static inline uniform float pow(uniform float a, uniform float b) { + if (__have_native_transcendentals) { + return __pow_uniform_float(a, b); + } if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_powf(a, b); From b7bef87a4dd911d2b57c2ef975038cd844d3940e Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Thu, 3 May 2012 14:23:33 -0700 Subject: [PATCH 118/173] Added README for vim syntax highlighting. 
--- contrib/ispc.vim.README | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 contrib/ispc.vim.README diff --git a/contrib/ispc.vim.README b/contrib/ispc.vim.README new file mode 100644 index 00000000..fd33df09 --- /dev/null +++ b/contrib/ispc.vim.README @@ -0,0 +1,8 @@ +To install vim syntax highlighting for ispc files: + +1) Copy ispc.vim into ~/.vim/syntax/ispc.vim (create if necessary) +2) Create a filetype for ispc files to correspond to that syntax file + To do this, create and append the following line to ~/.vim/ftdetect/ispc.vim + +au BufRead,BufNewFile *.ispc set filetype=ispc + From 58bb2826b28c35b6ab114d61db9fbe45dfbdbd65 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 10:36:44 -0700 Subject: [PATCH 119/173] Perf: cache connection between const/non-const struct variants. In one very large program, we were spending quite a bit of time repeatedly getting const variants of StructTypes. This speeds up the front-end by about 40% for that test case. (This is something of a band-aid, pending uniquing types.) 
--- type.cpp | 34 ++++++++++++++++++++++++---------- type.h | 2 ++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/type.cpp b/type.cpp index 64e832bb..95877122 100644 --- a/type.cpp +++ b/type.cpp @@ -1754,8 +1754,10 @@ StructType::StructType(const std::string &n, const std::vector &el const std::vector &en, const std::vector &ep, bool ic, Variability v, SourcePos p) - : name(n), elementTypes(elts), elementNames(en), elementPositions(ep), - variability(v), isConst(ic), pos(p) { + : CollectionType(STRUCT_TYPE), name(n), elementTypes(elts), elementNames(en), + elementPositions(ep), variability(v), isConst(ic), pos(p) { + oppositeConstStructType = NULL; + if (variability != Variability::Unbound) { // For structs with non-unbound variability, we'll create the // correspoing LLVM struct type now, if one hasn't been made @@ -1908,21 +1910,33 @@ StructType::ResolveUnboundVariability(Variability v) const { const StructType * StructType::GetAsConstType() const { - if (IsConstType()) + if (isConst == true) return this; - else - return new StructType(name, elementTypes, elementNames, elementPositions, - true, variability, pos); + else if (oppositeConstStructType != NULL) + return oppositeConstStructType; + else { + oppositeConstStructType = + new StructType(name, elementTypes, elementNames, elementPositions, + true, variability, pos); + oppositeConstStructType->oppositeConstStructType = this; + return oppositeConstStructType; + } } const StructType * StructType::GetAsNonConstType() const { - if (!IsConstType()) + if (isConst == false) return this; - else - return new StructType(name, elementTypes, elementNames, elementPositions, - false, variability, pos); + else if (oppositeConstStructType != NULL) + return oppositeConstStructType; + else { + oppositeConstStructType = + new StructType(name, elementTypes, elementNames, elementPositions, + false, variability, pos); + oppositeConstStructType->oppositeConstStructType = this; + return oppositeConstStructType; + } 
} diff --git a/type.h b/type.h index e0560ce5..82e5a611 100644 --- a/type.h +++ b/type.h @@ -732,6 +732,8 @@ private: const Variability variability; const bool isConst; const SourcePos pos; + + mutable const StructType *oppositeConstStructType; }; From c756c855ea7f2dbf429dc446689b8d62717d85e5 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 11:01:10 -0700 Subject: [PATCH 120/173] Compile with -O2 by default on Linux/OSX. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 01746fa4..0011c2ac 100644 --- a/Makefile +++ b/Makefile @@ -62,7 +62,7 @@ BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1) CXX=g++ CPP=cpp -OPT=-g3 +OPT=-O2 CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \ -Wall $(LLVM_VERSION_DEF) \ -DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\"" From 944c53bff11d305d4d92b1a46a18b5fb430ff7e4 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 11:12:33 -0700 Subject: [PATCH 121/173] Stop using dynamic_cast for Types. We now have a set of template functions CastType, etc., that in turn use a new typeId field in each Type instance, allowing them to be inlined and to be quite efficient. This improves front-end performance for a particular large program by 28%. 
--- ast.cpp | 12 +- ctx.cpp | 72 ++++---- decl.cpp | 26 ++- expr.cpp | 482 ++++++++++++++++++++++++++--------------------------- func.cpp | 12 +- module.cpp | 44 ++--- parse.yy | 11 +- stmt.cpp | 14 +- sym.cpp | 6 +- type.cpp | 137 ++++++++------- type.h | 148 +++++++++++++++- 11 files changed, 539 insertions(+), 425 deletions(-) diff --git a/ast.cpp b/ast.cpp index 752585f1..1bf00a0e 100644 --- a/ast.cpp +++ b/ast.cpp @@ -356,10 +356,10 @@ lCheckAllOffSafety(ASTNode *node, void *data) { return false; const Type *type = fce->func->GetType(); - const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); if (pt != NULL) type = pt->GetBaseType(); - const FunctionType *ftype = dynamic_cast(type); + const FunctionType *ftype = CastType(type); Assert(ftype != NULL); if (ftype->isSafe == false) { @@ -405,7 +405,7 @@ lCheckAllOffSafety(ASTNode *node, void *data) { const Type *type = ie->baseExpr->GetType(); if (type == NULL) return true; - if (dynamic_cast(type) != NULL) + if (CastType(type) != NULL) type = type->GetReferenceTarget(); ConstExpr *ce = dynamic_cast(ie->index); @@ -415,16 +415,14 @@ lCheckAllOffSafety(ASTNode *node, void *data) { return false; } - const PointerType *pointerType = - dynamic_cast(type); + const PointerType *pointerType = CastType(type); if (pointerType != NULL) { // pointer[index] -> can't be sure -> not safe *okPtr = false; return false; } - const SequentialType *seqType = - dynamic_cast(type); + const SequentialType *seqType = CastType(type); Assert(seqType != NULL); int nElements = seqType->GetElementCount(); if (nElements == 0) { diff --git a/ctx.cpp b/ctx.cpp index 5d9ed434..cb99ac91 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1194,7 +1194,7 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { llvm::Value *retVal = expr->GetValue(this); if (retVal != NULL) { if (returnType->IsUniformType() || - dynamic_cast(returnType) != NULL) + CastType(returnType) != NULL) StoreInst(retVal, 
returnValuePtr); else { // Use a masked store to store the value of the expression @@ -2063,10 +2063,10 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, // Regularize to a standard pointer type for basePtr's type const PointerType *ptrType; - if (dynamic_cast(ptrRefType) != NULL) + if (CastType(ptrRefType) != NULL) ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { - ptrType = dynamic_cast(ptrRefType); + ptrType = CastType(ptrRefType); Assert(ptrType != NULL); } @@ -2133,10 +2133,10 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 // Regaularize the pointer type for basePtr const PointerType *ptrType = NULL; - if (dynamic_cast(ptrRefType) != NULL) + if (CastType(ptrRefType) != NULL) ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { - ptrType = dynamic_cast(ptrRefType); + ptrType = CastType(ptrRefType); Assert(ptrType != NULL); } @@ -2184,7 +2184,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 // Now index into the second dimension with index1. First figure // out the type of ptr0. const Type *baseType = ptrType->GetBaseType(); - const SequentialType *st = dynamic_cast(baseType); + const SequentialType *st = CastType(baseType); Assert(st != NULL); bool ptr0IsUniform = @@ -2211,10 +2211,10 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, const PointerType *ptrType = NULL; if (ptrRefType != NULL) { // Normalize references to uniform pointers - if (dynamic_cast(ptrRefType) != NULL) + if (CastType(ptrRefType) != NULL) ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else - ptrType = dynamic_cast(ptrRefType); + ptrType = CastType(ptrRefType); Assert(ptrType != NULL); } @@ -2240,8 +2240,8 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, // want it. 
if (resultPtrType != NULL) { Assert(ptrType != NULL); - const CollectionType *ct = - dynamic_cast(ptrType->GetBaseType()); + const CollectionType *ct = + CastType(ptrType->GetBaseType()); Assert(ct != NULL); *resultPtrType = new PointerType(ct->GetElementType(elementNum), ptrType->GetVariability(), @@ -2261,8 +2261,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, else { // Otherwise do the math to find the offset and add it to the given // varying pointers - const StructType *st = - dynamic_cast(ptrType->GetBaseType()); + const StructType *st = CastType(ptrType->GetBaseType()); llvm::Value *offset = NULL; if (st != NULL) // If the pointer is to a structure, Target::StructOffset() gives @@ -2273,8 +2272,8 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, // Otherwise we should have a vector or array here and the offset // is given by the element number times the size of the element // type of the vector. - const SequentialType *st = - dynamic_cast(ptrType->GetBaseType()); + const SequentialType *st = + CastType(ptrType->GetBaseType()); Assert(st != NULL); llvm::Value *size = g->target.SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock); @@ -2340,7 +2339,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { static llvm::Value * lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType **ptrType) { - Assert(dynamic_cast(*ptrType) != NULL); + Assert(CastType(*ptrType) != NULL); llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr")); llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset")); @@ -2377,8 +2376,7 @@ FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const char *name) { const Type *unifType = ptrType->GetBaseType()->GetAsUniformType(); - const CollectionType *ct = - dynamic_cast(ptrType->GetBaseType()); + const CollectionType *ct = CastType(ptrType->GetBaseType()); if (ct != 
NULL) { // If we have a struct/array, we need to decompose it into // individual element loads to fill in the result structure since @@ -2420,10 +2418,10 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, name = LLVMGetName(ptr, "_load"); const PointerType *ptrType; - if (dynamic_cast(ptrRefType) != NULL) + if (CastType(ptrRefType) != NULL) ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { - ptrType = dynamic_cast(ptrRefType); + ptrType = CastType(ptrRefType); Assert(ptrType != NULL); } @@ -2440,8 +2438,8 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, // atomic types, we need to make sure that the compiler emits // unaligned vector loads, so we specify a reduced alignment here. int align = 0; - const AtomicType *atomicType = - dynamic_cast(ptrType->GetBaseType()); + const AtomicType *atomicType = + CastType(ptrType->GetBaseType()); if (atomicType != NULL && atomicType->IsVaryingType()) // We actually just want to align to the vector element // alignment, but can't easily get that here, so just tell LLVM @@ -2473,7 +2471,7 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); const CollectionType *collectionType = - dynamic_cast(ptrType->GetBaseType()); + CastType(ptrType->GetBaseType()); if (collectionType != NULL) { // For collections, recursively gather element wise to find the // result. @@ -2508,7 +2506,7 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, // Figure out which gather function to call based on the size of // the elements. - const PointerType *pt = dynamic_cast(returnType); + const PointerType *pt = CastType(returnType); const char *funcName = NULL; if (pt != NULL) funcName = g->target.is32Bit ? 
"__pseudo_gather32_32" : @@ -2631,12 +2629,11 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, return; } - Assert(dynamic_cast(ptrType) != NULL); + Assert(CastType(ptrType) != NULL); Assert(ptrType->IsUniformType()); const Type *valueType = ptrType->GetBaseType(); - const CollectionType *collectionType = - dynamic_cast(valueType); + const CollectionType *collectionType = CastType(valueType); if (collectionType != NULL) { // Assigning a structure / array / vector. Handle each element // individually with what turns into a recursive call to @@ -2660,7 +2657,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, // Figure out if we need a 8, 16, 32 or 64-bit masked store. llvm::Function *maskedStoreFunc = NULL; - const PointerType *pt = dynamic_cast(valueType); + const PointerType *pt = CastType(valueType); if (pt != NULL) { if (pt->IsSlice()) { // Masked store of (varying) slice pointer. @@ -2714,7 +2711,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, Type::Equal(valueType, AtomicType::VaryingBool) || Type::Equal(valueType, AtomicType::VaryingInt32) || Type::Equal(valueType, AtomicType::VaryingUInt32) || - dynamic_cast(valueType) != NULL) { + CastType(valueType) != NULL) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32"); ptr = BitCastInst(ptr, LLVMTypes::Int32VectorPointerType, LLVMGetName(ptr, "_to_int32vecptr")); @@ -2755,12 +2752,12 @@ void FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, const Type *valueType, const Type *origPt, llvm::Value *mask) { - const PointerType *ptrType = dynamic_cast(origPt); + const PointerType *ptrType = CastType(origPt); Assert(ptrType != NULL); Assert(ptrType->IsVaryingType()); const CollectionType *srcCollectionType = - dynamic_cast(valueType); + CastType(valueType); if (srcCollectionType != NULL) { // We're scattering a collection type--we need to keep track of the // source type (the type of the data values to be 
stored) and the @@ -2771,7 +2768,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, // same struct type, versus scattering into an array of varying // instances of the struct type, etc. const CollectionType *dstCollectionType = - dynamic_cast(ptrType->GetBaseType()); + CastType(ptrType->GetBaseType()); Assert(dstCollectionType != NULL); // Scatter the collection elements individually @@ -2816,11 +2813,10 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, ptr = lFinalSliceOffset(this, ptr, &ptrType); } - const PointerType *pt = dynamic_cast(valueType); + const PointerType *pt = CastType(valueType); // And everything should be a pointer or atomic from here on out... - Assert(pt != NULL || - dynamic_cast(valueType) != NULL); + Assert(pt != NULL || CastType(valueType) != NULL); llvm::Type *type = value->getType(); const char *funcName = NULL; @@ -2896,10 +2892,10 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, } const PointerType *ptrType; - if (dynamic_cast(ptrRefType) != NULL) + if (CastType(ptrRefType) != NULL) ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { - ptrType = dynamic_cast(ptrRefType); + ptrType = CastType(ptrRefType); Assert(ptrType != NULL); } @@ -2936,7 +2932,7 @@ FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, Assert(Type::EqualIgnoringConst(ptrType->GetBaseType()->GetAsUniformType(), valueType)); - const CollectionType *ct = dynamic_cast(valueType); + const CollectionType *ct = CastType(valueType); if (ct != NULL) { // Handle collections element wise... 
for (int i = 0; i < ct->GetElementCount(); ++i) { @@ -3418,7 +3414,7 @@ llvm::Value * FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType) { // This should only be called for varying pointers - const PointerType *pt = dynamic_cast(ptrType); + const PointerType *pt = CastType(ptrType); Assert(pt && pt->IsVaryingType()); const Type *baseType = ptrType->GetBaseType(); diff --git a/decl.cpp b/decl.cpp index 581cca3c..00caa856 100644 --- a/decl.cpp +++ b/decl.cpp @@ -136,7 +136,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const { } if (vectorSize > 0) { - const AtomicType *atomicType = dynamic_cast(retType); + const AtomicType *atomicType = CastType(retType); if (atomicType == NULL) { Error(pos, "Only atomic types (int, float, ...) are legal for vector " "types."); @@ -148,7 +148,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const { retType = lApplyTypeQualifiers(typeQualifiers, retType, pos); if (soaWidth > 0) { - const StructType *st = dynamic_cast(retType); + const StructType *st = CastType(retType); if (st == NULL) { Error(pos, "Illegal to provide soa<%d> qualifier with non-struct " @@ -238,7 +238,7 @@ Declarator::InitFromDeclSpecs(DeclSpecs *ds) { storageClass = ds->storageClass; if (ds->declSpecList.size() > 0 && - dynamic_cast(type) == NULL) { + CastType(type) == NULL) { Error(pos, "__declspec specifiers for non-function type \"%s\" are " "not used.", type->GetString().c_str()); } @@ -351,7 +351,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { return; } // The parser should disallow this already, but double check. 
- if (dynamic_cast(baseType) != NULL) { + if (CastType(baseType) != NULL) { Error(pos, "References to references are illegal."); return; } @@ -370,7 +370,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { Error(pos, "Arrays of \"void\" type are illegal."); return; } - if (dynamic_cast(baseType)) { + if (CastType(baseType)) { Error(pos, "Arrays of references (type \"%s\") are illegal.", baseType->GetString().c_str()); return; @@ -434,7 +434,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { decl->type = NULL; } - const ArrayType *at = dynamic_cast(decl->type); + const ArrayType *at = CastType(decl->type); if (at != NULL) { // As in C, arrays are passed to functions as pointers to // their element type. We'll just immediately make this @@ -454,13 +454,13 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { // Make sure there are no unsized arrays (other than the // first dimension) in function parameter lists. - at = dynamic_cast(targetType); + at = CastType(targetType); while (at != NULL) { if (at->GetElementCount() == 0) Error(decl->pos, "Arrays with unsized dimensions in " "dimensions after the first one are illegal in " "function parameter lists."); - at = dynamic_cast(at->GetElementType()); + at = CastType(at->GetElementType()); } } @@ -497,7 +497,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { return; } - if (dynamic_cast(returnType) != NULL) { + if (CastType(returnType) != NULL) { Error(pos, "Illegal to return function type from function."); return; } @@ -596,7 +596,7 @@ Declaration::GetVariableDeclarations() const { if (Type::Equal(decl->type, AtomicType::Void)) Error(decl->pos, "\"void\" type variable illegal in declaration."); - else if (dynamic_cast(decl->type) == NULL) { + else if (CastType(decl->type) == NULL) { decl->type = decl->type->ResolveUnboundVariability(Variability::Varying); Symbol *sym = new Symbol(decl->name, decl->pos, decl->type, decl->storageClass); @@ -621,8 +621,7 @@ 
Declaration::DeclareFunctions() { continue; } - const FunctionType *ftype = - dynamic_cast(decl->type); + const FunctionType *ftype = CastType(decl->type); if (ftype == NULL) continue; @@ -690,8 +689,7 @@ GetStructTypesNamesPositions(const std::vector &sd, } for (int i = 0; i < (int)elementTypes->size() - 1; ++i) { - const ArrayType *arrayType = - dynamic_cast((*elementTypes)[i]); + const ArrayType *arrayType = CastType((*elementTypes)[i]); if (arrayType != NULL && arrayType->GetElementCount() == 0) Error((*elementPositions)[i], "Unsized arrays aren't allowed except " diff --git a/expr.cpp b/expr.cpp index 0b992ddc..d7c7d04c 100644 --- a/expr.cpp +++ b/expr.cpp @@ -148,7 +148,7 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType, static Expr * lArrayToPointer(Expr *expr) { - Assert(expr && dynamic_cast(expr->GetType())); + Assert(expr && CastType(expr->GetType())); Expr *zero = new ConstExpr(AtomicType::UniformInt32, 0, expr->pos); Expr *index = new IndexExpr(expr, zero, expr->pos); @@ -212,8 +212,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, return false; } - if (dynamic_cast(fromType)) { - if (dynamic_cast(toType) != NULL) { + if (CastType(fromType)) { + if (CastType(toType) != NULL) { // Convert function type to pointer to function type if (expr != NULL) { Expr *aoe = new AddressOfExpr(*expr, (*expr)->pos); @@ -235,7 +235,7 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, return false; } } - if (dynamic_cast(toType)) { + if (CastType(toType)) { if (!failureOk) Error(pos, "Can't convert from type \"%s\" to function type \"%s\" " "for %s.", fromType->GetString().c_str(), @@ -253,18 +253,18 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, return false; } - const ArrayType *toArrayType = dynamic_cast(toType); - const ArrayType *fromArrayType = dynamic_cast(fromType); - const VectorType *toVectorType = dynamic_cast(toType); - const VectorType *fromVectorType = dynamic_cast(fromType); - 
const StructType *toStructType = dynamic_cast(toType); - const StructType *fromStructType = dynamic_cast(fromType); - const EnumType *toEnumType = dynamic_cast(toType); - const EnumType *fromEnumType = dynamic_cast(fromType); - const AtomicType *toAtomicType = dynamic_cast(toType); - const AtomicType *fromAtomicType = dynamic_cast(fromType); - const PointerType *fromPointerType = dynamic_cast(fromType); - const PointerType *toPointerType = dynamic_cast(toType); + const ArrayType *toArrayType = CastType(toType); + const ArrayType *fromArrayType = CastType(fromType); + const VectorType *toVectorType = CastType(toType); + const VectorType *fromVectorType = CastType(fromType); + const StructType *toStructType = CastType(toType); + const StructType *fromStructType = CastType(fromType); + const EnumType *toEnumType = CastType(toType); + const EnumType *fromEnumType = CastType(fromType); + const AtomicType *toAtomicType = CastType(toType); + const AtomicType *fromAtomicType = CastType(fromType); + const PointerType *fromPointerType = CastType(fromType); + const PointerType *toPointerType = CastType(toType); // Do this early, since for the case of a conversion like // "float foo[10]" -> "float * uniform foo", we have what's seemingly @@ -303,7 +303,7 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, } if (fromPointerType != NULL) { - if (dynamic_cast(toType) != NULL && + if (CastType(toType) != NULL && toType->IsBoolType()) // Allow implicit conversion of pointers to bools goto typecast_ok; @@ -395,8 +395,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, if (Type::Equal(toType, fromType->GetAsConstType())) goto typecast_ok; - if (dynamic_cast(fromType)) { - if (dynamic_cast(toType)) { + if (CastType(fromType)) { + if (CastType(toType)) { // Convert from a reference to a type to a const reference to a type; // this is handled by TypeCastExpr if (Type::Equal(toType->GetReferenceTarget(), @@ -404,9 +404,9 @@ lDoTypeConv(const Type 
*fromType, const Type *toType, Expr **expr, goto typecast_ok; const ArrayType *atFrom = - dynamic_cast(fromType->GetReferenceTarget()); + CastType(fromType->GetReferenceTarget()); const ArrayType *atTo = - dynamic_cast(toType->GetReferenceTarget()); + CastType(toType->GetReferenceTarget()); if (atFrom != NULL && atTo != NULL && Type::Equal(atFrom->GetElementType(), atTo->GetElementType())) { @@ -436,7 +436,7 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, failureOk, errorMsgBase, pos); } } - else if (dynamic_cast(toType)) { + else if (CastType(toType)) { // T -> reference T if (expr != NULL) { Expr *rExpr = new ReferenceExpr(*expr, pos); @@ -593,8 +593,8 @@ bool PossiblyResolveFunctionOverloads(Expr *expr, const Type *type) { FunctionSymbolExpr *fse = NULL; const FunctionType *funcType = NULL; - if (dynamic_cast(type) != NULL && - (funcType = dynamic_cast(type->GetBaseType())) && + if (CastType(type) != NULL && + (funcType = CastType(type->GetBaseType())) && (fse = dynamic_cast(expr)) != NULL) { // We're initializing a function pointer with a function symbol, // which in turn may represent an overloaded function. So we need @@ -693,7 +693,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, return; } - const ReferenceType *rt = dynamic_cast(symType); + const ReferenceType *rt = CastType(symType); if (rt) { if (!Type::Equal(initExpr->GetType(), rt)) { Error(initExpr->pos, "Initializer for reference type \"%s\" must have same " @@ -710,18 +710,17 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, // Handle initiailizers for SOA types as well as for structs, arrays, // and vectors. - const CollectionType *collectionType = - dynamic_cast(symType); + const CollectionType *collectionType = CastType(symType); if (collectionType != NULL || symType->IsSOAType()) { int nElements = collectionType ? 
collectionType->GetElementCount() : symType->GetSOAWidth(); std::string name; - if (dynamic_cast(symType) != NULL) + if (CastType(symType) != NULL) name = "struct"; - else if (dynamic_cast(symType) != NULL) + else if (CastType(symType) != NULL) name = "array"; - else if (dynamic_cast(symType) != NULL) + else if (CastType(symType) != NULL) name = "vector"; else if (symType->IsSOAType()) name = symType->GetVariability().GetString(); @@ -760,7 +759,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, } llvm::Value *ep; - if (dynamic_cast(symType) != NULL) + if (CastType(symType) != NULL) ep = ctx->AddElementOffset(ptr, i, NULL, "element"); else ep = ctx->GetElementPtrInst(ptr, LLVMInt32(0), LLVMInt32(i), @@ -805,12 +804,12 @@ lMatchingBoolType(const Type *type) { bool uniformTest = type->IsUniformType(); const AtomicType *boolBase = uniformTest ? AtomicType::UniformBool : AtomicType::VaryingBool; - const VectorType *vt = dynamic_cast(type); + const VectorType *vt = CastType(type); if (vt != NULL) return new VectorType(boolBase, vt->GetElementCount()); else { - Assert(dynamic_cast(type) != NULL || - dynamic_cast(type) != NULL); + Assert(CastType(type) != NULL || + CastType(type) != NULL); return boolBase; } } @@ -820,10 +819,10 @@ lMatchingBoolType(const Type *type) { static llvm::Constant * lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { - const AtomicType *atomicType = dynamic_cast(type); - const EnumType *enumType = dynamic_cast(type); - const VectorType *vectorType = dynamic_cast(type); - const PointerType *pointerType = dynamic_cast(type); + const AtomicType *atomicType = CastType(type); + const EnumType *enumType = CastType(type); + const VectorType *vectorType = CastType(type); + const PointerType *pointerType = CastType(type); // This function is only called with, and only works for atomic, enum, // and vector types. 
@@ -941,8 +940,8 @@ lMaskForSymbol(Symbol *baseSym, FunctionEmitContext *ctx) { if (baseSym == NULL) return ctx->GetFullMask(); - if (dynamic_cast(baseSym->type) != NULL || - dynamic_cast(baseSym->type) != NULL) + if (CastType(baseSym->type) != NULL || + CastType(baseSym->type) != NULL) // FIXME: for pointers, we really only want to do this for // dereferencing the pointer, not for things like pointer // arithmetic, when we may be able to use the internal mask, @@ -969,8 +968,8 @@ lStoreAssignResult(llvm::Value *value, llvm::Value *ptr, const Type *valueType, baseSym != NULL && baseSym->varyingCFDepth == ctx->VaryingCFDepth() && baseSym->storageClass != SC_STATIC && - dynamic_cast(baseSym->type) == NULL && - dynamic_cast(baseSym->type) == NULL) { + CastType(baseSym->type) == NULL && + CastType(baseSym->type) == NULL) { // If the variable is declared at the same varying control flow // depth as where it's being assigned, then we don't need to do any // masking but can just do the assignment as if all the lanes were @@ -1000,7 +999,7 @@ lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos, // Get both the lvalue and the rvalue of the given expression llvm::Value *lvalue = NULL, *rvalue = NULL; const Type *lvalueType = NULL; - if (dynamic_cast(type) != NULL) { + if (CastType(type) != NULL) { lvalueType = type; type = type->GetReferenceTarget(); lvalue = expr->GetValue(ctx); @@ -1036,7 +1035,7 @@ lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos, else opName += "_minus1"; - if (dynamic_cast(type) != NULL) { + if (CastType(type) != NULL) { const Type *incType = type->IsUniformType() ? 
AtomicType::UniformInt32 : AtomicType::VaryingInt32; llvm::Constant *dval = lLLVMConstantValue(incType, g->ctx, delta); @@ -1161,7 +1160,7 @@ UnaryExpr::Optimize() { return this; const Type *type = constExpr->GetType(); - bool isEnumType = dynamic_cast(type) != NULL; + bool isEnumType = CastType(type) != NULL; const Type *baseType = type->GetAsNonConstType()->GetAsUniformType(); if (Type::Equal(baseType, AtomicType::UniformInt8) || @@ -1253,7 +1252,7 @@ UnaryExpr::TypeCheck() { if (type->IsNumericType()) return this; - const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); if (pt == NULL) { Error(expr->pos, "Can only pre/post increment numeric and " "pointer types, not \"%s\".", type->GetString().c_str()); @@ -1265,7 +1264,7 @@ UnaryExpr::TypeCheck() { type->GetString().c_str()); return NULL; } - if (dynamic_cast(pt->GetBaseType())) { + if (CastType(pt->GetBaseType())) { Error(expr->pos, "Illegal to pre/post increment pointer to " "undefined struct type \"%s\".", type->GetString().c_str()); return NULL; @@ -1275,7 +1274,7 @@ UnaryExpr::TypeCheck() { } // don't do this for pre/post increment/decrement - if (dynamic_cast(type)) { + if (CastType(type)) { expr = new RefDerefExpr(expr, pos); type = expr->GetType(); } @@ -1399,7 +1398,7 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0, llvm::Value *value1, const Type *type0, const Type *type1, FunctionEmitContext *ctx, SourcePos pos) { - const PointerType *ptrType = dynamic_cast(type0); + const PointerType *ptrType = CastType(type0); switch (op) { case BinaryExpr::Add: @@ -1407,7 +1406,7 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0, return ctx->GetElementPtrInst(value0, value1, ptrType, "ptrmath"); break; case BinaryExpr::Sub: { - if (dynamic_cast(type1) != NULL) { + if (CastType(type1) != NULL) { Assert(Type::Equal(type0, type1)); if (ptrType->IsSlice()) { @@ -1501,7 +1500,7 @@ static llvm::Value * lEmitBinaryArith(BinaryExpr::Op op, llvm::Value 
*value0, llvm::Value *value1, const Type *type0, const Type *type1, FunctionEmitContext *ctx, SourcePos pos) { - const PointerType *ptrType = dynamic_cast(type0); + const PointerType *ptrType = CastType(type0); if (ptrType != NULL) return lEmitBinaryPointerArith(op, value0, value1, type0, type1, @@ -1641,8 +1640,8 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, // FIXME: not sure what we should do about vector types here... bool shortCircuit = (EstimateCost(arg1) > PREDICATE_SAFE_IF_STATEMENT_COST || SafeToRunWithMaskAllOff(arg1) == false || - dynamic_cast(type0) != NULL || - dynamic_cast(type1) != NULL); + CastType(type0) != NULL || + CastType(type1) != NULL); if (shortCircuit == false) { // If one of the operands is uniform but the other is varying, // promote the uniform one to varying @@ -1940,17 +1939,17 @@ BinaryExpr::GetType() const { // and will fail type checking and (int + ptr) should be canonicalized // into (ptr + int) by type checking. if (op == Add) - Assert(dynamic_cast(type1) == NULL); + Assert(CastType(type1) == NULL); if (op == Comma) return arg1->GetType(); - if (dynamic_cast(type0) != NULL) { + if (CastType(type0) != NULL) { if (op == Add) // ptr + int -> ptr return type0; else if (op == Sub) { - if (dynamic_cast(type1) != NULL) { + if (CastType(type1) != NULL) { // ptr - ptr -> ~ptrdiff_t const Type *diffType = (g->target.is32Bit || g->opt.force32BitAddressing) ? 
@@ -2232,7 +2231,7 @@ BinaryExpr::Optimize() { } else if (Type::Equal(type, AtomicType::UniformUInt32) || Type::Equal(type, AtomicType::VaryingUInt32) || - dynamic_cast(type) != NULL) { + CastType(type) != NULL) { uint32_t v0[ISPC_MAX_NVEC], v1[ISPC_MAX_NVEC]; constArg0->AsUInt32(v0); constArg1->AsUInt32(v1); @@ -2275,23 +2274,23 @@ BinaryExpr::TypeCheck() { // If either operand is a reference, dereference it before we move // forward - if (dynamic_cast(type0) != NULL) { + if (CastType(type0) != NULL) { arg0 = new RefDerefExpr(arg0, arg0->pos); type0 = arg0->GetType(); Assert(type0 != NULL); } - if (dynamic_cast(type1) != NULL) { + if (CastType(type1) != NULL) { arg1 = new RefDerefExpr(arg1, arg1->pos); type1 = arg1->GetType(); Assert(type1 != NULL); } // Convert arrays to pointers to their first elements - if (dynamic_cast(type0) != NULL) { + if (CastType(type0) != NULL) { arg0 = lArrayToPointer(arg0); type0 = arg0->GetType(); } - if (dynamic_cast(type1) != NULL) { + if (CastType(type1) != NULL) { arg1 = lArrayToPointer(arg1); type1 = arg1->GetType(); } @@ -2308,8 +2307,8 @@ BinaryExpr::TypeCheck() { return NULL; } - const PointerType *pt0 = dynamic_cast(type0); - const PointerType *pt1 = dynamic_cast(type1); + const PointerType *pt0 = CastType(type0); + const PointerType *pt1 = CastType(type1); if (pt0 != NULL && pt1 != NULL && op == Sub) { // Pointer subtraction if (PointerType::IsVoidPointer(type0)) { @@ -2322,12 +2321,12 @@ BinaryExpr::TypeCheck() { "on \"%s\" type.", type1->GetString().c_str()); return NULL; } - if (dynamic_cast(pt0->GetBaseType())) { + if (CastType(pt0->GetBaseType())) { Error(pos, "Illegal to perform pointer arithmetic " "on undefined struct type \"%s\".", pt0->GetString().c_str()); return NULL; } - if (dynamic_cast(pt1->GetBaseType())) { + if (CastType(pt1->GetBaseType())) { Error(pos, "Illegal to perform pointer arithmetic " "on undefined struct type \"%s\".", pt1->GetString().c_str()); return NULL; @@ -2367,7 +2366,7 @@ 
BinaryExpr::TypeCheck() { "on \"%s\" type.", pt0->GetString().c_str()); return NULL; } - if (dynamic_cast(pt0->GetBaseType())) { + if (CastType(pt0->GetBaseType())) { Error(pos, "Illegal to perform pointer arithmetic " "on undefined struct type \"%s\".", pt0->GetString().c_str()); return NULL; @@ -2474,20 +2473,20 @@ BinaryExpr::TypeCheck() { case Ge: case Equal: case NotEqual: { - const PointerType *pt0 = dynamic_cast(type0); - const PointerType *pt1 = dynamic_cast(type1); + const PointerType *pt0 = CastType(type0); + const PointerType *pt1 = CastType(type1); // Convert '0' in expressions where the other expression is a // pointer type to a NULL pointer. if (pt0 != NULL && lIsAllIntZeros(arg1)) { arg1 = new NullPointerExpr(pos); type1 = arg1->GetType(); - pt1 = dynamic_cast(type1); + pt1 = CastType(type1); } else if (pt1 != NULL && lIsAllIntZeros(arg0)) { arg0 = new NullPointerExpr(pos); type0 = arg1->GetType(); - pt0 = dynamic_cast(type0); + pt0 = CastType(type0); } if (pt0 == NULL && pt1 == NULL) { @@ -2529,8 +2528,8 @@ BinaryExpr::TypeCheck() { AtomicType::UniformBool : AtomicType::VaryingBool; const Type *destType0 = NULL, *destType1 = NULL; - const VectorType *vtype0 = dynamic_cast(type0); - const VectorType *vtype1 = dynamic_cast(type1); + const VectorType *vtype0 = CastType(type0); + const VectorType *vtype1 = CastType(type1); if (vtype0 && vtype1) { int sz0 = vtype0->GetElementCount(), sz1 = vtype1->GetElementCount(); if (sz0 != sz1) { @@ -2746,8 +2745,8 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const { case XorAssign: case OrAssign: { // This should be caught during type checking - Assert(!dynamic_cast(type) && - !dynamic_cast(type)); + Assert(!CastType(type) && + !CastType(type)); return lEmitOpAssign(op, lvalue, rvalue, type, baseSym, pos, ctx); } default: @@ -2794,7 +2793,7 @@ lCheckForConstStructMember(SourcePos pos, const StructType *structType, return true; } - const StructType *st = dynamic_cast(t); + const StructType *st = CastType(t); if 
(st != NULL && lCheckForConstStructMember(pos, st, initialType)) return true; } @@ -2808,7 +2807,7 @@ AssignExpr::TypeCheck() { return NULL; bool lvalueIsReference = - dynamic_cast(lvalue->GetType()) != NULL; + CastType(lvalue->GetType()) != NULL; if (lvalueIsReference) lvalue = new RefDerefExpr(lvalue, lvalue->pos); @@ -2819,8 +2818,8 @@ AssignExpr::TypeCheck() { // function is overloaded. const Type *lvalueType = lvalue->GetType(); const FunctionType *ftype; - if (dynamic_cast(lvalueType) == NULL || - (ftype = dynamic_cast(lvalueType->GetBaseType())) == NULL) { + if (CastType(lvalueType) == NULL || + (ftype = CastType(lvalueType->GetBaseType())) == NULL) { Error(lvalue->pos, "Can't assign function pointer to type \"%s\".", lvalue->GetType()->GetString().c_str()); return NULL; @@ -2849,7 +2848,7 @@ AssignExpr::TypeCheck() { return NULL; } - if (dynamic_cast(lhsType) != NULL) { + if (CastType(lhsType) != NULL) { if (op == AddAssign || op == SubAssign) { if (PointerType::IsVoidPointer(lhsType)) { Error(pos, "Illegal to perform pointer arithmetic on \"%s\" " @@ -2871,7 +2870,7 @@ AssignExpr::TypeCheck() { return NULL; } } - else if (dynamic_cast(lhsType) != NULL) { + else if (CastType(lhsType) != NULL) { Error(lvalue->pos, "Illegal to assign to array type \"%s\".", lhsType->GetString().c_str()); return NULL; @@ -2891,7 +2890,7 @@ AssignExpr::TypeCheck() { } // Make sure we're not assigning to a struct that has a constant member - const StructType *st = dynamic_cast(lhsType); + const StructType *st = CastType(lhsType); if (st != NULL && lCheckForConstStructMember(pos, st, st)) return NULL; @@ -3027,7 +3026,7 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const { ret->addIncoming(expr2Val, falsePred); return ret; } - else if (dynamic_cast(testType) == NULL) { + else if (CastType(testType) == NULL) { // the test is a varying bool type llvm::Value *testVal = test->GetValue(ctx); Assert(testVal->getType() == LLVMTypes::MaskType); @@ -3091,11 +3090,11 @@ 
SelectExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *expr2Val = expr2->GetValue(ctx); ctx->SetDebugPos(pos); - const VectorType *vt = dynamic_cast(type); + const VectorType *vt = CastType(type); // Things that typechecking should have caught Assert(vt != NULL); - Assert(dynamic_cast(testType) != NULL && - (dynamic_cast(testType)->GetElementCount() == + Assert(CastType(testType) != NULL && + (CastType(testType)->GetElementCount() == vt->GetElementCount())); // Do an element-wise select @@ -3131,10 +3130,10 @@ SelectExpr::GetType() const { bool becomesVarying = (testType->IsVaryingType() || expr1Type->IsVaryingType() || expr2Type->IsVaryingType()); // if expr1 and expr2 have different vector sizes, typechecking should fail... - int testVecSize = dynamic_cast(testType) != NULL ? - dynamic_cast(testType)->GetElementCount() : 0; - int expr1VecSize = dynamic_cast(expr1Type) != NULL ? - dynamic_cast(expr1Type)->GetElementCount() : 0; + int testVecSize = CastType(testType) != NULL ? + CastType(testType)->GetElementCount() : 0; + int expr1VecSize = CastType(expr1Type) != NULL ? + CastType(expr1Type)->GetElementCount() : 0; Assert(!(testVecSize != 0 && expr1VecSize != 0 && testVecSize != expr1VecSize)); int vectorSize = std::max(testVecSize, expr1VecSize); @@ -3247,12 +3246,12 @@ SelectExpr::TypeCheck() { if (!type1 || !type2) return NULL; - if (dynamic_cast(type1)) { + if (CastType(type1)) { Error(pos, "Array type \"%s\" can't be used in select expression", type1->GetString().c_str()); return NULL; } - if (dynamic_cast(type2)) { + if (CastType(type2)) { Error(pos, "Array type \"%s\" can't be used in select expression", type2->GetString().c_str()); return NULL; @@ -3266,8 +3265,8 @@ SelectExpr::TypeCheck() { return NULL; testType = test->GetType(); - int testVecSize = dynamic_cast(testType) ? - dynamic_cast(testType)->GetElementCount() : 0; + int testVecSize = CastType(testType) ? 
+ CastType(testType)->GetElementCount() : 0; const Type *promotedType = Type::MoreGeneralType(type1, type2, Union(expr1->pos, expr2->pos), "select expression", testType->IsVaryingType(), testVecSize); @@ -3326,11 +3325,11 @@ lGetFunctionType(Expr *func) { if (type == NULL) return NULL; - const FunctionType *ftype = dynamic_cast(type); + const FunctionType *ftype = CastType(type); if (ftype == NULL) { // Not a regular function symbol--is it a function pointer? - if (dynamic_cast(type) != NULL) - ftype = dynamic_cast(type->GetBaseType()); + if (CastType(type) != NULL) + ftype = CastType(type->GetBaseType()); } return ftype; } @@ -3377,9 +3376,9 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { const Type *argLValueType = argExpr->GetLValueType(); if (argLValueType != NULL && - dynamic_cast(argLValueType) != NULL && + CastType(argLValueType) != NULL && argLValueType->IsVaryingType() && - dynamic_cast(paramType) != NULL) { + CastType(paramType) != NULL) { Error(argExpr->pos, "Illegal to pass a \"varying\" lvalue to a " "reference parameter of type \"%s\".", paramType->GetString().c_str()); @@ -3496,13 +3495,11 @@ FunctionCallExpr::TypeCheck() { if (func == NULL) return NULL; - const FunctionType *ft = - dynamic_cast(func->GetType()); + const FunctionType *ft = CastType(func->GetType()); if (ft == NULL) { - const PointerType *pt = - dynamic_cast(func->GetType()); + const PointerType *pt = CastType(func->GetType()); ft = (pt == NULL) ? 
NULL : - dynamic_cast(pt->GetBaseType()); + CastType(pt->GetBaseType()); } if (ft == NULL) { @@ -3538,8 +3535,8 @@ FunctionCallExpr::TypeCheck() { // Make sure we do in fact have a function to call const FunctionType *funcType; - if (dynamic_cast(fptrType) == NULL || - (funcType = dynamic_cast(fptrType->GetBaseType())) == NULL) { + if (CastType(fptrType) == NULL || + (funcType = CastType(fptrType->GetBaseType())) == NULL) { Error(func->pos, "Must provide function name or function pointer for " "function call expression."); return NULL; @@ -3571,7 +3568,7 @@ FunctionCallExpr::TypeCheck() { const Type *paramType = funcType->GetParameterType(i); if (CanConvertTypes(argTypes[i], paramType) == false && !(argCouldBeNULL[i] == true && - dynamic_cast(paramType) != NULL)) { + CastType(paramType) != NULL)) { Error(args->exprs[i]->pos, "Can't convert argument of " "type \"%s\" to type \"%s\" for function call " "argument.", argTypes[i]->GetString().c_str(), @@ -3609,10 +3606,10 @@ FunctionCallExpr::EstimateCost() const { if (type == NULL) return 0; - const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); if (pt != NULL) type = type->GetBaseType(); - const FunctionType *ftype = dynamic_cast(type); + const FunctionType *ftype = CastType(type); if (ftype->costOverride > -1) return ftype->costOverride; @@ -3671,22 +3668,21 @@ ExprList::TypeCheck() { llvm::Constant * ExprList::GetConstant(const Type *type) const { if (exprs.size() == 1 && - (dynamic_cast(type) != NULL || - dynamic_cast(type) != NULL || - dynamic_cast(type) != NULL)) + (CastType(type) != NULL || + CastType(type) != NULL || + CastType(type) != NULL)) return exprs[0]->GetConstant(type); - const CollectionType *collectionType = - dynamic_cast(type); + const CollectionType *collectionType = CastType(type); if (collectionType == NULL) return NULL; std::string name; - if (dynamic_cast(type) != NULL) + if (CastType(type) != NULL) name = "struct"; - else if (dynamic_cast(type) != NULL) + else 
if (CastType(type) != NULL) name = "array"; - else if (dynamic_cast(type) != NULL) + else if (CastType(type) != NULL) name = "vector"; else FATAL("Unexpected CollectionType in ExprList::GetConstant()"); @@ -3745,7 +3741,7 @@ ExprList::GetConstant(const Type *type) const { cv.push_back(c); } - if (dynamic_cast(type) != NULL) { + if (CastType(type) != NULL) { llvm::StructType *llvmStructType = llvm::dyn_cast(collectionType->LLVMType(g->ctx)); Assert(llvmStructType != NULL); @@ -3760,7 +3756,7 @@ ExprList::GetConstant(const Type *type) const { else { // uniform short vector type Assert(type->IsUniformType() && - dynamic_cast(type) != NULL); + CastType(type) != NULL); llvm::VectorType *lvt = llvm::dyn_cast(lt); Assert(lvt != NULL); @@ -3829,11 +3825,11 @@ IndexExpr::IndexExpr(Expr *a, Expr *i, SourcePos p) static llvm::Value * lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr, const Type *ptrRefType) { - if (dynamic_cast(ptrRefType) != NULL) + if (CastType(ptrRefType) != NULL) // References are uniform pointers, so no offsetting is needed return ptr; - const PointerType *ptrType = dynamic_cast(ptrRefType); + const PointerType *ptrType = CastType(ptrRefType); Assert(ptrType != NULL); if (ptrType->IsUniformType() || ptrType->IsSlice()) return ptr; @@ -3869,21 +3865,21 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr, */ static bool lVaryingStructHasUniformMember(const Type *type, SourcePos pos) { - if (dynamic_cast(type) != NULL || - dynamic_cast(type) != NULL) + if (CastType(type) != NULL || + CastType(type) != NULL) return false; - const StructType *st = dynamic_cast(type); + const StructType *st = CastType(type); if (st == NULL) { - const ArrayType *at = dynamic_cast(type); + const ArrayType *at = CastType(type); if (at != NULL) - st = dynamic_cast(at->GetElementType()); + st = CastType(at->GetElementType()); else { - const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); if (pt == NULL) return 
false; - st = dynamic_cast(pt->GetBaseType()); + st = CastType(pt->GetBaseType()); } if (st == NULL) @@ -3900,7 +3896,7 @@ lVaryingStructHasUniformMember(const Type *type, SourcePos pos) { continue; } - if (dynamic_cast(eltType) != NULL) { + if (CastType(eltType) != NULL) { // We know that the enclosing struct is varying at this point, // so push that down to the enclosed struct before makign the // recursive call. @@ -3960,8 +3956,7 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { ctx->StoreInst(val, tmpPtr); // Get a pointer type to the underlying elements - const SequentialType *st = - dynamic_cast(baseExprType); + const SequentialType *st = CastType(baseExprType); Assert(st != NULL); lvalueType = PointerType::GetUniform(st->GetElementType()); @@ -3992,15 +3987,14 @@ IndexExpr::GetType() const { return NULL; const Type *elementType = NULL; - const PointerType *pointerType = - dynamic_cast(baseExprType); + const PointerType *pointerType = CastType(baseExprType); if (pointerType != NULL) // ptr[index] -> type that the pointer points to elementType = pointerType->GetBaseType(); else { // sequential type[index] -> element type of the sequential type const SequentialType *sequentialType = - dynamic_cast(baseExprType->GetReferenceTarget()); + CastType(baseExprType->GetReferenceTarget()); // Typechecking should have caught this... 
Assert(sequentialType != NULL); elementType = sequentialType->GetElementType(); @@ -4060,8 +4054,7 @@ lConvertToSlicePointer(FunctionEmitContext *ctx, llvm::Value *ptr, */ static void lCheckIndicesVersusBounds(const Type *baseExprType, Expr *index) { - const SequentialType *seqType = - dynamic_cast(baseExprType); + const SequentialType *seqType = CastType(baseExprType); if (seqType == NULL) return; @@ -4099,7 +4092,7 @@ lConvertPtrToSliceIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr, const Type **type) { Assert(*type != NULL); - const PointerType *ptrType = dynamic_cast(*type); + const PointerType *ptrType = CastType(*type); bool convertToSlice = (ptrType->GetBaseType()->IsSOAType() && ptrType->IsSlice() == false); if (convertToSlice == false) @@ -4127,7 +4120,7 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { } ctx->SetDebugPos(pos); - if (dynamic_cast(baseExprType) != NULL) { + if (CastType(baseExprType) != NULL) { // We're indexing off of a pointer llvm::Value *basePtrValue = baseExpr->GetValue(ctx); if (basePtrValue == NULL) { @@ -4150,16 +4143,16 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { // a reference thereuponfore.) 
llvm::Value *basePtr = NULL; const PointerType *basePtrType = NULL; - if (dynamic_cast(baseExprType) || - dynamic_cast(baseExprType)) { + if (CastType(baseExprType) || + CastType(baseExprType)) { basePtr = baseExpr->GetLValue(ctx); - basePtrType = dynamic_cast(baseExpr->GetLValueType()); + basePtrType = CastType(baseExpr->GetLValueType()); if (baseExpr->GetLValueType()) Assert(basePtrType != NULL); } else { baseExprType = baseExprType->GetReferenceTarget(); - Assert(dynamic_cast(baseExprType) || - dynamic_cast(baseExprType)); + Assert(CastType(baseExprType) || + CastType(baseExprType)); basePtr = baseExpr->GetValue(ctx); basePtrType = PointerType::GetUniform(baseExprType); } @@ -4193,21 +4186,21 @@ IndexExpr::GetLValueType() const { return NULL; // regularize to a PointerType - if (dynamic_cast(baseExprLValueType) != NULL) { + if (CastType(baseExprLValueType) != NULL) { const Type *refTarget = baseExprLValueType->GetReferenceTarget(); baseExprLValueType = PointerType::GetUniform(refTarget); } - Assert(dynamic_cast(baseExprLValueType) != NULL); + Assert(CastType(baseExprLValueType) != NULL); // Find the type of thing that we're indexing into const Type *elementType; const SequentialType *st = - dynamic_cast(baseExprLValueType->GetBaseType()); + CastType(baseExprLValueType->GetBaseType()); if (st != NULL) elementType = st->GetElementType(); else { const PointerType *pt = - dynamic_cast(baseExprLValueType->GetBaseType()); + CastType(baseExprLValueType->GetBaseType()); Assert(pt != NULL); elementType = pt->GetBaseType(); } @@ -4215,7 +4208,7 @@ IndexExpr::GetLValueType() const { // Are we indexing into a varying type, or are we indexing with a // varying pointer? 
bool baseVarying; - if (dynamic_cast(baseExprType) != NULL) + if (CastType(baseExprType) != NULL) baseVarying = baseExprType->IsVaryingType(); else baseVarying = baseExprLValueType->IsVaryingType(); @@ -4261,8 +4254,8 @@ IndexExpr::TypeCheck() { return NULL; } - if (!dynamic_cast(baseExprType->GetReferenceTarget()) && - !dynamic_cast(baseExprType)) { + if (!CastType(baseExprType->GetReferenceTarget()) && + !CastType(baseExprType)) { Error(pos, "Trying to index into non-array, vector, or pointer " "type \"%s\".", baseExprType->GetString().c_str()); return NULL; @@ -4298,7 +4291,7 @@ IndexExpr::EstimateCost() const { const Type *baseExprType = baseExpr->GetType(); if ((indexType != NULL && indexType->IsVaryingType()) || - (dynamic_cast(baseExprType) != NULL && + (CastType(baseExprType) != NULL && baseExprType->IsVaryingType())) // be pessimistic; some of these will later turn out to be vector // loads/stores, but it's too early for us to know that here. @@ -4399,8 +4392,8 @@ StructMemberExpr::GetType() const { } Assert(Type::Equal(lvalueType->GetBaseType(), elementType)); - bool isSlice = (dynamic_cast(lvalueType) && - dynamic_cast(lvalueType)->IsSlice()); + bool isSlice = (CastType(lvalueType) && + CastType(lvalueType)->IsSlice()); if (isSlice) { // FIXME: not true if we allow bound unif/varying for soa<> // structs?... @@ -4441,7 +4434,7 @@ StructMemberExpr::GetLValueType() const { // varying (and otherwise uniform) const PointerType *ptrType = (exprLValueType->IsUniformType() || - dynamic_cast(exprLValueType) != NULL) ? + CastType(exprLValueType) != NULL) ? PointerType::GetUniform(getElementType()) : PointerType::GetVarying(getElementType()); @@ -4449,8 +4442,8 @@ StructMemberExpr::GetLValueType() const { // needs to be a frozen slice pointer--i.e. any further indexing with // the result shouldn't modify the minor slice offset, but it should be // left unchanged until we get to a leaf SOA value. 
- if (dynamic_cast(exprLValueType) && - dynamic_cast(exprLValueType)->IsSlice()) + if (CastType(exprLValueType) && + CastType(exprLValueType)->IsSlice()) ptrType = ptrType->GetAsFrozenSlice(); return ptrType; @@ -4494,16 +4487,16 @@ StructMemberExpr::getStructType() const { return NULL; const Type *structType; - const ReferenceType *rt = dynamic_cast(type); + const ReferenceType *rt = CastType(type); if (rt != NULL) structType = rt->GetReferenceTarget(); else { - const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); Assert(pt != NULL); structType = pt->GetBaseType(); } - const StructType *ret = dynamic_cast(structType); + const StructType *ret = CastType(structType); Assert(ret != NULL); return ret; } @@ -4536,15 +4529,15 @@ VectorMemberExpr::VectorMemberExpr(Expr *e, const char *id, SourcePos p, SourcePos idpos, bool derefLValue) : MemberExpr(e, id, p, idpos, derefLValue) { const Type *exprType = e->GetType(); - exprVectorType = dynamic_cast(exprType); + exprVectorType = CastType(exprType); if (exprVectorType == NULL) { - const PointerType *pt = dynamic_cast(exprType); + const PointerType *pt = CastType(exprType); if (pt != NULL) - exprVectorType = dynamic_cast(pt->GetBaseType()); + exprVectorType = CastType(pt->GetBaseType()); else { - Assert(dynamic_cast(exprType) != NULL); + Assert(CastType(exprType) != NULL); exprVectorType = - dynamic_cast(exprType->GetReferenceTarget()); + CastType(exprType->GetReferenceTarget()); } Assert(exprVectorType != NULL); } @@ -4565,8 +4558,8 @@ VectorMemberExpr::GetType() const { const Type *lvalueType = GetLValueType(); if (lvalueType != NULL) { - bool isSlice = (dynamic_cast(lvalueType) && - dynamic_cast(lvalueType)->IsSlice()); + bool isSlice = (CastType(lvalueType) && + CastType(lvalueType)->IsSlice()); if (isSlice) { //CO Assert(type->IsSOAType()); if (lvalueType->IsUniformType()) @@ -4605,24 +4598,24 @@ VectorMemberExpr::GetLValueType() const { return NULL; const VectorType *vt = NULL; - if 
(dynamic_cast(exprLValueType) != NULL) - vt = dynamic_cast(exprLValueType->GetReferenceTarget()); + if (CastType(exprLValueType) != NULL) + vt = CastType(exprLValueType->GetReferenceTarget()); else - vt = dynamic_cast(exprLValueType->GetBaseType()); + vt = CastType(exprLValueType->GetBaseType()); Assert(vt != NULL); // we don't want to report that it's e.g. a pointer to a float<1>, // but a pointer to a float, etc. const Type *elementType = vt->GetElementType(); - if (dynamic_cast(exprLValueType) != NULL) + if (CastType(exprLValueType) != NULL) return new ReferenceType(elementType); else { const PointerType *ptrType = exprLValueType->IsUniformType() ? PointerType::GetUniform(elementType) : PointerType::GetVarying(elementType); // FIXME: replicated logic with structmemberexpr.... - if (dynamic_cast(exprLValueType) && - dynamic_cast(exprLValueType)->IsSlice()) + if (CastType(exprLValueType) && + CastType(exprLValueType)->IsSlice()) ptrType = ptrType->GetAsFrozenSlice(); return ptrType; } @@ -4727,21 +4720,20 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos, if (e == NULL || (exprType = e->GetType()) == NULL) return NULL; - const ReferenceType *referenceType = - dynamic_cast(exprType); + const ReferenceType *referenceType = CastType(exprType); if (referenceType != NULL) { e = new RefDerefExpr(e, e->pos); exprType = e->GetType(); Assert(exprType != NULL); } - const PointerType *pointerType = dynamic_cast(exprType); + const PointerType *pointerType = CastType(exprType); if (pointerType != NULL) exprType = pointerType->GetBaseType(); if (derefLValue == true && pointerType == NULL) { const Type *targetType = exprType->GetReferenceTarget(); - if (dynamic_cast(targetType) != NULL) + if (CastType(targetType) != NULL) Error(p, "Member operator \"->\" can't be applied to non-pointer " "type \"%s\". 
Did you mean to use \".\"?", exprType->GetString().c_str()); @@ -4751,18 +4743,18 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos, return NULL; } if (derefLValue == false && pointerType != NULL && - dynamic_cast(pointerType->GetBaseType()) != NULL) { + CastType(pointerType->GetBaseType()) != NULL) { Error(p, "Member operator \".\" can't be applied to pointer " "type \"%s\". Did you mean to use \"->\"?", exprType->GetString().c_str()); return NULL; } - if (dynamic_cast(exprType) != NULL) + if (CastType(exprType) != NULL) return new StructMemberExpr(e, id, p, idpos, derefLValue); - else if (dynamic_cast(exprType) != NULL) + else if (CastType(exprType) != NULL) return new VectorMemberExpr(e, id, p, idpos, derefLValue); - else if (dynamic_cast(exprType)) { + else if (CastType(exprType)) { Error(p, "Member operator \"%s\" can't be applied to declared " "but not defined struct type \"%s\".", derefLValue ? "->" : ".", exprType->GetString().c_str()); @@ -4919,8 +4911,7 @@ MemberExpr::Print() const { */ std::string MemberExpr::getCandidateNearMatches() const { - const StructType *structType = - dynamic_cast(expr->GetType()); + const StructType *structType = CastType(expr->GetType()); if (!structType) return ""; @@ -5049,7 +5040,7 @@ ConstExpr::ConstExpr(const Type *t, uint32_t u, SourcePos p) type = t; type = type->GetAsConstType(); Assert(Type::Equal(type, AtomicType::UniformUInt32->GetAsConstType()) || - (dynamic_cast(type) != NULL && + (CastType(type) != NULL && type->IsUniformType())); uint32Val[0] = u; } @@ -5061,7 +5052,7 @@ ConstExpr::ConstExpr(const Type *t, uint32_t *u, SourcePos p) type = type->GetAsConstType(); Assert(Type::Equal(type, AtomicType::UniformUInt32->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingUInt32->GetAsConstType()) || - (dynamic_cast(type) != NULL)); + (CastType(type) != NULL)); for (int j = 0; j < Count(); ++j) uint32Val[j] = u[j]; } @@ -5270,11 +5261,11 @@ ConstExpr::ConstExpr(ConstExpr *old, SourcePos 
p) AtomicType::BasicType ConstExpr::getBasicType() const { - const AtomicType *at = dynamic_cast(type); + const AtomicType *at = CastType(type); if (at != NULL) return at->basicType; else { - Assert(dynamic_cast(type) != NULL); + Assert(CastType(type) != NULL); return AtomicType::TYPE_UINT32; } } @@ -5685,7 +5676,7 @@ ConstExpr::GetConstant(const Type *type) const { } else if (Type::Equal(type, AtomicType::UniformUInt32) || Type::Equal(type, AtomicType::VaryingUInt32) || - dynamic_cast(type) != NULL) { + CastType(type) != NULL) { uint32_t uiv[ISPC_MAX_NVEC]; AsUInt32(uiv, type->IsVaryingType()); if (type->IsUniformType()) @@ -5729,7 +5720,7 @@ ConstExpr::GetConstant(const Type *type) const { else return LLVMDoubleVector(dv); } - else if (dynamic_cast(type) != NULL) { + else if (CastType(type) != NULL) { // The only time we should get here is if we have an integer '0' // constant that should be turned into a NULL pointer of the // appropriate type. @@ -6374,8 +6365,7 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value, // for structs/arrays/vectors, just recursively make their elements // varying (if needed) and populate the return value. - const CollectionType *collectionType = - dynamic_cast(type); + const CollectionType *collectionType = CastType(type); if (collectionType != NULL) { llvm::Type *llvmType = type->GetAsVaryingType()->LLVMType(g->ctx); @@ -6391,7 +6381,7 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value, // Otherwise we must have a uniform AtomicType, so smear its value // across the vector lanes. 
- Assert(dynamic_cast(type) != NULL); + Assert(CastType(type) != NULL); return ctx->SmearUniform(value); } @@ -6415,10 +6405,10 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; } - const PointerType *fromPointerType = dynamic_cast(fromType); - const PointerType *toPointerType = dynamic_cast(toType); - const ArrayType *toArrayType = dynamic_cast(toType); - const ArrayType *fromArrayType = dynamic_cast(fromType); + const PointerType *fromPointerType = CastType(fromType); + const PointerType *toPointerType = CastType(toType); + const ArrayType *toArrayType = CastType(toType); + const ArrayType *fromArrayType = CastType(fromType); if (fromPointerType != NULL) { if (toArrayType != NULL) { return expr->GetValue(ctx); @@ -6476,7 +6466,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { } } else { - Assert(dynamic_cast(toType) != NULL); + Assert(CastType(toType) != NULL); if (toType->IsBoolType()) { // convert pointer to bool llvm::Type *lfu = @@ -6558,14 +6548,14 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { return ctx->BitCastInst(v, ptype); //, "array_cast_0size"); } - const ReferenceType *toReference = dynamic_cast(toType); - const ReferenceType *fromReference = dynamic_cast(fromType); + const ReferenceType *toReference = CastType(toType); + const ReferenceType *fromReference = CastType(fromType); if (toReference && fromReference) { const Type *toTarget = toReference->GetReferenceTarget(); const Type *fromTarget = fromReference->GetReferenceTarget(); - const ArrayType *toArray = dynamic_cast(toTarget); - const ArrayType *fromArray = dynamic_cast(fromTarget); + const ArrayType *toArray = CastType(toTarget); + const ArrayType *fromArray = CastType(fromTarget); if (toArray && fromArray) { // cast array pointer from [n x foo] to [0 x foo] if needed to be able // to pass to a function that takes an unsized array as a parameter @@ -6585,8 +6575,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { return 
expr->GetValue(ctx); } - const StructType *toStruct = dynamic_cast(toType); - const StructType *fromStruct = dynamic_cast(fromType); + const StructType *toStruct = CastType(toType); + const StructType *fromStruct = CastType(fromType); if (toStruct && fromStruct) { // The only legal type conversions for structs are to go from a // uniform to a varying instance of the same struct type. @@ -6599,8 +6589,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { return lUniformValueToVarying(ctx, origValue, fromType); } - const VectorType *toVector = dynamic_cast(toType); - const VectorType *fromVector = dynamic_cast(fromType); + const VectorType *toVector = CastType(toType); + const VectorType *fromVector = CastType(fromType); if (toVector && fromVector) { // this should be caught during typechecking Assert(toVector->GetElementCount() == fromVector->GetElementCount()); @@ -6632,8 +6622,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { if (!exprVal) return NULL; - const EnumType *fromEnum = dynamic_cast(fromType); - const EnumType *toEnum = dynamic_cast(toType); + const EnumType *fromEnum = CastType(fromType); + const EnumType *toEnum = CastType(toType); if (fromEnum) // treat it as an uint32 type for the below and all will be good. fromType = fromEnum->IsUniformType() ? AtomicType::UniformUInt32 : @@ -6643,7 +6633,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { toType = toEnum->IsUniformType() ? AtomicType::UniformUInt32 : AtomicType::VaryingUInt32; - const AtomicType *fromAtomic = dynamic_cast(fromType); + const AtomicType *fromAtomic = CastType(fromType); // at this point, coming from an atomic type is all that's left... 
Assert(fromAtomic != NULL); @@ -6671,7 +6661,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { return ctx->IntToPtrInst(exprVal, llvmToType, "int_to_ptr"); } else { - const AtomicType *toAtomic = dynamic_cast(toType); + const AtomicType *toAtomic = CastType(toType); // typechecking should ensure this is the case Assert(toAtomic != NULL); @@ -6689,7 +6679,7 @@ TypeCastExpr::GetType() const { static const Type * lDeconstifyType(const Type *t) { - const PointerType *pt = dynamic_cast(t); + const PointerType *pt = CastType(t); if (pt != NULL) return new PointerType(lDeconstifyType(pt->GetBaseType()), pt->GetVariability(), false); @@ -6729,16 +6719,16 @@ TypeCastExpr::TypeCheck() { } // First some special cases that we allow only with an explicit type cast - const PointerType *fromPtr = dynamic_cast(fromType); - const PointerType *toPtr = dynamic_cast(toType); + const PointerType *fromPtr = CastType(fromType); + const PointerType *toPtr = CastType(toType); if (fromPtr != NULL && toPtr != NULL) // allow explicit typecasts between any two different pointer types return this; - const AtomicType *fromAtomic = dynamic_cast(fromType); - const AtomicType *toAtomic = dynamic_cast(toType); - const EnumType *fromEnum = dynamic_cast(fromType); - const EnumType *toEnum = dynamic_cast(toType); + const AtomicType *fromAtomic = CastType(fromType); + const AtomicType *toAtomic = CastType(toType); + const EnumType *fromEnum = CastType(fromType); + const EnumType *toEnum = CastType(toType); if ((fromAtomic || fromEnum) && (toAtomic || toEnum)) // Allow explicit casts between all of these return this; @@ -6779,8 +6769,8 @@ TypeCastExpr::Optimize() { return this; const Type *toType = GetType(); - const AtomicType *toAtomic = dynamic_cast(toType); - const EnumType *toEnum = dynamic_cast(toType); + const AtomicType *toAtomic = CastType(toType); + const EnumType *toEnum = CastType(toType); // If we're not casting to an atomic or enum type, we can't do anything // here, since 
ConstExprs can only represent those two types. (So // e.g. we're casting from an int to an int<4>.) @@ -6924,7 +6914,7 @@ TypeCastExpr::GetConstant(const Type *constType) const { // 2. Converting function types to pointer-to-function types // 3. And converting these from uniform to the varying/soa equivalents. // - if (dynamic_cast(constType) == NULL) + if (CastType(constType) == NULL) return NULL; llvm::Constant *c = expr->GetConstant(constType->GetAsUniformType()); @@ -7118,7 +7108,7 @@ PtrDerefExpr::GetType() const { Assert(m->errorCount > 0); return NULL; } - Assert(dynamic_cast(type) != NULL); + Assert(CastType(type) != NULL); if (type->IsUniformType()) return type->GetBaseType(); @@ -7135,7 +7125,7 @@ PtrDerefExpr::TypeCheck() { return NULL; } - if (dynamic_cast(type) == NULL) { + if (CastType(type) == NULL) { Error(pos, "Illegal to dereference non-pointer type \"%s\".", type->GetString().c_str()); return NULL; @@ -7190,7 +7180,7 @@ RefDerefExpr::GetType() const { return NULL; } - Assert(dynamic_cast(type) != NULL); + Assert(CastType(type) != NULL); return type->GetReferenceTarget(); } @@ -7206,7 +7196,7 @@ RefDerefExpr::TypeCheck() { // We only create RefDerefExprs internally for references in // expressions, so we should never create one with a non-reference // expression... 
- Assert(dynamic_cast(type) != NULL); + Assert(CastType(type) != NULL); return this; } @@ -7248,8 +7238,8 @@ AddressOfExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; const Type *exprType = expr->GetType(); - if (dynamic_cast(exprType) != NULL || - dynamic_cast(exprType) != NULL) + if (CastType(exprType) != NULL || + CastType(exprType) != NULL) return expr->GetValue(ctx); else return expr->GetLValue(ctx); @@ -7262,7 +7252,7 @@ AddressOfExpr::GetType() const { return NULL; const Type *exprType = expr->GetType(); - if (dynamic_cast(exprType) != NULL) + if (CastType(exprType) != NULL) return PointerType::GetUniform(exprType->GetReferenceTarget()); const Type *t = expr->GetLValueType(); @@ -7305,8 +7295,8 @@ AddressOfExpr::TypeCheck() { return NULL; } - if (dynamic_cast(exprType) != NULL|| - dynamic_cast(exprType) != NULL) { + if (CastType(exprType) != NULL || + CastType(exprType) != NULL) { return this; } @@ -7338,12 +7328,11 @@ AddressOfExpr::GetConstant(const Type *type) const { return NULL; } - const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); if (pt == NULL) return NULL; - const FunctionType *ft = - dynamic_cast(pt->GetBaseType()); + const FunctionType *ft = CastType(pt->GetBaseType()); if (ft != NULL) { llvm::Constant *c = expr->GetConstant(ft); return lConvertPointerConstant(c, type); @@ -7406,7 +7395,7 @@ Expr * SizeOfExpr::TypeCheck() { // Can't compute the size of a struct without a definition if (type != NULL && - dynamic_cast(type) != NULL) { + CastType(type) != NULL) { Error(pos, "Can't compute the size of declared but not defined " "struct type \"%s\".", type->GetString().c_str()); return NULL; @@ -7462,7 +7451,7 @@ SymbolExpr::GetLValueType() const { if (symbol == NULL) return NULL; - if (dynamic_cast(symbol->type) != NULL) + if (CastType(symbol->type) != NULL) return PointerType::GetUniform(symbol->type->GetReferenceTarget()); else return PointerType::GetUniform(symbol->type); @@ -7591,7 +7580,7 @@ 
FunctionSymbolExpr::GetConstant(const Type *type) const { if (matchingFunc == NULL || matchingFunc->function == NULL) return NULL; - const FunctionType *ft = dynamic_cast(type); + const FunctionType *ft = CastType(type); if (ft == NULL) return NULL; @@ -7611,8 +7600,7 @@ lPrintOverloadCandidates(SourcePos pos, const std::vector &funcs, const std::vector &argTypes, const std::vector *argCouldBeNULL) { for (unsigned int i = 0; i < funcs.size(); ++i) { - const FunctionType *ft = - dynamic_cast(funcs[i]->type); + const FunctionType *ft = CastType(funcs[i]->type); Assert(ft != NULL); Error(funcs[i]->pos, "Candidate function: %s.", ft->GetString().c_str()); } @@ -7631,7 +7619,7 @@ lPrintOverloadCandidates(SourcePos pos, const std::vector &funcs, static bool lIsMatchToNonConstReference(const Type *callType, const Type *funcArgType) { - return (dynamic_cast(funcArgType) && + return (CastType(funcArgType) && (funcArgType->IsConstType() == false) && Type::Equal(callType, funcArgType->GetReferenceTarget())); } @@ -7640,7 +7628,7 @@ lIsMatchToNonConstReference(const Type *callType, const Type *funcArgType) { static bool lIsMatchToNonConstReferenceUnifToVarying(const Type *callType, const Type *funcArgType) { - return (dynamic_cast(funcArgType) && + return (CastType(funcArgType) && (funcArgType->IsConstType() == false) && Type::Equal(callType->GetAsVaryingType(), funcArgType->GetReferenceTarget())); @@ -7652,8 +7640,8 @@ lIsMatchToNonConstReferenceUnifToVarying(const Type *callType, */ static bool lIsMatchWithTypeWidening(const Type *callType, const Type *funcArgType) { - const AtomicType *callAt = dynamic_cast(callType); - const AtomicType *funcAt = dynamic_cast(funcArgType); + const AtomicType *callAt = CastType(callType); + const AtomicType *funcAt = CastType(funcArgType); if (callAt == NULL || funcAt == NULL) return false; @@ -7725,7 +7713,7 @@ FunctionSymbolExpr::getCandidateFunctions(int argCount) const { std::vector ret; for (int i = 0; i < 
(int)candidateFunctions.size(); ++i) { const FunctionType *ft = - dynamic_cast(candidateFunctions[i]->type); + CastType(candidateFunctions[i]->type); Assert(ft != NULL); // There's no way to match if the caller is passing more arguments @@ -7747,15 +7735,15 @@ FunctionSymbolExpr::getCandidateFunctions(int argCount) const { static bool lArgIsPointerType(const Type *type) { - if (dynamic_cast(type) != NULL) + if (CastType(type) != NULL) return true; - const ReferenceType *rt = dynamic_cast(type); + const ReferenceType *rt = CastType(type); if (rt == NULL) return false; const Type *t = rt->GetReferenceTarget(); - return (dynamic_cast(t) != NULL); + return (CastType(t) != NULL); } @@ -7805,9 +7793,9 @@ FunctionSymbolExpr::computeOverloadCost(const FunctionType *ftype, // references, where const-ness matters). For all other types, // we're passing by value anyway, so const doesn't matter. const Type *callTypeNC = callType, *fargTypeNC = fargType; - if (dynamic_cast(callType) == NULL) + if (CastType(callType) == NULL) callTypeNC = callType->GetAsNonConstType(); - if (dynamic_cast(fargType) == NULL) + if (CastType(fargType) == NULL) fargTypeNC = fargType->GetAsNonConstType(); if (Type::Equal(callTypeNC, fargTypeNC)) @@ -7870,7 +7858,7 @@ FunctionSymbolExpr::ResolveOverloads(SourcePos argPos, // Compute the cost for calling each of the candidate functions for (int i = 0; i < (int)actualCandidates.size(); ++i) { const FunctionType *ft = - dynamic_cast(actualCandidates[i]->type); + CastType(actualCandidates[i]->type); Assert(ft != NULL); candidateCosts.push_back(computeOverloadCost(ft, argTypes, argCouldBeNULL, @@ -7994,7 +7982,7 @@ NullPointerExpr::Optimize() { llvm::Constant * NullPointerExpr::GetConstant(const Type *type) const { - const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); if (pt == NULL) return NULL; @@ -8189,7 +8177,7 @@ NewExpr::TypeCheck() { Assert(m->errorCount > 0); return NULL; } - if (dynamic_cast(allocType) != NULL) { + 
if (CastType(allocType) != NULL) { Error(pos, "Can't dynamically allocate storage for declared " "but not defined type \"%s\".", allocType->GetString().c_str()); return NULL; diff --git a/func.cpp b/func.cpp index be6cf958..4e4e8196 100644 --- a/func.cpp +++ b/func.cpp @@ -100,7 +100,7 @@ Function::Function(Symbol *s, Stmt *c) { printf("\n\n\n"); } - const FunctionType *type = dynamic_cast(sym->type); + const FunctionType *type = CastType(sym->type); Assert(type != NULL); for (int i = 0; i < type->GetNumParameters(); ++i) { @@ -111,7 +111,7 @@ Function::Function(Symbol *s, Stmt *c) { args.push_back(sym); const Type *t = type->GetParameterType(i); - if (sym != NULL && dynamic_cast(t) == NULL) + if (sym != NULL && CastType(t) == NULL) sym->parentFunction = this; } @@ -132,7 +132,7 @@ Function::Function(Symbol *s, Stmt *c) { const Type * Function::GetReturnType() const { - const FunctionType *type = dynamic_cast(sym->type); + const FunctionType *type = CastType(sym->type); Assert(type != NULL); return type->GetReturnType(); } @@ -140,7 +140,7 @@ Function::GetReturnType() const { const FunctionType * Function::GetType() const { - const FunctionType *type = dynamic_cast(sym->type); + const FunctionType *type = CastType(sym->type); Assert(type != NULL); return type; } @@ -205,7 +205,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, #if 0 llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock(); #endif - const FunctionType *type = dynamic_cast(sym->type); + const FunctionType *type = CastType(sym->type); Assert(type != NULL); if (type->isTask == true) { // For tasks, we there should always be three parmeters: the @@ -431,7 +431,7 @@ Function::GenerateIR() { // If the function is 'export'-qualified, emit a second version of // it without a mask parameter and without name mangling so that // the application can call it - const FunctionType *type = dynamic_cast(sym->type); + const FunctionType *type = CastType(sym->type); Assert(type != NULL); if 
(type->isExported) { if (!type->isTask) { diff --git a/module.cpp b/module.cpp index 8bbb4acc..7e83725f 100644 --- a/module.cpp +++ b/module.cpp @@ -368,7 +368,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE if (type == NULL) return; - const ArrayType *at = dynamic_cast(type); + const ArrayType *at = CastType(type); if (at != NULL && at->TotalElementCount() == 0) { Error(pos, "Illegal to declare a global variable with unsized " "array dimensions that aren't set with an initializer " @@ -517,7 +517,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE */ static bool lRecursiveCheckValidParamType(const Type *t) { - const StructType *st = dynamic_cast(t); + const StructType *st = CastType(t); if (st != NULL) { for (int i = 0; i < st->GetElementCount(); ++i) if (lRecursiveCheckValidParamType(st->GetElementType(i))) @@ -525,11 +525,11 @@ lRecursiveCheckValidParamType(const Type *t) { return false; } - const SequentialType *seqt = dynamic_cast(t); + const SequentialType *seqt = CastType(t); if (seqt != NULL) return lRecursiveCheckValidParamType(seqt->GetElementType()); - const PointerType *pt = dynamic_cast(t); + const PointerType *pt = CastType(t); if (pt != NULL) { if (pt->IsSlice() || pt->IsVaryingType()) return true; @@ -550,7 +550,7 @@ lCheckForVaryingParameter(const Type *type, const std::string &name, SourcePos pos) { if (lRecursiveCheckValidParamType(type)) { const Type *t = type->GetBaseType(); - if (dynamic_cast(t)) + if (CastType(t)) Error(pos, "Struct parameter \"%s\" with varying member(s) is illegal " "in an exported function.", name.c_str()); else @@ -568,7 +568,7 @@ static void lCheckForStructParameters(const FunctionType *ftype, SourcePos pos) { for (int i = 0; i < ftype->GetNumParameters(); ++i) { const Type *type = ftype->GetParameterType(i); - if (dynamic_cast(type) != NULL) { + if (CastType(type) != NULL) { Error(pos, "Passing structs to/from application functions is " "currently 
broken. Use a pointer or const pointer to the " "struct instead for now."); @@ -615,7 +615,7 @@ Module::AddFunctionDeclaration(const std::string &name, // different, return an error--overloading by return type isn't // allowed. const FunctionType *ofType = - dynamic_cast(overloadFunc->type); + CastType(overloadFunc->type); Assert(ofType != NULL); if (ofType->GetNumParameters() == functionType->GetNumParameters()) { int i; @@ -737,9 +737,9 @@ Module::AddFunctionDeclaration(const std::string &name, // default.) Set parameter attributes accordingly. (Only for // uniform pointers, since varying pointers are int vectors...) if (!functionType->isTask && - ((dynamic_cast(argType) != NULL && + ((CastType(argType) != NULL && argType->IsUniformType()) || - dynamic_cast(argType) != NULL)) { + CastType(argType) != NULL)) { // NOTE: LLVM indexes function parameters starting from 1. // This is unintuitive. @@ -962,7 +962,7 @@ lEmitStructDecl(const StructType *st, std::vector *emittedSt // Otherwise first make sure any contained structs have been declared. 
for (int i = 0; i < st->GetElementCount(); ++i) { const StructType *elementStructType = - dynamic_cast(st->GetElementType(i)); + CastType(st->GetElementType(i)); if (elementStructType != NULL) lEmitStructDecl(elementStructType, emittedStructs, file); } @@ -1084,7 +1084,7 @@ lAddTypeIfNew(const Type *type, std::vector *exportedTypes) { if (Type::Equal((*exportedTypes)[i], type)) return; - const T *castType = dynamic_cast(type); + const T *castType = CastType(type); Assert(castType != NULL); exportedTypes->push_back(castType); } @@ -1099,13 +1099,13 @@ lGetExportedTypes(const Type *type, std::vector *exportedStructTypes, std::vector *exportedEnumTypes, std::vector *exportedVectorTypes) { - const ArrayType *arrayType = dynamic_cast(type); - const StructType *structType = dynamic_cast(type); + const ArrayType *arrayType = CastType(type); + const StructType *structType = CastType(type); - if (dynamic_cast(type) != NULL) + if (CastType(type) != NULL) lGetExportedTypes(type->GetReferenceTarget(), exportedStructTypes, exportedEnumTypes, exportedVectorTypes); - else if (dynamic_cast(type) != NULL) + else if (CastType(type) != NULL) lGetExportedTypes(type->GetBaseType(), exportedStructTypes, exportedEnumTypes, exportedVectorTypes); else if (arrayType != NULL) @@ -1117,12 +1117,12 @@ lGetExportedTypes(const Type *type, lGetExportedTypes(structType->GetElementType(i), exportedStructTypes, exportedEnumTypes, exportedVectorTypes); } - else if (dynamic_cast(type) != NULL) + else if (CastType(type) != NULL) lAddTypeIfNew(type, exportedEnumTypes); - else if (dynamic_cast(type) != NULL) + else if (CastType(type) != NULL) lAddTypeIfNew(type, exportedVectorTypes); else - Assert(dynamic_cast(type) != NULL); + Assert(CastType(type) != NULL); } @@ -1135,7 +1135,7 @@ lGetExportedParamTypes(const std::vector &funcs, std::vector *exportedEnumTypes, std::vector *exportedVectorTypes) { for (unsigned int i = 0; i < funcs.size(); ++i) { - const FunctionType *ftype = 
dynamic_cast(funcs[i]->type); + const FunctionType *ftype = CastType(funcs[i]->type); // Handle the return type lGetExportedTypes(ftype->GetReturnType(), exportedStructTypes, exportedEnumTypes, exportedVectorTypes); @@ -1152,7 +1152,7 @@ static void lPrintFunctionDeclarations(FILE *file, const std::vector &funcs) { fprintf(file, "#ifdef __cplusplus\nextern \"C\" {\n#endif // __cplusplus\n"); for (unsigned int i = 0; i < funcs.size(); ++i) { - const FunctionType *ftype = dynamic_cast(funcs[i]->type); + const FunctionType *ftype = CastType(funcs[i]->type); Assert(ftype); std::string decl = ftype->GetCDeclaration(funcs[i]->name); fprintf(file, " extern %s;\n", decl.c_str()); @@ -1163,7 +1163,7 @@ lPrintFunctionDeclarations(FILE *file, const std::vector &funcs) { static bool lIsExported(const Symbol *sym) { - const FunctionType *ft = dynamic_cast(sym->type); + const FunctionType *ft = CastType(sym->type); Assert(ft); return ft->isExported; } @@ -1171,7 +1171,7 @@ lIsExported(const Symbol *sym) { static bool lIsExternC(const Symbol *sym) { - const FunctionType *ft = dynamic_cast(sym->type); + const FunctionType *ft = CastType(sym->type); Assert(ft); return ft->isExternC; } diff --git a/parse.yy b/parse.yy index f7a468ad..e983afdf 100644 --- a/parse.yy +++ b/parse.yy @@ -550,7 +550,7 @@ rate_qualified_type_specifier $$ = NULL; else { int soaWidth = (int)$1; - const StructType *st = dynamic_cast($2); + const StructType *st = CastType($2); if (st == NULL) { Error(@1, "\"soa\" qualifier is illegal with non-struct type \"%s\".", $2->GetString().c_str()); @@ -895,7 +895,7 @@ struct_or_union_specifier st = new UndefinedStructType($2, Variability::Unbound, false, @2); m->symbolTable->AddType($2, st, @2); } - else if (dynamic_cast(st) == NULL) + else if (CastType(st) == NULL) Error(@2, "Type \"%s\" is not a struct type! 
(%s)", $2, st->GetString().c_str()); $$ = st; @@ -1060,7 +1060,7 @@ enum_specifier $$ = NULL; } else { - const EnumType *enumType = dynamic_cast(type); + const EnumType *enumType = CastType(type); if (enumType == NULL) { Error(@2, "Type \"%s\" is not an enum type (%s).", $2, type->GetString().c_str()); @@ -1858,8 +1858,7 @@ function_definition { if ($2 != NULL) { $2->InitFromDeclSpecs($1); - const FunctionType *funcType = - dynamic_cast($2->type); + const FunctionType *funcType = CastType($2->type); if (funcType == NULL) Assert(m->errorCount > 0); else { @@ -1987,7 +1986,7 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) { decl->type = decl->type->ResolveUnboundVariability(Variability::Varying); - const FunctionType *ft = dynamic_cast(decl->type); + const FunctionType *ft = CastType(decl->type); if (ft != NULL) { bool isInline = (ds->typeQualifiers & TYPEQUAL_INLINE); m->AddFunctionDeclaration(decl->name, ft, ds->storageClass, diff --git a/stmt.cpp b/stmt.cpp index 0c8ed0c8..bcc57f4b 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -122,7 +122,7 @@ DeclStmt::DeclStmt(const std::vector &v, SourcePos p) static bool lHasUnsizedArrays(const Type *type) { - const ArrayType *at = dynamic_cast(type); + const ArrayType *at = CastType(type); if (at == NULL) return false; @@ -297,8 +297,8 @@ DeclStmt::TypeCheck() { // the int->float type conversion is in there and we don't return // an int as the constValue later... const Type *type = vars[i].sym->type; - if (dynamic_cast(type) != NULL || - dynamic_cast(type) != NULL) { + if (CastType(type) != NULL || + CastType(type) != NULL) { // If it's an expr list with an atomic type, we'll later issue // an error. Need to leave vars[i].init as is in that case so // it is in fact caught later, though. 
@@ -2461,7 +2461,7 @@ lEncodeType(const Type *t) { if (Type::Equal(t, AtomicType::VaryingUInt64)) return 'V'; if (Type::Equal(t, AtomicType::UniformDouble)) return 'd'; if (Type::Equal(t, AtomicType::VaryingDouble)) return 'D'; - if (dynamic_cast(t) != NULL) { + if (CastType(t) != NULL) { if (t->IsUniformType()) return 'p'; else @@ -2481,7 +2481,7 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) { if (type == NULL) return NULL; - if (dynamic_cast(type) != NULL) { + if (CastType(type) != NULL) { expr = new RefDerefExpr(expr, expr->pos); type = expr->GetType(); if (type == NULL) @@ -2732,7 +2732,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { } // Typechecking should catch this - Assert(dynamic_cast(exprType) != NULL); + Assert(CastType(exprType) != NULL); if (exprType->IsUniformType()) { // For deletion of a uniform pointer, we just need to cast the @@ -2772,7 +2772,7 @@ DeleteStmt::TypeCheck() { if (expr == NULL || ((exprType = expr->GetType()) == NULL)) return NULL; - if (dynamic_cast(exprType) == NULL) { + if (CastType(exprType) == NULL) { Error(pos, "Illegal to delete non-pointer type \"%s\".", exprType->GetString().c_str()); return NULL; diff --git a/sym.cpp b/sym.cpp index 8c7e04a6..0a211884 100644 --- a/sym.cpp +++ b/sym.cpp @@ -136,7 +136,7 @@ SymbolTable::LookupVariable(const char *name) { bool SymbolTable::AddFunction(Symbol *symbol) { - const FunctionType *ft = dynamic_cast(symbol->type); + const FunctionType *ft = CastType(symbol->type); Assert(ft != NULL); if (LookupFunction(symbol->name.c_str(), ft) != NULL) // A function of the same name and type has already been added to @@ -182,7 +182,7 @@ SymbolTable::LookupFunction(const char *name, const FunctionType *type) { bool SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) { const Type *t = LookupType(name); - if (t != NULL && dynamic_cast(t) == NULL) { + if (t != NULL && CastType(t) == NULL) { // If we have a previous declaration of 
anything other than an // UndefinedStructType with this struct name, issue an error. If // we have an UndefinedStructType, then we'll fall through to the @@ -270,7 +270,7 @@ SymbolTable::closestTypeMatch(const char *str, bool structsVsEnums) const { for (iter = types.begin(); iter != types.end(); ++iter) { // Skip over either StructTypes or EnumTypes, depending on the // value of the structsVsEnums parameter - bool isEnum = (dynamic_cast(iter->second) != NULL); + bool isEnum = (CastType(iter->second) != NULL); if (isEnum && structsVsEnums) continue; else if (!isEnum && !structsVsEnums) diff --git a/type.cpp b/type.cpp index 95877122..738f496c 100644 --- a/type.cpp +++ b/type.cpp @@ -184,7 +184,7 @@ const AtomicType *AtomicType::Void = AtomicType::AtomicType(BasicType bt, Variability v, bool ic) - : basicType(bt), variability(v), isConst(ic) { + : Type(ATOMIC_TYPE), basicType(bt), variability(v), isConst(ic) { } @@ -532,7 +532,7 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const { // EnumType EnumType::EnumType(SourcePos p) - : pos(p) { + : Type(ENUM_TYPE), pos(p) { // name = "/* (anonymous) */"; isConst = false; variability = Variability(Variability::Unbound); @@ -540,7 +540,7 @@ EnumType::EnumType(SourcePos p) EnumType::EnumType(const char *n, SourcePos p) - : pos(p), name(n) { + : Type(ENUM_TYPE), pos(p), name(n) { isConst = false; variability = Variability(Variability::Unbound); } @@ -817,7 +817,7 @@ PointerType *PointerType::Void = PointerType::PointerType(const Type *t, Variability v, bool ic, bool is, bool fr) - : variability(v), isConst(ic), isSlice(is), isFrozen(fr) { + : Type(POINTER_TYPE), variability(v), isConst(ic), isSlice(is), isFrozen(fr) { baseType = t; } @@ -1083,7 +1083,7 @@ PointerType::LLVMType(llvm::LLVMContext *ctx) const { switch (variability.type) { case Variability::Uniform: { llvm::Type *ptype = NULL; - const FunctionType *ftype = dynamic_cast(baseType); + const FunctionType *ftype = CastType(baseType); if (ftype != NULL) // Get the 
type of the function variant that takes the mask as the // last parameter--i.e. we don't allow taking function pointers of @@ -1155,7 +1155,7 @@ const Type *SequentialType::GetElementType(int index) const { // ArrayType ArrayType::ArrayType(const Type *c, int a) - : child(c), numElements(a) { + : SequentialType(ARRAY_TYPE), child(c), numElements(a) { // 0 -> unsized array. Assert(numElements >= 0); Assert(Type::Equal(c, AtomicType::Void) == false); @@ -1217,11 +1217,11 @@ ArrayType::IsConstType() const { const Type * ArrayType::GetBaseType() const { const Type *type = child; - const ArrayType *at = dynamic_cast(type); + const ArrayType *at = CastType(type); // Keep walking until we reach a child that isn't itself an array while (at) { type = at->child; - at = dynamic_cast(type); + at = CastType(type); } return type; } @@ -1338,7 +1338,7 @@ ArrayType::GetString() const { else buf[0] = '\0'; s += std::string("[") + std::string(buf) + std::string("]"); - at = dynamic_cast(at->child); + at = CastType(at->child); } return s; } @@ -1381,7 +1381,7 @@ ArrayType::GetCDeclaration(const std::string &name) const { else buf[0] = '\0'; s += std::string("[") + std::string(buf) + std::string("]"); - at = dynamic_cast(at->child); + at = CastType(at->child); } if (soaWidth > 0) { @@ -1396,7 +1396,7 @@ ArrayType::GetCDeclaration(const std::string &name) const { int ArrayType::TotalElementCount() const { - const ArrayType *ct = dynamic_cast(child); + const ArrayType *ct = CastType(child); if (ct != NULL) return numElements * ct->TotalElementCount(); else @@ -1425,7 +1425,7 @@ ArrayType::GetSizedArray(int sz) const { const Type * ArrayType::SizeUnsizedArrays(const Type *type, Expr *initExpr) { - const ArrayType *at = dynamic_cast(type); + const ArrayType *at = CastType(type); if (at == NULL) return type; @@ -1437,7 +1437,7 @@ ArrayType::SizeUnsizedArrays(const Type *type, Expr *initExpr) { // length of the expression list if (at->GetElementCount() == 0) { type = 
at->GetSizedArray(exprList->exprs.size()); - at = dynamic_cast(type); + at = CastType(type); } // Is there another nested level of expression lists? If not, bail out @@ -1449,7 +1449,7 @@ ArrayType::SizeUnsizedArrays(const Type *type, Expr *initExpr) { return type; const Type *nextType = at->GetElementType(); - const ArrayType *nextArrayType = dynamic_cast(nextType); + const ArrayType *nextArrayType = CastType(nextType); if (nextArrayType != NULL && nextArrayType->GetElementCount() == 0) { // If the recursive call to SizeUnsizedArrays at the bottom of the // function is going to size an unsized dimension, make sure that @@ -1485,7 +1485,7 @@ ArrayType::SizeUnsizedArrays(const Type *type, Expr *initExpr) { // VectorType VectorType::VectorType(const AtomicType *b, int a) - : base(b), numElements(a) { + : SequentialType(VECTOR_TYPE), base(b), numElements(a) { Assert(numElements > 0); Assert(base != NULL); } @@ -2111,8 +2111,7 @@ StructType::checkIfCanBeSOA(const StructType *st) { bool ok = true; for (int i = 0; i < (int)st->elementTypes.size(); ++i) { const Type *eltType = st->elementTypes[i]; - const StructType *childStructType = - dynamic_cast(eltType); + const StructType *childStructType = CastType(eltType); if (childStructType != NULL) ok &= checkIfCanBeSOA(childStructType); @@ -2124,7 +2123,7 @@ StructType::checkIfCanBeSOA(const StructType *st) { eltType->IsUniformType() ? 
"uniform" : "varying"); ok = false; } - else if (dynamic_cast(eltType)) { + else if (CastType(eltType)) { Error(st->elementPositions[i], "Unable to apply SOA conversion to " "struct due to member \"%s\" with reference type \"%s\".", st->elementNames[i].c_str(), eltType->GetString().c_str()); @@ -2141,7 +2140,7 @@ StructType::checkIfCanBeSOA(const StructType *st) { UndefinedStructType::UndefinedStructType(const std::string &n, const Variability var, bool ic, SourcePos p) - : name(n), variability(var), isConst(ic), pos(p) { + : Type(UNDEFINED_STRUCT_TYPE), name(n), variability(var), isConst(ic), pos(p) { Assert(name != ""); if (variability != Variability::Unbound) { // Create a new opaque LLVM struct type for this struct name @@ -2303,7 +2302,7 @@ UndefinedStructType::GetDIType(llvm::DIDescriptor scope) const { // ReferenceType ReferenceType::ReferenceType(const Type *t) - : targetType(t) { + : Type(REFERENCE_TYPE), targetType(t) { } @@ -2493,7 +2492,7 @@ ReferenceType::GetCDeclaration(const std::string &name) const { return ""; } - const ArrayType *at = dynamic_cast(targetType); + const ArrayType *at = CastType(targetType); if (at != NULL) { if (at->GetElementCount() == 0) { // emit unsized arrays as pointers to the base type.. 
@@ -2553,8 +2552,8 @@ ReferenceType::GetDIType(llvm::DIDescriptor scope) const { FunctionType::FunctionType(const Type *r, const std::vector &a, SourcePos p) - : isTask(false), isExported(false), isExternC(false), returnType(r), - paramTypes(a), paramNames(std::vector(a.size(), "")), + : Type(FUNCTION_TYPE), isTask(false), isExported(false), isExternC(false), + returnType(r), paramTypes(a), paramNames(std::vector(a.size(), "")), paramDefaults(std::vector(a.size(), NULL)), paramPositions(std::vector(a.size(), p)) { Assert(returnType != NULL); @@ -2568,8 +2567,8 @@ FunctionType::FunctionType(const Type *r, const std::vector &a, const std::vector &ad, const std::vector &ap, bool it, bool is, bool ec) - : isTask(it), isExported(is), isExternC(ec), returnType(r), paramTypes(a), - paramNames(an), paramDefaults(ad), paramPositions(ap) { + : Type(FUNCTION_TYPE), isTask(it), isExported(is), isExternC(ec), returnType(r), + paramTypes(a), paramNames(an), paramDefaults(ad), paramPositions(ap) { Assert(paramTypes.size() == paramNames.size() && paramNames.size() == paramDefaults.size() && paramDefaults.size() == paramPositions.size()); @@ -2733,9 +2732,9 @@ FunctionType::GetCDeclaration(const std::string &fname) const { // Convert pointers to arrays to unsized arrays, which are more clear // to print out for multidimensional arrays (i.e. "float foo[][4] " // versus "float (foo *)[4]"). 
- const PointerType *pt = dynamic_cast(type); + const PointerType *pt = CastType(type); if (pt != NULL && - dynamic_cast(pt->GetBaseType()) != NULL) { + CastType(pt->GetBaseType()) != NULL) { type = new ArrayType(pt->GetBaseType(), 0); } @@ -2906,7 +2905,7 @@ Type::GetAsUnsignedType() const { */ static const Type * lVectorConvert(const Type *type, SourcePos pos, const char *reason, int vecSize) { - const VectorType *vt = dynamic_cast(type); + const VectorType *vt = CastType(type); if (vt) { if (vt->GetElementCount() != vecSize) { Error(pos, "Implicit conversion between from vector type " @@ -2917,7 +2916,7 @@ lVectorConvert(const Type *type, SourcePos pos, const char *reason, int vecSize) return vt; } else { - const AtomicType *at = dynamic_cast(type); + const AtomicType *at = CastType(type); if (!at) { Error(pos, "Non-atomic type \"%s\" can't be converted to vector type " "for %s.", type->GetString().c_str(), reason); @@ -2935,11 +2934,10 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char // First, if one or both types are function types, convert them to // pointer to function types and then try again. - if (dynamic_cast(t0) || - dynamic_cast(t1)) { - if (dynamic_cast(t0)) + if (CastType(t0) || CastType(t1)) { + if (CastType(t0)) t0 = PointerType::GetUniform(t0); - if (dynamic_cast(t1)) + if (CastType(t1)) t1 = PointerType::GetUniform(t1); return MoreGeneralType(t0, t1, pos, reason, forceVarying, vecSize); } @@ -2967,8 +2965,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char // If they're function types, it's hopeless if they didn't match in the // Type::Equal() call above. Fail here so that we don't get into // trouble calling GetAsConstType()... 
- if (dynamic_cast(t0) || - dynamic_cast(t1)) { + if (CastType(t0) || CastType(t1)) { Error(pos, "Incompatible function types \"%s\" and \"%s\" in %s.", t0->GetString().c_str(), t1->GetString().c_str(), reason); return NULL; @@ -2979,8 +2976,8 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char if (Type::EqualIgnoringConst(t0, t1)) return t0->GetAsNonConstType(); - const PointerType *pt0 = dynamic_cast(t0); - const PointerType *pt1 = dynamic_cast(t1); + const PointerType *pt0 = CastType(t0); + const PointerType *pt1 = CastType(t1); if (pt0 != NULL && pt1 != NULL) { if (PointerType::IsVoidPointer(pt0)) return pt1; @@ -2994,8 +2991,8 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char } } - const VectorType *vt0 = dynamic_cast(t0); - const VectorType *vt1 = dynamic_cast(t1); + const VectorType *vt0 = CastType(t0); + const VectorType *vt1 = CastType(t1); if (vt0 && vt1) { // both are vectors; convert their base types and make a new vector // type, as long as their lengths match @@ -3012,7 +3009,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char // The 'more general' version of the two vector element types must // be an AtomicType (that's all that vectors can hold...) 
- const AtomicType *at = dynamic_cast(t); + const AtomicType *at = CastType(t); Assert(at != NULL); return new VectorType(at, vt0->GetElementCount()); @@ -3027,7 +3024,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char if (!t) return NULL; - const AtomicType *at = dynamic_cast(t); + const AtomicType *at = CastType(t); Assert(at != NULL); return new VectorType(at, vt0->GetElementCount()); } @@ -3039,18 +3036,18 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char if (!t) return NULL; - const AtomicType *at = dynamic_cast(t); + const AtomicType *at = CastType(t); Assert(at != NULL); return new VectorType(at, vt1->GetElementCount()); } // TODO: what do we need to do about references here, if anything?? - const AtomicType *at0 = dynamic_cast(t0->GetReferenceTarget()); - const AtomicType *at1 = dynamic_cast(t1->GetReferenceTarget()); + const AtomicType *at0 = CastType(t0->GetReferenceTarget()); + const AtomicType *at1 = CastType(t1->GetReferenceTarget()); - const EnumType *et0 = dynamic_cast(t0->GetReferenceTarget()); - const EnumType *et1 = dynamic_cast(t1->GetReferenceTarget()); + const EnumType *et0 = CastType(t0->GetReferenceTarget()); + const EnumType *et1 = CastType(t1->GetReferenceTarget()); if (et0 != NULL && et1 != NULL) { // Two different enum types -> make them uint32s... 
Assert(et0->IsVaryingType() == et1->IsVaryingType()); @@ -3098,9 +3095,9 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char bool Type::IsBasicType(const Type *type) { - return (dynamic_cast(type) != NULL || - dynamic_cast(type) != NULL || - dynamic_cast(type) != NULL); + return (CastType(type) != NULL || + CastType(type) != NULL || + CastType(type) != NULL); } @@ -3110,16 +3107,16 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) { return false; if (ignoreConst == true) { - if (dynamic_cast(a) == NULL) + if (CastType(a) == NULL) a = a->GetAsNonConstType(); - if (dynamic_cast(b) == NULL) + if (CastType(b) == NULL) b = b->GetAsNonConstType(); } else if (a->IsConstType() != b->IsConstType()) return false; - const AtomicType *ata = dynamic_cast(a); - const AtomicType *atb = dynamic_cast(b); + const AtomicType *ata = CastType(a); + const AtomicType *atb = CastType(b); if (ata != NULL && atb != NULL) { return ((ata->basicType == atb->basicType) && (ata->GetVariability() == atb->GetVariability())); @@ -3128,33 +3125,31 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) { // For all of the other types, we need to see if we have the same two // general types. If so, then we dig into the details of the type and // see if all of the relevant bits are equal... 
- const EnumType *eta = dynamic_cast(a); - const EnumType *etb = dynamic_cast(b); + const EnumType *eta = CastType(a); + const EnumType *etb = CastType(b); if (eta != NULL && etb != NULL) // Kind of goofy, but this sufficies to check return (eta->pos == etb->pos && eta->GetVariability() == etb->GetVariability()); - const ArrayType *arta = dynamic_cast(a); - const ArrayType *artb = dynamic_cast(b); + const ArrayType *arta = CastType(a); + const ArrayType *artb = CastType(b); if (arta != NULL && artb != NULL) return (arta->GetElementCount() == artb->GetElementCount() && lCheckTypeEquality(arta->GetElementType(), artb->GetElementType(), ignoreConst)); - const VectorType *vta = dynamic_cast(a); - const VectorType *vtb = dynamic_cast(b); + const VectorType *vta = CastType(a); + const VectorType *vtb = CastType(b); if (vta != NULL && vtb != NULL) return (vta->GetElementCount() == vtb->GetElementCount() && lCheckTypeEquality(vta->GetElementType(), vtb->GetElementType(), ignoreConst)); - const StructType *sta = dynamic_cast(a); - const StructType *stb = dynamic_cast(b); - const UndefinedStructType *usta = - dynamic_cast(a); - const UndefinedStructType *ustb = - dynamic_cast(b); + const StructType *sta = CastType(a); + const StructType *stb = CastType(b); + const UndefinedStructType *usta = CastType(a); + const UndefinedStructType *ustb = CastType(b); if ((sta != NULL || usta != NULL) && (stb != NULL || ustb != NULL)) { // Report both defuned and undefined structs as equal if their // names are the same. 
@@ -3166,8 +3161,8 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) { return (namea == nameb); } - const PointerType *pta = dynamic_cast(a); - const PointerType *ptb = dynamic_cast(b); + const PointerType *pta = CastType(a); + const PointerType *ptb = CastType(b); if (pta != NULL && ptb != NULL) return (pta->IsUniformType() == ptb->IsUniformType() && pta->IsSlice() == ptb->IsSlice() && @@ -3175,14 +3170,14 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) { lCheckTypeEquality(pta->GetBaseType(), ptb->GetBaseType(), ignoreConst)); - const ReferenceType *rta = dynamic_cast(a); - const ReferenceType *rtb = dynamic_cast(b); + const ReferenceType *rta = CastType(a); + const ReferenceType *rtb = CastType(b); if (rta != NULL && rtb != NULL) return (lCheckTypeEquality(rta->GetReferenceTarget(), rtb->GetReferenceTarget(), ignoreConst)); - const FunctionType *fta = dynamic_cast(a); - const FunctionType *ftb = dynamic_cast(b); + const FunctionType *fta = CastType(a); + const FunctionType *ftb = CastType(b); if (fta != NULL && ftb != NULL) { // Both the return types and all of the argument types must match // for function types to match diff --git a/type.h b/type.h index 82e5a611..6102e0dd 100644 --- a/type.h +++ b/type.h @@ -72,6 +72,21 @@ struct Variability { }; +/** Enumerant that records each of the types that inherit from the Type + baseclass. */ +enum TypeId { + ATOMIC_TYPE, + ENUM_TYPE, + POINTER_TYPE, + ARRAY_TYPE, + VECTOR_TYPE, + STRUCT_TYPE, + UNDEFINED_STRUCT_TYPE, + REFERENCE_TYPE, + FUNCTION_TYPE +}; + + /** @brief Interface class that defines the type abstraction. Abstract base class that defines the interface that must be implemented @@ -231,6 +246,14 @@ public: (i.e. not an aggregation of multiple instances of a type or types.) */ static bool IsBasicType(const Type *type); + + /** Indicates which Type implementation this type is. 
This value can + be used to determine the actual type much more efficiently than + using dynamic_cast. */ + const TypeId typeId; + +protected: + Type(TypeId id) : typeId(id) { } }; @@ -452,6 +475,9 @@ public: index must be between 0 and GetElementCount()-1. */ virtual const Type *GetElementType(int index) const = 0; + +protected: + CollectionType(TypeId id) : Type(id) { } }; @@ -473,6 +499,9 @@ public: the same type. */ const Type *GetElementType(int index) const; + +protected: + SequentialType(TypeId id) : CollectionType(id) { } }; @@ -686,6 +715,8 @@ private: const Variability variability; const bool isConst; const SourcePos pos; + + mutable const StructType *oppositeConstStructType; }; @@ -732,8 +763,6 @@ private: const Variability variability; const bool isConst; const SourcePos pos; - - mutable const StructType *oppositeConstStructType; }; @@ -875,8 +904,119 @@ private: const std::vector paramPositions; }; -inline bool IsReferenceType(const Type *t) { - return dynamic_cast(t) != NULL; + +/* Efficient dynamic casting of Types. First, we specify a default + template function that returns NULL, indicating a failed cast, for + arbitrary types. */ +template inline const T * +CastType(const Type *type) { + return NULL; } + +/* Now we have template specializaitons for the Types implemented in this + file. Each one checks the Type::typeId member and then performs the + corresponding static cast if it's safe as per the typeId. 
+ */ +template <> inline const AtomicType * +CastType(const Type *type) { + if (type != NULL && type->typeId == ATOMIC_TYPE) + return (const AtomicType *)type; + else + return NULL; +} + +template <> inline const EnumType * +CastType(const Type *type) { + if (type != NULL && type->typeId == ENUM_TYPE) + return (const EnumType *)type; + else + return NULL; +} + +template <> inline const PointerType * +CastType(const Type *type) { + if (type != NULL && type->typeId == POINTER_TYPE) + return (const PointerType *)type; + else + return NULL; +} + +template <> inline const ArrayType * +CastType(const Type *type) { + if (type != NULL && type->typeId == ARRAY_TYPE) + return (const ArrayType *)type; + else + return NULL; +} + +template <> inline const VectorType * +CastType(const Type *type) { + if (type != NULL && type->typeId == VECTOR_TYPE) + return (const VectorType *)type; + else + return NULL; +} + +template <> inline const SequentialType * +CastType(const Type *type) { + // Note that this function must be updated if other sequential type + // implementations are added. + if (type != NULL && + (type->typeId == ARRAY_TYPE || type->typeId == VECTOR_TYPE)) + return (const SequentialType *)type; + else + return NULL; +} + +template <> inline const CollectionType * +CastType(const Type *type) { + // Similarly a new collection type implementation requires updating + // this function. 
+ if (type != NULL && + (type->typeId == ARRAY_TYPE || type->typeId == VECTOR_TYPE || + type->typeId == STRUCT_TYPE)) + return (const CollectionType *)type; + else + return NULL; +} + +template <> inline const StructType * +CastType(const Type *type) { + if (type != NULL && type->typeId == STRUCT_TYPE) + return (const StructType *)type; + else + return NULL; +} + +template <> inline const UndefinedStructType * +CastType(const Type *type) { + if (type != NULL && type->typeId == UNDEFINED_STRUCT_TYPE) + return (const UndefinedStructType *)type; + else + return NULL; +} + +template <> inline const ReferenceType * +CastType(const Type *type) { + if (type != NULL && type->typeId == REFERENCE_TYPE) + return (const ReferenceType *)type; + else + return NULL; +} + +template <> inline const FunctionType * +CastType(const Type *type) { + if (type != NULL && type->typeId == FUNCTION_TYPE) + return (const FunctionType *)type; + else + return NULL; +} + + +inline bool IsReferenceType(const Type *t) { + return CastType(t) != NULL; +} + + #endif // ISPC_TYPE_H From 7cf66eb61f9570fffe05be182dcaa4b0458e4f5c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 11:12:53 -0700 Subject: [PATCH 122/173] Small optimizations to various AtomicType methods. 
--- type.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/type.cpp b/type.cpp index 738f496c..2ddbb5c7 100644 --- a/type.cpp +++ b/type.cpp @@ -254,7 +254,7 @@ AtomicType::GetAsUnsignedType() const { const AtomicType * AtomicType::GetAsConstType() const { - if (Type::Equal(this, AtomicType::Void) || isConst == true) + if (basicType == TYPE_VOID || isConst == true) return this; return new AtomicType(basicType, variability, true); @@ -263,7 +263,7 @@ AtomicType::GetAsConstType() const { const AtomicType * AtomicType::GetAsNonConstType() const { - if (Type::Equal(this, AtomicType::Void) || isConst == false) + if (basicType == TYPE_VOID || isConst == false) return this; return new AtomicType(basicType, variability, false); @@ -278,7 +278,7 @@ AtomicType::GetBaseType() const { const AtomicType * AtomicType::GetAsVaryingType() const { - Assert(Type::Equal(this, AtomicType::Void) == false); + Assert(basicType != TYPE_VOID); if (variability == Variability::Varying) return this; return new AtomicType(basicType, Variability::Varying, isConst); @@ -287,7 +287,7 @@ AtomicType::GetAsVaryingType() const { const AtomicType * AtomicType::GetAsUniformType() const { - Assert(Type::Equal(this, AtomicType::Void) == false); + Assert(basicType != TYPE_VOID); if (variability == Variability::Uniform) return this; return new AtomicType(basicType, Variability::Uniform, isConst); @@ -296,7 +296,7 @@ AtomicType::GetAsUniformType() const { const AtomicType * AtomicType::GetAsUnboundVariabilityType() const { - Assert(Type::Equal(this, AtomicType::Void) == false); + Assert(basicType != TYPE_VOID); if (variability == Variability::Unbound) return this; return new AtomicType(basicType, Variability::Unbound, isConst); @@ -305,7 +305,7 @@ AtomicType::GetAsUnboundVariabilityType() const { const AtomicType * AtomicType::GetAsSOAType(int width) const { - Assert(this != AtomicType::Void); + Assert(basicType != TYPE_VOID); if (variability == Variability(Variability::SOA, 
width)) return this; return new AtomicType(basicType, Variability(Variability::SOA, width), isConst); From 2791bd0015455fa90f42ffa2fa3faed44b0dd913 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 11:31:40 -0700 Subject: [PATCH 123/173] Improve performance of lCheckTypeEquality() We don't need to explicitly create the non-const Types to do type comparison when ignoring const-ness in the check. We can also save some unnecessary dynamic memory allocation by keeping strings returned from GetStructName() as references to strings. This gives another 10% on front-end perf on that big program. --- type.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/type.cpp b/type.cpp index 2ddbb5c7..c29ac599 100644 --- a/type.cpp +++ b/type.cpp @@ -3106,13 +3106,8 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) { if (a == NULL || b == NULL) return false; - if (ignoreConst == true) { - if (CastType(a) == NULL) - a = a->GetAsNonConstType(); - if (CastType(b) == NULL) - b = b->GetAsNonConstType(); - } - else if (a->IsConstType() != b->IsConstType()) + if (ignoreConst == false && + a->IsConstType() != b->IsConstType()) return false; const AtomicType *ata = CastType(a); @@ -3156,8 +3151,10 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) { if (a->GetVariability() != b->GetVariability()) return false; - std::string namea = sta ? sta->GetStructName() : usta->GetStructName(); - std::string nameb = stb ? stb->GetStructName() : ustb->GetStructName(); + const std::string &namea = sta ? sta->GetStructName() : + usta->GetStructName(); + const std::string &nameb = stb ? stb->GetStructName() : + ustb->GetStructName(); return (namea == nameb); } From 78d62705cc1d27e8654635b103d28d9af15b1f0c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 12:07:01 -0700 Subject: [PATCH 124/173] Cache element types in StructType. 
Previously, GetElementType() would end up causing dynamic allocation to happen to compute the final element type (turning types with unbound variability into the same type with the struct's variability) each time it was called, which was wasteful and slow. Now we cache the result. Another 20% perf on compiling that problematic program. --- type.cpp | 27 +++++++++++++++++---------- type.h | 2 ++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/type.cpp b/type.cpp index c29ac599..a978a16b 100644 --- a/type.cpp +++ b/type.cpp @@ -1757,7 +1757,8 @@ StructType::StructType(const std::string &n, const std::vector &el : CollectionType(STRUCT_TYPE), name(n), elementTypes(elts), elementNames(en), elementPositions(ep), variability(v), isConst(ic), pos(p) { oppositeConstStructType = NULL; - + finalElementTypes.resize(elts.size(), NULL); + if (variability != Variability::Unbound) { // For structs with non-unbound variability, we'll create the // correspoing LLVM struct type now, if one hasn't been made @@ -2074,17 +2075,23 @@ const Type * StructType::GetElementType(int i) const { Assert(variability != Variability::Unbound); Assert(i < (int)elementTypes.size()); - const Type *ret = elementTypes[i]; - if (ret == NULL) { - Assert(m->errorCount > 0); - return NULL; + + if (finalElementTypes[i] == NULL) { + const Type *type = elementTypes[i]; + if (type == NULL) { + Assert(m->errorCount > 0); + return NULL; + } + + // If the element has unbound variability, resolve its variability to + // the struct type's variability + type = type ->ResolveUnboundVariability(variability); + if (isConst) + type = type->GetAsConstType(); + finalElementTypes[i] = type; } - // If the element has unbound variability, resolve its variability to - // the struct type's variability - ret = ret->ResolveUnboundVariability(variability); - - return isConst ?
ret->GetAsConstType() : ret; + return finalElementTypes[i]; } diff --git a/type.h b/type.h index 6102e0dd..eb80281b 100644 --- a/type.h +++ b/type.h @@ -716,6 +716,8 @@ private: const bool isConst; const SourcePos pos; + mutable std::vector finalElementTypes; + mutable const StructType *oppositeConstStructType; }; From e495ef2c4853579276f6790b69f39cb3ac92997b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 12:45:22 -0700 Subject: [PATCH 125/173] Reduce dynamic memory allocation by reusing scope maps in symbol table. --- sym.cpp | 13 +++++++++++-- sym.h | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/sym.cpp b/sym.cpp index 0a211884..42d1f66f 100644 --- a/sym.cpp +++ b/sym.cpp @@ -73,14 +73,23 @@ SymbolTable::~SymbolTable() { void SymbolTable::PushScope() { - variables.push_back(new SymbolMapType); + SymbolMapType *sm; + if (freeSymbolMaps.size() > 0) { + sm = freeSymbolMaps.back(); + freeSymbolMaps.pop_back(); + sm->erase(sm->begin(), sm->end()); + } + else + sm = new SymbolMapType; + + variables.push_back(sm); } void SymbolTable::PopScope() { Assert(variables.size() > 1); - delete variables.back(); + freeSymbolMaps.push_back(variables.back()); variables.pop_back(); } diff --git a/sym.h b/sym.h index 43c8ff16..07bbe187 100644 --- a/sym.h +++ b/sym.h @@ -260,6 +260,8 @@ private: typedef std::map SymbolMapType; std::vector variables; + std::vector freeSymbolMaps; + /** Function declarations are *not* scoped. (C99, for example, allows an implementation to maintain function declarations in a single namespace.) A STL \c vector is used to store the function symbols From c0019bd8e58f498e3a4e7f5fcaf4b8fb72c2758a Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 13:14:17 -0700 Subject: [PATCH 126/173] Cache type and lvalue type in IndexExpr and MemberExpr This saves a bunch of redundant work and unnecessary duplicated memory allocations. 
--- expr.cpp | 81 ++++++++++++++++++++++++++++++++++++-------------------- expr.h | 7 +++++ 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/expr.cpp b/expr.cpp index d7c7d04c..47111936 100644 --- a/expr.cpp +++ b/expr.cpp @@ -447,9 +447,10 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, } return false; } - else - return lDoTypeConv(new ReferenceType(fromType), toType, NULL, - failureOk, errorMsgBase, pos); + else { + ReferenceType rt(fromType); + return lDoTypeConv(&rt, toType, NULL, failureOk, errorMsgBase, pos); + } } else if (Type::Equal(toType, fromType->GetAsNonConstType())) // convert: const T -> T (as long as T isn't a reference) @@ -3799,6 +3800,7 @@ IndexExpr::IndexExpr(Expr *a, Expr *i, SourcePos p) : Expr(p) { baseExpr = a; index = i; + type = lvalueType = NULL; } @@ -3939,7 +3941,7 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *ptr = GetLValue(ctx); llvm::Value *mask = NULL; - const Type *lvalueType = GetLValueType(); + const Type *lvType = GetLValueType(); if (ptr == NULL) { // We may be indexing into a temporary that hasn't hit memory, so // get the full value and stuff it into temporary alloca'd space so @@ -3958,12 +3960,12 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { // Get a pointer type to the underlying elements const SequentialType *st = CastType(baseExprType); Assert(st != NULL); - lvalueType = PointerType::GetUniform(st->GetElementType()); + lvType = PointerType::GetUniform(st->GetElementType()); // And do the indexing calculation into the temporary array in memory ptr = ctx->GetElementPtrInst(tmpPtr, LLVMInt32(0), index->GetValue(ctx), PointerType::GetUniform(baseExprType)); - ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, lvalueType); + ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, lvType); mask = LLVMMaskAllOn; } @@ -3974,12 +3976,15 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { } ctx->SetDebugPos(pos); - return ctx->LoadInst(ptr, mask, lvalueType); + return 
ctx->LoadInst(ptr, mask, lvType); } const Type * IndexExpr::GetType() const { + if (type != NULL) + return type; + const Type *baseExprType, *indexType; if (!baseExpr || !index || ((baseExprType = baseExpr->GetType()) == NULL) || @@ -4015,9 +4020,11 @@ IndexExpr::GetType() const { // type. if (indexType->IsUniformType() && (pointerType == NULL || pointerType->IsUniformType())) - return elementType; + type = elementType; else - return elementType->GetAsVaryingType(); + type = elementType->GetAsVaryingType(); + + return type; } @@ -4089,8 +4096,7 @@ lCheckIndicesVersusBounds(const Type *baseExprType, Expr *index) { */ static llvm::Value * lConvertPtrToSliceIfNeeded(FunctionEmitContext *ctx, - llvm::Value *ptr, - const Type **type) { + llvm::Value *ptr, const Type **type) { Assert(*type != NULL); const PointerType *ptrType = CastType(*type); bool convertToSlice = (ptrType->GetBaseType()->IsSOAType() && @@ -4178,6 +4184,9 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { const Type * IndexExpr::GetLValueType() const { + if (lvalueType != NULL) + return lvalueType; + const Type *baseExprType, *baseExprLValueType, *indexType; if (baseExpr == NULL || index == NULL || ((baseExprType = baseExpr->GetType()) == NULL) || @@ -4215,19 +4224,18 @@ IndexExpr::GetLValueType() const { // The return type is uniform iff. the base is a uniform pointer / a // collection of uniform typed elements and the index is uniform. - const PointerType *retType; if (baseVarying == false && indexType->IsUniformType()) - retType = PointerType::GetUniform(elementType); + lvalueType = PointerType::GetUniform(elementType); else - retType = PointerType::GetVarying(elementType); + lvalueType = PointerType::GetVarying(elementType); // Finally, if we're indexing into an SOA type, then the resulting // pointer must (currently) be a slice pointer; we don't allow indexing // the soa-width-wide structs directly. 
if (elementType->IsSOAType()) - retType = retType->GetAsSlice(); + lvalueType = lvalueType->GetAsSlice(); - return retType; + return lvalueType; } @@ -4370,6 +4378,9 @@ StructMemberExpr::StructMemberExpr(Expr *e, const char *id, SourcePos p, const Type * StructMemberExpr::GetType() const { + if (type != NULL) + return type; + // It's a struct, and the result type is the element type, possibly // promoted to varying if the struct type / lvalue is varying. const Type *exprType, *lvalueType; @@ -4412,12 +4423,16 @@ StructMemberExpr::GetType() const { // result type must be the varying version of the element type. elementType = elementType->GetAsVaryingType(); - return elementType; + type = elementType; + return type; } const Type * StructMemberExpr::GetLValueType() const { + if (lvalueType != NULL) + return lvalueType; + if (expr == NULL) { Assert(m->errorCount > 0); return NULL; @@ -4446,7 +4461,8 @@ StructMemberExpr::GetLValueType() const { CastType(exprLValueType)->IsSlice()) ptrType = ptrType->GetAsFrozenSlice(); - return ptrType; + lvalueType = ptrType; + return lvalueType; } @@ -4548,25 +4564,28 @@ VectorMemberExpr::VectorMemberExpr(Expr *e, const char *id, SourcePos p, const Type * VectorMemberExpr::GetType() const { + if (type != NULL) + return type; + // For 1-element expressions, we have the base vector element // type. For n-element expressions, we have a shortvec type // with n > 1 elements. This can be changed when we get // type<1> -> type conversions. - const Type *type = (identifier.length() == 1) ? + type = (identifier.length() == 1) ? 
(const Type *)exprVectorType->GetElementType() : (const Type *)memberType; - const Type *lvalueType = GetLValueType(); - if (lvalueType != NULL) { - bool isSlice = (CastType(lvalueType) && - CastType(lvalueType)->IsSlice()); + const Type *lvType = GetLValueType(); + if (lvType != NULL) { + bool isSlice = (CastType(lvType) && + CastType(lvType)->IsSlice()); if (isSlice) { //CO Assert(type->IsSOAType()); - if (lvalueType->IsUniformType()) + if (lvType->IsUniformType()) type = type->GetAsUniformType(); } - if (lvalueType->IsVaryingType()) + if (lvType->IsVaryingType()) type = type->GetAsVaryingType(); } @@ -4586,6 +4605,9 @@ VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const { const Type * VectorMemberExpr::GetLValueType() const { + if (lvalueType != NULL) + return lvalueType; + if (identifier.length() == 1) { if (expr == NULL) { Assert(m->errorCount > 0); @@ -4608,7 +4630,7 @@ VectorMemberExpr::GetLValueType() const { // but a pointer to a float, etc. const Type *elementType = vt->GetElementType(); if (CastType(exprLValueType) != NULL) - return new ReferenceType(elementType); + lvalueType = new ReferenceType(elementType); else { const PointerType *ptrType = exprLValueType->IsUniformType() ? 
PointerType::GetUniform(elementType) : @@ -4617,11 +4639,11 @@ VectorMemberExpr::GetLValueType() const { if (CastType(exprLValueType) && CastType(exprLValueType)->IsSlice()) ptrType = ptrType->GetAsFrozenSlice(); - return ptrType; + lvalueType = ptrType; } } - else - return NULL; + + return lvalueType; } @@ -4775,6 +4797,7 @@ MemberExpr::MemberExpr(Expr *e, const char *id, SourcePos p, SourcePos idpos, expr = e; identifier = id; dereferenceExpr = derefLValue; + type = lvalueType = NULL; } diff --git a/expr.h b/expr.h index f7d112b9..d65bc8c3 100644 --- a/expr.h +++ b/expr.h @@ -284,6 +284,10 @@ public: int EstimateCost() const; Expr *baseExpr, *index; + +private: + mutable const Type *type; + mutable const PointerType *lvalueType; }; @@ -320,6 +324,9 @@ public: member is found. (i.e. this is true if the MemberExpr was a '->' operator, and is false if it was a '.' operator. */ bool dereferenceExpr; + +protected: + mutable const Type *type, *lvalueType; }; From bff02017da0b960a1fcf799d9639c1bc30f769ea Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 13:14:44 -0700 Subject: [PATCH 127/173] Cache const/non-const variants of Atomic and ReferenceTypes. More reduction of dynamic memory allocation. 
--- type.cpp | 28 ++++++++++++++++++++++++---- type.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/type.cpp b/type.cpp index a978a16b..d73e2786 100644 --- a/type.cpp +++ b/type.cpp @@ -185,6 +185,7 @@ const AtomicType *AtomicType::Void = AtomicType::AtomicType(BasicType bt, Variability v, bool ic) : Type(ATOMIC_TYPE), basicType(bt), variability(v), isConst(ic) { + asOtherConstType = NULL; } @@ -257,7 +258,11 @@ AtomicType::GetAsConstType() const { if (basicType == TYPE_VOID || isConst == true) return this; - return new AtomicType(basicType, variability, true); + if (asOtherConstType == NULL) { + asOtherConstType = new AtomicType(basicType, variability, true); + asOtherConstType->asOtherConstType = this; + } + return asOtherConstType; } @@ -266,7 +271,11 @@ AtomicType::GetAsNonConstType() const { if (basicType == TYPE_VOID || isConst == false) return this; - return new AtomicType(basicType, variability, false); + if (asOtherConstType == NULL) { + asOtherConstType = new AtomicType(basicType, variability, false); + asOtherConstType->asOtherConstType = this; + } + return asOtherConstType; } @@ -2310,6 +2319,7 @@ UndefinedStructType::GetDIType(llvm::DIDescriptor scope) const { ReferenceType::ReferenceType(const Type *t) : Type(REFERENCE_TYPE), targetType(t) { + asOtherConstType = NULL; } @@ -2450,7 +2460,12 @@ ReferenceType::GetAsConstType() const { } if (IsConstType()) return this; - return new ReferenceType(targetType->GetAsConstType()); + + if (asOtherConstType == NULL) { + asOtherConstType = new ReferenceType(targetType->GetAsConstType()); + asOtherConstType->asOtherConstType = this; + } + return asOtherConstType; } @@ -2462,7 +2477,12 @@ ReferenceType::GetAsNonConstType() const { } if (!IsConstType()) return this; - return new ReferenceType(targetType->GetAsNonConstType()); + + if (asOtherConstType == NULL) { + asOtherConstType = new ReferenceType(targetType->GetAsNonConstType()); + asOtherConstType->asOtherConstType = this; + } + return 
asOtherConstType; } diff --git a/type.h b/type.h index eb80281b..c58a4ce9 100644 --- a/type.h +++ b/type.h @@ -332,6 +332,8 @@ private: const Variability variability; const bool isConst; AtomicType(BasicType basicType, Variability v, bool isConst); + + mutable const AtomicType *asOtherConstType; }; @@ -802,6 +804,7 @@ public: private: const Type * const targetType; + mutable const ReferenceType *asOtherConstType; }; From e1bc010bd139e45ba9360c8333ed96c4f1066cbd Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 13:41:47 -0700 Subject: [PATCH 128/173] More reduction of dynamic allocations in lDoTypeConv() --- expr.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/expr.cpp b/expr.cpp index 47111936..8236e8e4 100644 --- a/expr.cpp +++ b/expr.cpp @@ -279,10 +279,10 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, const Type *eltType = fromArrayType->GetElementType(); if (toPointerType->GetBaseType()->IsConstType()) eltType = eltType->GetAsConstType(); - if (Type::Equal(toPointerType, - new PointerType(eltType, - toPointerType->GetVariability(), - toPointerType->IsConstType()))) + + PointerType pt(eltType, toPointerType->GetVariability(), + toPointerType->IsConstType()); + if (Type::Equal(toPointerType, &pt)) goto typecast_ok; else { if (!failureOk) @@ -392,7 +392,9 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, // Convert from type T -> const T; just return a TypeCast expr, which // can handle this - if (Type::Equal(toType, fromType->GetAsConstType())) + if (Type::EqualIgnoringConst(toType, fromType) && + toType->IsConstType() == true && + fromType->IsConstType() == false) goto typecast_ok; if (CastType(fromType)) { From 7db8824da2132b13a6e25188515996cfae906163 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 13:42:23 -0700 Subject: [PATCH 129/173] Reduce dynamic memory allocation in getting unif/varying variants of AtomicTypes --- type.cpp | 17 +++++++++++++++-- 
type.h | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/type.cpp b/type.cpp index d73e2786..a492af1d 100644 --- a/type.cpp +++ b/type.cpp @@ -186,6 +186,7 @@ const AtomicType *AtomicType::Void = AtomicType::AtomicType(BasicType bt, Variability v, bool ic) : Type(ATOMIC_TYPE), basicType(bt), variability(v), isConst(ic) { asOtherConstType = NULL; + asUniformType = asVaryingType = NULL; } @@ -290,7 +291,13 @@ AtomicType::GetAsVaryingType() const { Assert(basicType != TYPE_VOID); if (variability == Variability::Varying) return this; - return new AtomicType(basicType, Variability::Varying, isConst); + + if (asVaryingType == NULL) { + asVaryingType = new AtomicType(basicType, Variability::Varying, isConst); + if (variability == Variability::Uniform) + asVaryingType->asUniformType = this; + } + return asVaryingType; } @@ -299,7 +306,13 @@ AtomicType::GetAsUniformType() const { Assert(basicType != TYPE_VOID); if (variability == Variability::Uniform) return this; - return new AtomicType(basicType, Variability::Uniform, isConst); + + if (asUniformType == NULL) { + asUniformType = new AtomicType(basicType, Variability::Uniform, isConst); + if (variability == Variability::Varying) + asUniformType->asVaryingType = this; + } + return asUniformType; } diff --git a/type.h b/type.h index c58a4ce9..8c3e8de2 100644 --- a/type.h +++ b/type.h @@ -333,7 +333,7 @@ private: const bool isConst; AtomicType(BasicType basicType, Variability v, bool isConst); - mutable const AtomicType *asOtherConstType; + mutable const AtomicType *asOtherConstType, *asUniformType, *asVaryingType; }; From 413264eaae51b657fd4349c291d1419727231085 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 13:42:43 -0700 Subject: [PATCH 130/173] Make return values const &s to save copying. 
--- type.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/type.h b/type.h index 8c3e8de2..0d155ef2 100644 --- a/type.h +++ b/type.h @@ -685,12 +685,12 @@ public: int GetElementNumber(const std::string &name) const; /** Returns the name of the i'th element of the structure. */ - const std::string GetElementName(int i) const { return elementNames[i]; } + const std::string &GetElementName(int i) const { return elementNames[i]; } /** Returns the total number of elements in the structure. */ int GetElementCount() const { return int(elementTypes.size()); } - SourcePos GetElementPosition(int i) const { return elementPositions[i]; } + const SourcePos &GetElementPosition(int i) const { return elementPositions[i]; } /** Returns the name of the structure type. (e.g. struct Foo -> "Foo".) */ const std::string &GetStructName() const { return name; } From 80065898289b59251e4ca512f7f05cf8eb7fd4bf Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 4 May 2012 13:43:29 -0700 Subject: [PATCH 131/173] Use llvm::SmallVectors for struct member types and function types. Further reduction of dynamic memory allocation... --- builtins.cpp | 8 ++++---- decl.cpp | 16 ++++++++-------- decl.h | 7 ++++--- parse.yy | 12 ++++++------ type.cpp | 24 ++++++++++++------------ type.h | 33 +++++++++++++++++---------------- 6 files changed, 51 insertions(+), 49 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index d9432ae9..14444f40 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -157,7 +157,7 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) { static void lCreateSymbol(const std::string &name, const Type *returnType, - const std::vector &argTypes, + llvm::SmallVector &argTypes, const llvm::FunctionType *ftype, llvm::Function *func, SymbolTable *symbolTable) { SourcePos noPos; @@ -199,7 +199,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) { // bool, so just have a one-off override for that one... 
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") { const Type *returnType = AtomicType::VaryingInt32; - std::vector argTypes; + llvm::SmallVector argTypes; argTypes.push_back(AtomicType::VaryingBool); FunctionType *funcType = new FunctionType(returnType, argTypes, noPos); @@ -229,7 +229,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) { // Iterate over the arguments and try to find their equivalent ispc // types. Track if any of the arguments has an integer type. bool anyIntArgs = false; - std::vector argTypes; + llvm::SmallVector argTypes; for (unsigned int j = 0; j < ftype->getNumParams(); ++j) { const llvm::Type *llvmArgType = ftype->getParamType(j); const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned); @@ -674,7 +674,7 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module, static void lDefineConstantIntFunc(const char *name, int val, llvm::Module *module, SymbolTable *symbolTable) { - std::vector args; + llvm::SmallVector args; FunctionType *ft = new FunctionType(AtomicType::UniformInt32, args, SourcePos()); Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC); diff --git a/decl.cpp b/decl.cpp index 00caa856..728206fd 100644 --- a/decl.cpp +++ b/decl.cpp @@ -386,11 +386,11 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { type = arrayType; } else if (kind == DK_FUNCTION) { - std::vector args; - std::vector argNames; - std::vector argDefaults; - std::vector argPos; - + llvm::SmallVector args; + llvm::SmallVector argNames; + llvm::SmallVector argDefaults; + llvm::SmallVector argPos; + // Loop over the function arguments and store the names, types, // default values (if any), and source file positions each one in // the corresponding vector. 
@@ -646,9 +646,9 @@ Declaration::Print(int indent) const { void GetStructTypesNamesPositions(const std::vector &sd, - std::vector *elementTypes, - std::vector *elementNames, - std::vector *elementPositions) { + llvm::SmallVector *elementTypes, + llvm::SmallVector *elementNames, + llvm::SmallVector *elementPositions) { std::set seenNames; for (unsigned int i = 0; i < sd.size(); ++i) { const Type *type = sd[i]->type; diff --git a/decl.h b/decl.h index ea2cb0fd..f8b5f3d4 100644 --- a/decl.h +++ b/decl.h @@ -55,6 +55,7 @@ #define ISPC_DECL_H #include "ispc.h" +#include struct VariableDeclaration; @@ -219,8 +220,8 @@ struct StructDeclaration { /** Given a set of StructDeclaration instances, this returns the types of the elements of the corresponding struct and their names. */ extern void GetStructTypesNamesPositions(const std::vector &sd, - std::vector *elementTypes, - std::vector *elementNames, - std::vector *elementPositions); + llvm::SmallVector *elementTypes, + llvm::SmallVector *elementNames, + llvm::SmallVector *elementPositions); #endif // ISPC_DECL_H diff --git a/parse.yy b/parse.yy index e983afdf..605d5d7d 100644 --- a/parse.yy +++ b/parse.yy @@ -853,9 +853,9 @@ struct_or_union_specifier : struct_or_union struct_or_union_name '{' struct_declaration_list '}' { if ($4 != NULL) { - std::vector elementTypes; - std::vector elementNames; - std::vector elementPositions; + llvm::SmallVector elementTypes; + llvm::SmallVector elementNames; + llvm::SmallVector elementPositions; GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames, &elementPositions); StructType *st = new StructType($2, elementTypes, elementNames, @@ -869,9 +869,9 @@ struct_or_union_specifier | struct_or_union '{' struct_declaration_list '}' { if ($3 != NULL) { - std::vector elementTypes; - std::vector elementNames; - std::vector elementPositions; + llvm::SmallVector elementTypes; + llvm::SmallVector elementNames; + llvm::SmallVector elementPositions; GetStructTypesNamesPositions(*$3, 
&elementTypes, &elementNames, &elementPositions); $$ = new StructType("", elementTypes, elementNames, elementPositions, diff --git a/type.cpp b/type.cpp index a492af1d..f7edc485 100644 --- a/type.cpp +++ b/type.cpp @@ -1772,9 +1772,9 @@ lMangleStructName(const std::string &name, Variability variability) { } -StructType::StructType(const std::string &n, const std::vector &elts, - const std::vector &en, - const std::vector &ep, +StructType::StructType(const std::string &n, const llvm::SmallVector &elts, + const llvm::SmallVector &en, + const llvm::SmallVector &ep, bool ic, Variability v, SourcePos p) : CollectionType(STRUCT_TYPE), name(n), elementTypes(elts), elementNames(en), elementPositions(ep), variability(v), isConst(ic), pos(p) { @@ -2590,22 +2590,22 @@ ReferenceType::GetDIType(llvm::DIDescriptor scope) const { /////////////////////////////////////////////////////////////////////////// // FunctionType -FunctionType::FunctionType(const Type *r, const std::vector &a, +FunctionType::FunctionType(const Type *r, const llvm::SmallVector &a, SourcePos p) : Type(FUNCTION_TYPE), isTask(false), isExported(false), isExternC(false), - returnType(r), paramTypes(a), paramNames(std::vector(a.size(), "")), - paramDefaults(std::vector(a.size(), NULL)), - paramPositions(std::vector(a.size(), p)) { + returnType(r), paramTypes(a), paramNames(llvm::SmallVector(a.size(), "")), + paramDefaults(llvm::SmallVector(a.size(), NULL)), + paramPositions(llvm::SmallVector(a.size(), p)) { Assert(returnType != NULL); isSafe = false; costOverride = -1; } -FunctionType::FunctionType(const Type *r, const std::vector &a, - const std::vector &an, - const std::vector &ad, - const std::vector &ap, +FunctionType::FunctionType(const Type *r, const llvm::SmallVector &a, + const llvm::SmallVector &an, + const llvm::SmallVector &ad, + const llvm::SmallVector &ap, bool it, bool is, bool ec) : Type(FUNCTION_TYPE), isTask(it), isExported(is), isExternC(ec), returnType(r), paramTypes(a), paramNames(an), 
paramDefaults(ad), paramPositions(ap) { @@ -2697,7 +2697,7 @@ FunctionType::ResolveUnboundVariability(Variability v) const { } const Type *rt = returnType->ResolveUnboundVariability(v); - std::vector pt; + llvm::SmallVector pt; for (unsigned int i = 0; i < paramTypes.size(); ++i) { if (paramTypes[i] == NULL) { Assert(m->errorCount > 0); diff --git a/type.h b/type.h index 0d155ef2..ebd69af9 100644 --- a/type.h +++ b/type.h @@ -42,6 +42,7 @@ #include "util.h" #include #include +#include class ConstExpr; class StructType; @@ -642,9 +643,9 @@ private: */ class StructType : public CollectionType { public: - StructType(const std::string &name, const std::vector &elts, - const std::vector &eltNames, - const std::vector &eltPositions, bool isConst, + StructType(const std::string &name, const llvm::SmallVector &elts, + const llvm::SmallVector &eltNames, + const llvm::SmallVector &eltPositions, bool isConst, Variability variability, SourcePos pos); Variability GetVariability() const; @@ -709,16 +710,16 @@ private: make a uniform version of the struct, we've maintained the original information about the member types. */ - const std::vector elementTypes; - const std::vector elementNames; + const llvm::SmallVector elementTypes; + const llvm::SmallVector elementNames; /** Source file position at which each structure element declaration appeared. 
*/ - const std::vector elementPositions; + const llvm::SmallVector elementPositions; const Variability variability; const bool isConst; const SourcePos pos; - mutable std::vector finalElementTypes; + mutable llvm::SmallVector finalElementTypes; mutable const StructType *oppositeConstStructType; }; @@ -822,12 +823,12 @@ private: class FunctionType : public Type { public: FunctionType(const Type *returnType, - const std::vector &argTypes, SourcePos pos); + const llvm::SmallVector &argTypes, SourcePos pos); FunctionType(const Type *returnType, - const std::vector &argTypes, - const std::vector &argNames, - const std::vector &argDefaults, - const std::vector &argPos, + const llvm::SmallVector &argTypes, + const llvm::SmallVector &argNames, + const llvm::SmallVector &argDefaults, + const llvm::SmallVector &argPos, bool isTask, bool isExported, bool isExternC); Variability GetVariability() const; @@ -897,16 +898,16 @@ private: // The following four vectors should all have the same length (which is // in turn the length returned by GetNumParameters()). - const std::vector paramTypes; - const std::vector paramNames; + const llvm::SmallVector paramTypes; + const llvm::SmallVector paramNames; /** Default values of the function's arguments. For arguments without default values provided, NULL is stored. */ - mutable std::vector paramDefaults; + mutable llvm::SmallVector paramDefaults; /** The names provided (if any) with the function arguments in the function's signature. These should only be used for error messages and the like and so not affect testing function types for equality, etc. */ - const std::vector paramPositions; + const llvm::SmallVector paramPositions; }; From ee7e3679818d281c08cf1bcf50d5ae37ce2bbb1f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sat, 5 May 2012 15:13:11 -0700 Subject: [PATCH 132/173] Do global dead code elimination early in optimization. 
This gives a 15-20% speedup in compilation time for simple programs (but only ~2% for the big 21k monster program). --- builtins/util.m4 | 258 ++++++++++++++++++++++++++++++++++++++++++++++- opt.cpp | 2 + 2 files changed, 257 insertions(+), 3 deletions(-) diff --git a/builtins/util.m4 b/builtins/util.m4 index 042b2ef5..023ca411 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -1661,6 +1661,258 @@ declare @__exp_varying_float() nounwind readnone declare float @__pow_uniform_float(float, float) nounwind readnone declare @__pow_varying_float(, ) nounwind readnone +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +declare void @__use8() +declare void @__use16() +declare void @__use32() +declare void @__use64() + +;; This is a temporary function that will be removed at the end of +;; compilation--the idea is that it calls out to all of the various +;; functions / pseudo-function declarations that we need to keep around +;; so that they are available to the various optimization passes. This +;; then prevents those functions from being removed as dead code when +;; we do early DCE... 
+ +define void @__keep_funcs_live(i8 * %ptr, %v8, %v16, + %v32, %v64, + %mask) { + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; loads + %ml8 = call @__masked_load_8(i8 * %ptr, %mask) + call void @__use8( %ml8) + %ml16 = call @__masked_load_16(i8 * %ptr, %mask) + call void @__use16( %ml16) + %ml32 = call @__masked_load_32(i8 * %ptr, %mask) + call void @__use32( %ml32) + %ml64 = call @__masked_load_64(i8 * %ptr, %mask) + call void @__use64( %ml64) + + %lb8 = call @__load_and_broadcast_8(i8 * %ptr, %mask) + call void @__use8( %lb8) + %lb16 = call @__load_and_broadcast_16(i8 * %ptr, %mask) + call void @__use16( %lb16) + %lb32 = call @__load_and_broadcast_32(i8 * %ptr, %mask) + call void @__use32( %lb32) + %lb64 = call @__load_and_broadcast_64(i8 * %ptr, %mask) + call void @__use64( %lb64) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; stores + %pv8 = bitcast i8 * %ptr to * + call void @__pseudo_masked_store_8( * %pv8, %v8, + %mask) + %pv16 = bitcast i8 * %ptr to * + call void @__pseudo_masked_store_16( * %pv16, %v16, + %mask) + %pv32 = bitcast i8 * %ptr to * + call void @__pseudo_masked_store_32( * %pv32, %v32, + %mask) + %pv64 = bitcast i8 * %ptr to * + call void @__pseudo_masked_store_64( * %pv64, %v64, + %mask) + + call void @__masked_store_8( * %pv8, %v8, %mask) + call void @__masked_store_16( * %pv16, %v16, %mask) + call void @__masked_store_32( * %pv32, %v32, %mask) + call void @__masked_store_64( * %pv64, %v64, %mask) + + call void @__masked_store_blend_8( * %pv8, %v8, + %mask) + call void @__masked_store_blend_16( * %pv16, %v16, + %mask) + call void @__masked_store_blend_32( * %pv32, %v32, + %mask) + call void @__masked_store_blend_64( * %pv64, %v64, + %mask) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; gathers + + %pg32_8 = call @__pseudo_gather32_8( %v32, + %mask) + call void @__use8( %pg32_8) + %pg32_16 = call @__pseudo_gather32_16( %v32, + 
%mask) + call void @__use16( %pg32_16) + %pg32_32 = call @__pseudo_gather32_32( %v32, + %mask) + call void @__use32( %pg32_32) + %pg32_64 = call @__pseudo_gather32_64( %v32, + %mask) + call void @__use64( %pg32_64) + + %pg64_8 = call @__pseudo_gather64_8( %v64, + %mask) + call void @__use8( %pg64_8) + %pg64_16 = call @__pseudo_gather64_16( %v64, + %mask) + call void @__use16( %pg64_16) + %pg64_32 = call @__pseudo_gather64_32( %v64, + %mask) + call void @__use32( %pg64_32) + %pg64_64 = call @__pseudo_gather64_64( %v64, + %mask) + call void @__use64( %pg64_64) + + %g32_8 = call @__gather32_i8( %v32, + %mask) + call void @__use8( %g32_8) + %g32_16 = call @__gather32_i16( %v32, + %mask) + call void @__use16( %g32_16) + %g32_32 = call @__gather32_i32( %v32, + %mask) + call void @__use32( %g32_32) + %g32_64 = call @__gather32_i64( %v32, + %mask) + call void @__use64( %g32_64) + + %g64_8 = call @__gather64_i8( %v64, + %mask) + call void @__use8( %g64_8) + %g64_16 = call @__gather64_i16( %v64, + %mask) + call void @__use16( %g64_16) + %g64_32 = call @__gather64_i32( %v64, + %mask) + call void @__use32( %g64_32) + %g64_64 = call @__gather64_i64( %v64, + %mask) + call void @__use64( %g64_64) + + %pgbo32_8 = call + @__pseudo_gather_base_offsets32_8(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use8( %pgbo32_8) + %pgbo32_16 = call + @__pseudo_gather_base_offsets32_16(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use16( %pgbo32_16) + %pgbo32_32 = call + @__pseudo_gather_base_offsets32_32(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use32( %pgbo32_32) + %pgbo32_64 = call + @__pseudo_gather_base_offsets32_64(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use64( %pgbo32_64) + + %gbo32_8 = call + @__gather_base_offsets32_i8(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use8( %gbo32_8) + %gbo32_16 = call + @__gather_base_offsets32_i16(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use16( %gbo32_16) + %gbo32_32 = call + 
@__gather_base_offsets32_i32(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use32( %gbo32_32) + %gbo32_64 = call + @__gather_base_offsets32_i64(i8 * %ptr, %v32, i32 0, + %v32, %mask) + call void @__use64( %gbo32_64) + + + %pgbo64_8 = call + @__pseudo_gather_base_offsets64_8(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use8( %pgbo64_8) + %pgbo64_16 = call + @__pseudo_gather_base_offsets64_16(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use16( %pgbo64_16) + %pgbo64_32 = call + @__pseudo_gather_base_offsets64_32(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use32( %pgbo64_32) + %pgbo64_64 = call + @__pseudo_gather_base_offsets64_64(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use64( %pgbo64_64) + + %gbo64_8 = call + @__gather_base_offsets64_i8(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use8( %gbo64_8) + %gbo64_16 = call + @__gather_base_offsets64_i16(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use16( %gbo64_16) + %gbo64_32 = call + @__gather_base_offsets64_i32(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use32( %gbo64_32) + %gbo64_64 = call + @__gather_base_offsets64_i64(i8 * %ptr, %v64, i32 0, + %v64, %mask) + call void @__use64( %gbo64_64) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; scatters + + call void @__pseudo_scatter32_8( %v32, %v8, %mask) + call void @__pseudo_scatter32_16( %v32, %v16, %mask) + call void @__pseudo_scatter32_32( %v32, %v32, %mask) + call void @__pseudo_scatter32_64( %v32, %v64, %mask) + + call void @__pseudo_scatter64_8( %v64, %v8, %mask) + call void @__pseudo_scatter64_16( %v64, %v16, %mask) + call void @__pseudo_scatter64_32( %v64, %v32, %mask) + call void @__pseudo_scatter64_64( %v64, %v64, %mask) + + call void @__scatter32_i8( %v32, %v8, %mask) + call void @__scatter32_i16( %v32, %v16, %mask) + call void @__scatter32_i32( %v32, %v32, %mask) + call void @__scatter32_i64( %v32, %v64, %mask) + + call void @__scatter64_i8( %v64, 
%v8, %mask) + call void @__scatter64_i16( %v64, %v16, %mask) + call void @__scatter64_i32( %v64, %v32, %mask) + call void @__scatter64_i64( %v64, %v64, %mask) + + call void @__pseudo_scatter_base_offsets32_8(i8 * %ptr, %v32, i32 0, %v32, + %v8, %mask) + call void @__pseudo_scatter_base_offsets32_16(i8 * %ptr, %v32, i32 0, %v32, + %v16, %mask) + call void @__pseudo_scatter_base_offsets32_32(i8 * %ptr, %v32, i32 0, %v32, + %v32, %mask) + call void @__pseudo_scatter_base_offsets32_64(i8 * %ptr, %v32, i32 0, %v32, + %v64, %mask) + + call void @__pseudo_scatter_base_offsets64_8(i8 * %ptr, %v64, i32 0, %v64, + %v8, %mask) + call void @__pseudo_scatter_base_offsets64_16(i8 * %ptr, %v64, i32 0, %v64, + %v16, %mask) + call void @__pseudo_scatter_base_offsets64_32(i8 * %ptr, %v64, i32 0, %v64, + %v32, %mask) + call void @__pseudo_scatter_base_offsets64_64(i8 * %ptr, %v64, i32 0, %v64, + %v64, %mask) + + call void @__scatter_base_offsets32_i8(i8 * %ptr, %v32, i32 0, %v32, + %v8, %mask) + call void @__scatter_base_offsets32_i16(i8 * %ptr, %v32, i32 0, %v32, + %v16, %mask) + call void @__scatter_base_offsets32_i32(i8 * %ptr, %v32, i32 0, %v32, + %v32, %mask) + call void @__scatter_base_offsets32_i64(i8 * %ptr, %v32, i32 0, %v32, + %v64, %mask) + + call void @__scatter_base_offsets64_i8(i8 * %ptr, %v64, i32 0, %v64, + %v8, %mask) + call void @__scatter_base_offsets64_i16(i8 * %ptr, %v64, i32 0, %v64, + %v16, %mask) + call void @__scatter_base_offsets64_i32(i8 * %ptr, %v64, i32 0, %v64, + %v32, %mask) + call void @__scatter_base_offsets64_i64(i8 * %ptr, %v64, i32 0, %v64, + %v64, %mask) + + ret void +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; vector ops @@ -2251,9 +2503,9 @@ define <$1 x $2> @__load_and_broadcast_$3(i8 *, <$1 x MASK> %mask) nounwind alwa ;; $4: alignment for elements of type $2 (4, 8, ...) 
define(`masked_load', ` -define <$1 x $2> @__masked_load_$3(i8 *, <$1 x i32> %mask) nounwind alwaysinline { +define <$1 x $2> @__masked_load_$3(i8 *, <$1 x MASK> %mask) nounwind alwaysinline { entry: - %mm = call i32 @__movmsk(<$1 x i32> %mask) + %mm = call i32 @__movmsk(<$1 x MASK> %mask) ; if the first lane and the last lane are on, then it is safe to do a vector load ; of the whole thing--what the lanes in the middle want turns out to not matter... diff --git a/opt.cpp b/opt.cpp index 063be681..dc201367 100644 --- a/opt.cpp +++ b/opt.cpp @@ -446,6 +446,7 @@ Optimize(llvm::Module *module, int optLevel) { llvm::initializeTarget(*registry); bool runSROA = true; + optPM.add(llvm::createGlobalDCEPass()); // Early optimizations to try to reduce the total amount of code to // work with if we can @@ -3906,6 +3907,7 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) { "__scatter32_i32", "__scatter32_i64", "__scatter64_i8", "__scatter64_i16", "__scatter64_i32", "__scatter64_i64", + "__keep_funcs_live", }; bool modifiedAny = false; From 15ea0af6877508421b728f6603b70b63a868fd69 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sat, 5 May 2012 15:13:38 -0700 Subject: [PATCH 133/173] Add -f option to run_tests.py This allows providing additional command-line arguments to ispc, e.g. to force compilation with -O1, -g, etc. 
--- run_tests.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/run_tests.py b/run_tests.py index 79465267..6bfadf05 100755 --- a/run_tests.py +++ b/run_tests.py @@ -32,6 +32,8 @@ parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order default=False, action="store_true") parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics", default=None) +parser.add_option("-f", "--ispc-flags", dest="ispc_flags", help="Additional flags for ispc (-g, -O1, ...)", + default="") parser.add_option('-t', '--target', dest='target', help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16, generic-32)', default="sse4") @@ -57,6 +59,10 @@ if not is_windows: else: ispc_exe = "../Release/ispc.exe" +ispc_exe += " " + options.ispc_flags + +print ispc_exe + is_generic_target = (options.target.find("generic-") != -1 and options.target != "generic-1") if is_generic_target and options.include_file == None: From 72b6c1285694b1891ed7c1cd78b77d936f485eeb Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sat, 5 May 2012 15:35:10 -0700 Subject: [PATCH 134/173] Notify LLVM pass mgr that the MakeInternalFuncsStaticPass doesn't change the CFG. --- opt.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opt.cpp b/opt.cpp index dc201367..3de7794e 100644 --- a/opt.cpp +++ b/opt.cpp @@ -3865,6 +3865,10 @@ public: MakeInternalFuncsStaticPass(bool last = false) : ModulePass(ID) { } + void getAnalysisUsage(llvm::AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + const char *getPassName() const { return "Make internal funcs \"static\""; } bool runOnModule(llvm::Module &m); }; From 55c754750e25cff18d2a99b114cb73cb99f346fd Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Sat, 5 May 2012 15:46:57 -0700 Subject: [PATCH 135/173] Remove a number of redundant/unneeded optimization passes. 
Performance and code quality of performance suite is unchanged, compilation times are improved by another 20% or so for simple programs (e.g. rt.ispc). One very complex program compiles about 2.4x faster now. --- opt.cpp | 95 ++++++--------------------------------------------------- 1 file changed, 9 insertions(+), 86 deletions(-) diff --git a/opt.cpp b/opt.cpp index 3de7794e..e6e68297 100644 --- a/opt.cpp +++ b/opt.cpp @@ -393,10 +393,13 @@ Optimize(llvm::Module *module, int optLevel) { } llvm::PassManager optPM; - llvm::FunctionPassManager funcPM(module); - optPM.add(llvm::createVerifierPass()); +#if 0 + std::string err; + optPM.add(llvm::createPrintModulePass(new llvm::raw_fd_ostream("-", err))); +#endif + llvm::TargetLibraryInfo *targetLibraryInfo = new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple())); optPM.add(targetLibraryInfo); @@ -423,17 +426,6 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createGlobalDCEPass()); } else { - // Otherwise throw the kitchen sink of optimizations at the code. - // This is almost certainly overkill and likely could be reduced, - // but on the other hand trying to remove some of these has - // historically caused performance slowdowns. Benchmark carefully - // if changing these around. - // - // Note in particular that a number of the ispc optimization - // passes are run repeatedly along the way; they often can kick in - // only later in the optimization process as things like constant - // propagation have done their thing, and then when they do kick - // in, they can often open up new opportunities for optimization... 
llvm::PassRegistry *registry = llvm::PassRegistry::getPassRegistry(); llvm::initializeCore(*registry); llvm::initializeScalarOpts(*registry); @@ -445,14 +437,12 @@ Optimize(llvm::Module *module, int optLevel) { llvm::initializeInstrumentation(*registry); llvm::initializeTarget(*registry); - bool runSROA = true; optPM.add(llvm::createGlobalDCEPass()); // Early optimizations to try to reduce the total amount of code to // work with if we can optPM.add(llvm::createReassociatePass()); optPM.add(llvm::createConstantPropagationPass()); - optPM.add(llvm::createConstantPropagationPass()); optPM.add(llvm::createDeadInstEliminationPass()); optPM.add(llvm::createCFGSimplificationPass()); @@ -466,8 +456,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createDeadInstEliminationPass()); // On to more serious optimizations - if (runSROA) - optPM.add(llvm::createScalarReplAggregatesPass()); + optPM.add(llvm::createScalarReplAggregatesPass()); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createPromoteMemoryToRegisterPass()); @@ -489,8 +478,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createJumpThreadingPass()); optPM.add(llvm::createCFGSimplificationPass()); - if (runSROA) - optPM.add(llvm::createScalarReplAggregatesPass()); + optPM.add(llvm::createScalarReplAggregatesPass()); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createTailCallEliminationPass()); @@ -527,25 +515,13 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateVSelMovmskOptPass()); - funcPM.add(llvm::createTypeBasedAliasAnalysisPass()); - funcPM.add(llvm::createBasicAliasAnalysisPass()); - funcPM.add(llvm::createCFGSimplificationPass()); - if (runSROA) - funcPM.add(llvm::createScalarReplAggregatesPass()); - funcPM.add(llvm::createEarlyCSEPass()); - 
funcPM.add(llvm::createLowerExpectIntrinsicPass()); - - optPM.add(llvm::createTypeBasedAliasAnalysisPass()); - optPM.add(llvm::createBasicAliasAnalysisPass()); - optPM.add(llvm::createGlobalOptimizerPass()); optPM.add(llvm::createIPSCCPPass()); optPM.add(llvm::createDeadArgEliminationPass()); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createFunctionInliningPass()); optPM.add(llvm::createArgumentPromotionPass()); - if (runSROA) - optPM.add(llvm::createScalarReplAggregatesPass(-1, false)); + optPM.add(llvm::createScalarReplAggregatesPass(-1, false)); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createReassociatePass()); @@ -559,57 +535,11 @@ Optimize(llvm::Module *module, int optLevel) { if (g->opt.unrollLoops) optPM.add(llvm::createLoopUnrollPass()); optPM.add(llvm::createGVNPass()); - optPM.add(llvm::createMemCpyOptPass()); - optPM.add(llvm::createSCCPPass()); - optPM.add(llvm::createInstructionCombiningPass()); - optPM.add(llvm::createJumpThreadingPass()); - optPM.add(llvm::createCorrelatedValuePropagationPass()); - optPM.add(llvm::createDeadStoreEliminationPass()); - optPM.add(llvm::createAggressiveDCEPass()); - optPM.add(llvm::createCFGSimplificationPass()); - optPM.add(llvm::createInstructionCombiningPass()); - optPM.add(llvm::createStripDeadPrototypesPass()); - optPM.add(llvm::createGlobalDCEPass()); - optPM.add(llvm::createConstantMergePass()); - - optPM.add(CreateIsCompileTimeConstantPass(false)); - optPM.add(CreateIntrinsicsOptPass()); - optPM.add(CreateVSelMovmskOptPass()); - - optPM.add(llvm::createGlobalOptimizerPass()); - optPM.add(llvm::createGlobalDCEPass()); - optPM.add(llvm::createArgumentPromotionPass()); - optPM.add(llvm::createInstructionCombiningPass()); - optPM.add(llvm::createJumpThreadingPass()); - if (runSROA) - optPM.add(llvm::createScalarReplAggregatesPass()); - 
optPM.add(llvm::createFunctionAttrsPass()); - optPM.add(llvm::createGlobalsModRefPass()); - optPM.add(llvm::createLICMPass()); - optPM.add(llvm::createGVNPass()); - optPM.add(llvm::createMemCpyOptPass()); - optPM.add(llvm::createDeadStoreEliminationPass()); - optPM.add(llvm::createInstructionCombiningPass()); - optPM.add(llvm::createJumpThreadingPass()); - optPM.add(llvm::createCFGSimplificationPass()); - optPM.add(llvm::createGlobalDCEPass()); optPM.add(CreateIsCompileTimeConstantPass(true)); optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateVSelMovmskOptPass()); - optPM.add(llvm::createArgumentPromotionPass()); - if (runSROA) - optPM.add(llvm::createScalarReplAggregatesPass(-1, false)); - optPM.add(llvm::createEarlyCSEPass()); - optPM.add(llvm::createSimplifyLibCallsPass()); - optPM.add(llvm::createJumpThreadingPass()); - optPM.add(llvm::createCorrelatedValuePropagationPass()); - optPM.add(llvm::createCFGSimplificationPass()); - optPM.add(llvm::createInstructionCombiningPass()); - optPM.add(llvm::createCFGSimplificationPass()); - optPM.add(llvm::createReassociatePass()); - optPM.add(llvm::createGVNPass()); optPM.add(llvm::createMemCpyOptPass()); optPM.add(llvm::createSCCPPass()); optPM.add(llvm::createInstructionCombiningPass()); @@ -620,21 +550,14 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createStripDeadPrototypesPass()); + optPM.add(CreateMakeInternalFuncsStaticPass()); optPM.add(llvm::createGlobalDCEPass()); optPM.add(llvm::createConstantMergePass()); - - optPM.add(CreateMakeInternalFuncsStaticPass()); - optPM.add(llvm::createGlobalDCEPass()); } // Finish up by making sure we didn't mess anything up in the IR along // the way. 
optPM.add(llvm::createVerifierPass()); - - for (llvm::Module::iterator fiter = module->begin(); fiter != module->end(); - ++fiter) - funcPM.run(*fiter); - optPM.run(*module); if (g->debugPrint) { From 067a2949ba50b206557bb197f3273fdc64248189 Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Sun, 6 May 2012 06:58:53 -0700 Subject: [PATCH 136/173] Added syntax highlighting for 'uniform' and 'varying' types. --- contrib/ispc.vim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/ispc.vim b/contrib/ispc.vim index 5c178c0f..cc8493f0 100644 --- a/contrib/ispc.vim +++ b/contrib/ispc.vim @@ -17,7 +17,7 @@ syn keyword ispcStatement cbreak ccontinue creturn launch print reference soa sy syn keyword ispcConditional cif syn keyword ispcRepeat cdo cfor cwhile syn keyword ispcBuiltin programCount programIndex -syn keyword ispcType export int8 int16 int32 int64 +syn keyword ispcType export uniform varying int8 int16 int32 int64 " Default highlighting command -nargs=+ HiLink hi def link From 041ade66d52e62fb3fd758fec8fdcc7b72f47ffe Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Sun, 6 May 2012 06:59:17 -0700 Subject: [PATCH 137/173] Placated compiler by initializing variable --- opt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opt.cpp b/opt.cpp index e6e68297..df4dd572 100644 --- a/opt.cpp +++ b/opt.cpp @@ -3292,7 +3292,7 @@ lCoalesceGathers(const std::vector &coalesceGroup) { // First, compute the shared base pointer for all of the gathers llvm::Value *basePtr = lComputeBasePtr(coalesceGroup[0], insertBefore); - int elementSize; + int elementSize = 0; if (coalesceGroup[0]->getType() == LLVMTypes::Int32VectorType) elementSize = 4; else if (coalesceGroup[0]->getType() == LLVMTypes::Int64VectorType) From c6241581a08b5ef6fd91669530c2f4e4adcc81ae Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 8 May 2012 09:54:23 -0700 Subject: [PATCH 138/173] Add an extra parameter to __smear functions to encode return type. 
Now, the __smear* functions in generated C++ code have an unused first parameter of the desired return type; this allows us to have headers that include variants of __smear for multiple target widths. (This approach is necessary since we can't overload by return type in C++.) Issue #256. --- builtins/target-generic-common.ll | 12 ++++----- cbackend.cpp | 15 ++++++++--- examples/intrinsics/generic-16.h | 18 ++++++------- examples/intrinsics/sse4.h | 43 +++++++++++++++++-------------- 4 files changed, 49 insertions(+), 39 deletions(-) diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index e4c70aa4..6d4d2db6 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -39,12 +39,12 @@ reduce_equal(WIDTH) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; broadcast/rotate/shuffle -declare @__smear_float(float) nounwind readnone -declare @__smear_double(double) nounwind readnone -declare @__smear_i8(i8) nounwind readnone -declare @__smear_i16(i16) nounwind readnone -declare @__smear_i32(i32) nounwind readnone -declare @__smear_i64(i64) nounwind readnone +declare @__smear_float(, float) nounwind readnone +declare @__smear_double(, double) nounwind readnone +declare @__smear_i8(, i8) nounwind readnone +declare @__smear_i16(, i16) nounwind readnone +declare @__smear_i32(, i32) nounwind readnone +declare @__smear_i64(, i64) nounwind readnone declare @__broadcast_float(, i32) nounwind readnone declare @__broadcast_double(, i32) nounwind readnone diff --git a/cbackend.cpp b/cbackend.cpp index e932dff2..d43880dc 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -1464,6 +1464,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Constant *CZ = Constant::getNullValue(VT->getElementType()); Out << smearFunc << "("; + printType(Out, VT); + Out << "(), "; printConstant(CZ, Static); Out << ")"; } @@ -1471,6 +1473,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) { 
llvm::Constant *splatValue = CV->getSplatValue(); if (splatValue != NULL && smearFunc != NULL) { Out << smearFunc << "("; + printType(Out, VT); + Out << "(), "; printConstant(splatValue, Static); Out << ")"; } @@ -1486,6 +1490,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) { llvm::Constant *splatValue = CDV->getSplatValue(); if (splatValue != NULL && smearFunc != NULL) { Out << smearFunc << "("; + printType(Out, VT); + Out << "(), "; printConstant(splatValue, Static); Out << ")"; } @@ -4392,16 +4398,17 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { if (smearFunc == NULL) { Constant *sf = module->getOrInsertFunction(smearFuncName, iter->getType(), - matchType, NULL); + iter->getType(), matchType, NULL); smearFunc = dyn_cast(sf); assert(smearFunc != NULL); smearFunc->setDoesNotThrow(true); smearFunc->setDoesNotAccessMemory(true); } - + + llvm::Value *undefResult = llvm::UndefValue::get(vt); assert(smearFunc != NULL); - Value *args[1] = { toMatch }; - ArrayRef argArray(&args[0], &args[1]); + Value *args[2] = { undefResult, toMatch }; + ArrayRef argArray(&args[0], &args[2]); Instruction *smearCall = CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"), (Instruction *)NULL); diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 384a9ece..4c696975 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -259,13 +259,13 @@ static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \ return ret; \ } -#define SMEAR(VTYPE, NAME, STYPE) \ -static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \ - VTYPE ret; \ - for (int i = 0; i < 16; ++i) \ - ret.v[i] = v; \ - return ret; \ -} \ +#define SMEAR(VTYPE, NAME, STYPE) \ +static FORCEINLINE VTYPE __smear_##NAME(VTYPE retType, STYPE v) { \ + VTYPE ret; \ + for (int i = 0; i < 16; ++i) \ + ret.v[i] = v; \ + return ret; \ +} #define BROADCAST(VTYPE, NAME, STYPE) \ static FORCEINLINE VTYPE __broadcast_##NAME(VTYPE v, int index) { \ @@ -374,7 +374,7 @@ static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v, int align) { *ptr = v.v; } -static FORCEINLINE __vec16_i1 __smear_i1(int v) { +static FORCEINLINE __vec16_i1 __smear_i1(__vec16_i1, int v) { return __vec16_i1(v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v); } diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 48a67719..5fe22b78 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -266,7 +266,7 @@ static FORCEINLINE void __store(__vec4_i1 *p, __vec4_i1 value, int align) { _mm_storeu_ps((float *)(&p->v), value.v); } -static FORCEINLINE __vec4_i1 __smear_i1(int v) { +static FORCEINLINE __vec4_i1 __smear_i1(__vec4_i1, int v) { return __vec4_i1(v, v, v, v); } @@ -493,7 +493,7 @@ static FORCEINLINE void __insert_element(__vec4_i8 *v, int index, int8_t val) { ((int8_t *)v)[index] = val; } -static FORCEINLINE __vec4_i8 __smear_i8(int8_t v) { +static FORCEINLINE __vec4_i8 __smear_i8(__vec4_i8, int8_t v) { return _mm_set1_epi8(v); } @@ -752,7 +752,7 @@ static FORCEINLINE void __insert_element(__vec4_i16 *v, int index, int16_t val) ((int16_t *)v)[index] = val; } -static FORCEINLINE __vec4_i16 __smear_i16(int16_t v) { +static FORCEINLINE __vec4_i16 __smear_i16(__vec4_i16, int16_t v) { return _mm_set1_epi16(v); } @@ -989,7 +989,7 @@ static FORCEINLINE __vec4_i32 
__select(__vec4_i1 mask, __vec4_i32 a, __vec4_i32 _mm_castsi128_ps(a.v), mask.v)); } -static FORCEINLINE __vec4_i32 __smear_i32(int32_t v) { +static FORCEINLINE __vec4_i32 __smear_i32(__vec4_i32, int32_t v) { return _mm_set1_epi32(v); } @@ -1250,7 +1250,7 @@ static FORCEINLINE __vec4_i64 __select(__vec4_i1 mask, __vec4_i64 a, __vec4_i64 return __vec4_i64(_mm_castpd_si128(r0), _mm_castpd_si128(r1)); } -static FORCEINLINE __vec4_i64 __smear_i64(int64_t v) { +static FORCEINLINE __vec4_i64 __smear_i64(__vec4_i64, int64_t v) { return __vec4_i64(v, v, v, v); } @@ -1354,7 +1354,7 @@ static FORCEINLINE __vec4_f __select(__vec4_i1 mask, __vec4_f a, __vec4_f b) { return _mm_blendv_ps(b.v, a.v, mask.v); } -static FORCEINLINE __vec4_f __smear_float(float v) { +static FORCEINLINE __vec4_f __smear_float(__vec4_f, float v) { return _mm_set1_ps(v); } @@ -1486,7 +1486,7 @@ static FORCEINLINE __vec4_d __select(__vec4_i1 mask, __vec4_d a, __vec4_d b) { return __vec4_d(r0, r1); } -static FORCEINLINE __vec4_d __smear_double(double v) { +static FORCEINLINE __vec4_d __smear_double(__vec4_d, double v) { return __vec4_d(_mm_set1_pd(v), _mm_set1_pd(v)); } @@ -1586,11 +1586,13 @@ static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i8 val) { } static FORCEINLINE __vec4_i8 __cast_sext(__vec4_i8, __vec4_i1 v) { - return __select(v, __smear_i8(0xff), __smear_i8(0)); + return __select(v, __smear_i8(__vec4_i8(), 0xff), + __smear_i8(__vec4_i8(), 0)); } static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i1 v) { - return __select(v, __smear_i16(0xffff), __smear_i16(0)); + return __select(v, __smear_i16(__vec4_i16(), 0xffff), + __smear_i16(__vec4_i16(), 0)); } static FORCEINLINE __vec4_i32 __cast_sext(__vec4_i32, __vec4_i1 v) { @@ -1650,11 +1652,12 @@ static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i8 val) { } static FORCEINLINE __vec4_i8 __cast_zext(__vec4_i8, __vec4_i1 v) { - return __select(v, __smear_i8(1), __smear_i8(0)); + return __select(v, 
__smear_i8(__vec4_i8(), 1), __smear_i8(__vec4_i8(), 0)); } static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i1 v) { - return __select(v, __smear_i16(1), __smear_i16(0)); + return __select(v, __smear_i16(__vec4_i16(), 1), + __smear_i16(__vec4_i16(), 0)); } static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) { @@ -1662,7 +1665,7 @@ static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) { } static FORCEINLINE __vec4_i64 __cast_zext(__vec4_i64, __vec4_i1 v) { - return __select(v, __smear_i64(1), __smear_i64(0)); + return __select(v, __smear_i64(__vec4_i64(), 1), __smear_i64(__vec4_i64(), 0)); } // truncations @@ -1822,11 +1825,11 @@ static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i64 val) { } static FORCEINLINE __vec4_f __cast_uitofp(__vec4_f, __vec4_i1 v) { - return __select(v, __smear_float(1.), __smear_float(0.)); + return __select(v, __smear_float(__vec4_f(), 1.), __smear_float(__vec4_f(), 0.)); } static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i1 v) { - return __select(v, __smear_double(1.), __smear_double(0.)); + return __select(v, __smear_double(__vec4_d(), 1.), __smear_double(__vec4_d(), 0.)); } // float/double to signed int @@ -2617,8 +2620,8 @@ lGatherBaseOffsets32(RetVec, RetScalar, unsigned char *p, __vec4_i32 offsets, RetScalar r[4]; #if 1 // "Fast gather" trick... - offsets = __select(mask, offsets, __smear_i32(0)); - constOffset = __select(mask, constOffset, __smear_i32(0)); + offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0)); + constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0)); int offset = scale * _mm_extract_epi32(offsets.v, 0) + _mm_extract_epi32(constOffset.v, 0); RetScalar *ptr = (RetScalar *)(p + offset); @@ -2675,8 +2678,8 @@ lGatherBaseOffsets64(RetVec, RetScalar, unsigned char *p, __vec4_i64 offsets, RetScalar r[4]; #if 1 // "Fast gather" trick... 
- offsets = __select(mask, offsets, __smear_i64(0)); - constOffset = __select(mask, constOffset, __smear_i64(0)); + offsets = __select(mask, offsets, __smear_i64(__vec4_i64(), 0)); + constOffset = __select(mask, constOffset, __smear_i64(__vec4_i64(), 0)); int64_t offset = scale * _mm_extract_epi64(offsets.v[0], 0) + _mm_extract_epi64(constOffset.v[0], 0); RetScalar *ptr = (RetScalar *)(p + offset); @@ -2760,8 +2763,8 @@ __gather_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale, __m128i r = _mm_set_epi32(0, 0, 0, 0); #if 1 // "Fast gather"... - offsets = __select(mask, offsets, __smear_i32(0)); - constOffset = __select(mask, constOffset, __smear_i32(0)); + offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0)); + constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0)); int offset = scale * _mm_extract_epi32(offsets.v, 0) + _mm_extract_epi32(constOffset.v, 0); From 4f053e5b83d3e67affe4f6fcd802f42fe98523ce Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 8 May 2012 13:24:56 -0700 Subject: [PATCH 139/173] Pass OPT flags when linking --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0011c2ac..23a2428a 100644 --- a/Makefile +++ b/Makefile @@ -124,7 +124,7 @@ doxygen: ispc: print_llvm_src dirs $(OBJS) @echo Creating ispc executable - @$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS) + @$(CXX) $(OPT) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS) objs/%.o: %.cpp @echo Compiling $< From dc120f3962e14429e3c03e09735b1318bd3ed75f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 9 May 2012 07:00:58 -0700 Subject: [PATCH 140/173] Fix regression in masked_store_blend for generic target. In ee1fe3aa9f, the LLVM_VERSION define was updated to never have the 'svn' suffix and the build was updated to handle LLVM 3.2. This file had a check for LLVM_3_1svn that was no longer hitting. This fixes some issues with unnecessary loads and stores in generated C++ code for the generic targets. 
--- builtins/target-generic-common.ll | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 6d4d2db6..50daf23e 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -249,7 +249,16 @@ declare void @__masked_store_32(* nocapture, , declare void @__masked_store_64(* nocapture, , %mask) nounwind -ifelse(LLVM_VERSION, `LLVM_3_1svn',` +ifelse(LLVM_VERSION, `LLVM_3_0', ` +declare void @__masked_store_blend_8(* nocapture, , + ) nounwind +declare void @__masked_store_blend_16(* nocapture, , + ) nounwind +declare void @__masked_store_blend_32(* nocapture, , + ) nounwind +declare void @__masked_store_blend_64(* nocapture, , + %mask) nounwind +', ` define void @__masked_store_blend_8(* nocapture, , ) nounwind alwaysinline { %v = load * %0 @@ -281,15 +290,6 @@ define void @__masked_store_blend_64(* nocapture, store %v1, * %0 ret void } -',` -declare void @__masked_store_blend_8(* nocapture, , - ) nounwind -declare void @__masked_store_blend_16(* nocapture, , - ) nounwind -declare void @__masked_store_blend_32(* nocapture, , - ) nounwind -declare void @__masked_store_blend_64(* nocapture, , - %mask) nounwind ') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; From fbed0ac56b2797e9c91e08bbb7d4d5121e92ee3d Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 9 May 2012 10:31:53 -0700 Subject: [PATCH 141/173] Remove allOffMaskIsSafe from Target The intent of this was to indicate whether it was safe to run code with an 'all off' mask on the given target (and then sometimes be more flexible about e.g. running both true and false blocks of if statements, etc.) The problem is that even if the architecture has full native mask support, it's still not safe to run 'uniform' memory operations with the mask all off.
Even more tricky, we sometimes transform masked varying memory operations to uniform ones during optimization (e.g. gather->load and broadcast). This fixes a number of the tests/switch-* tests that were failing on the generic targets due to this issue. --- ast.cpp | 5 ----- ispc.cpp | 13 ------------- ispc.h | 6 ------ 3 files changed, 24 deletions(-) diff --git a/ast.cpp b/ast.cpp index 1bf00a0e..96c41616 100644 --- a/ast.cpp +++ b/ast.cpp @@ -395,11 +395,6 @@ lCheckAllOffSafety(ASTNode *node, void *data) { return false; } - if (g->target.allOffMaskIsSafe == true) - // Don't worry about memory accesses if we have a target that can - // safely run them with the mask all off - return true; - IndexExpr *ie; if ((ie = dynamic_cast(node)) != NULL && ie->baseExpr != NULL) { const Type *type = ie->baseExpr->GetType(); diff --git a/ispc.cpp b/ispc.cpp index 9d1220d5..05ca7c07 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -206,7 +206,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 4; t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; } else if (!strcasecmp(isa, "sse2-x2")) { @@ -215,7 +214,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 8; t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; } else if (!strcasecmp(isa, "sse4")) { @@ -224,7 +222,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 4; t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; } else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) { @@ -233,7 +230,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 8; t->attributes = 
"+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; } else if (!strcasecmp(isa, "generic-4")) { @@ -241,7 +237,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->nativeVectorWidth = 4; t->vectorWidth = 4; t->maskingIsFree = true; - t->allOffMaskIsSafe = true; t->maskBitCount = 1; } else if (!strcasecmp(isa, "generic-8")) { @@ -249,7 +244,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->nativeVectorWidth = 8; t->vectorWidth = 8; t->maskingIsFree = true; - t->allOffMaskIsSafe = true; t->maskBitCount = 1; } else if (!strcasecmp(isa, "generic-16")) { @@ -257,7 +251,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->nativeVectorWidth = 16; t->vectorWidth = 16; t->maskingIsFree = true; - t->allOffMaskIsSafe = true; t->maskBitCount = 1; t->hasHalf = true; t->hasTranscendentals = true; @@ -267,7 +260,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->nativeVectorWidth = 32; t->vectorWidth = 32; t->maskingIsFree = true; - t->allOffMaskIsSafe = true; t->maskBitCount = 1; t->hasHalf = true; t->hasTranscendentals = true; @@ -277,7 +269,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->nativeVectorWidth = 1; t->vectorWidth = 1; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; } else if (!strcasecmp(isa, "avx")) { @@ -286,7 +277,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 8; t->attributes = "+avx,+popcnt,+cmov"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; } else if (!strcasecmp(isa, "avx-x2")) { @@ -295,7 +285,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 16; t->attributes = "+avx,+popcnt,+cmov"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; } #ifndef LLVM_3_0 @@ 
-305,7 +294,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 8; t->attributes = "+avx2,+popcnt,+cmov,+f16c"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; t->hasHalf = true; } @@ -315,7 +303,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 16; t->attributes = "+avx2,+popcnt,+cmov,+f16c"; t->maskingIsFree = false; - t->allOffMaskIsSafe = false; t->maskBitCount = 32; t->hasHalf = true; } diff --git a/ispc.h b/ispc.h index e2d9294d..bd170936 100644 --- a/ispc.h +++ b/ispc.h @@ -239,12 +239,6 @@ struct Target { natively. */ bool maskingIsFree; - /** Is it safe to run code with the mask all if: e.g. on SSE, the fast - gather trick assumes that at least one program instance is running - (so that it can safely assume that the array base pointer is - valid). */ - bool allOffMaskIsSafe; - /** How many bits are used to store each element of the mask: e.g. this is 32 on SSE/AVX, since that matches the HW better, but it's 1 for the generic target. 
*/ From 625fbef6133ee02a23dde923282d7de65f713b08 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 15 May 2012 12:19:10 -0700 Subject: [PATCH 142/173] Fix Windows build --- module.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/module.cpp b/module.cpp index 7e83725f..78cbe26f 100644 --- a/module.cpp +++ b/module.cpp @@ -245,8 +245,13 @@ Module::Module(const char *fn) { GetDirectoryAndFileName(g->currentDirectory, filename, &directory, &name); char producerString[512]; +#if defined(BUILD_VERSION) && defined (BUILD_DATE) sprintf(producerString, "ispc version %s (build %s on %s)", ISPC_VERSION, BUILD_VERSION, BUILD_DATE); +#else + sprintf(producerString, "ispc version %s (built on %s)", + ISPC_VERSION, __DATE__); +#endif diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */ name, /* filename */ directory, /* directory */ From f4df2fb176d9a4328d881a657c0e22effe54b218 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 16 May 2012 13:52:42 -0700 Subject: [PATCH 143/173] Improvements to mask update code for generic targets. Rather than XOR'ing with a temporary 'all-on' vector, we call __not. Also, we call out to __and_not1 and __and_not2, for an AND where the first or second operand, respectively, has had NOT applied to it. 
--- cbackend.cpp | 154 ++++++++++++++++++++++++++++++- examples/intrinsics/generic-16.h | 19 ++++ 2 files changed, 171 insertions(+), 2 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index d43880dc..612358da 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -4327,14 +4327,14 @@ void CWriter::visitAtomicCmpXchgInst(AtomicCmpXchgInst &ACXI) { class SmearCleanupPass : public llvm::BasicBlockPass { public: - SmearCleanupPass(llvm::Module *m, int width) + SmearCleanupPass(Module *m, int width) : BasicBlockPass(ID) { module = m; vectorWidth = width; } const char *getPassName() const { return "Smear Cleanup Pass"; } bool runOnBasicBlock(llvm::BasicBlock &BB); static char ID; - llvm::Module *module; + Module *module; int vectorWidth; }; @@ -4475,6 +4475,155 @@ BitcastCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { return modifiedAny; } +/////////////////////////////////////////////////////////////////////////// +// MaskOpsCleanupPass + +/** This pass does various peephole improvements to mask modification + operations. In particular, it converts mask XORs with "all true" to + calls to __not() and replaces operations like and(not(a), b) to + __and_not1(a, b) (and similarly if the second operand has not applied + to it...) + */ +class MaskOpsCleanupPass : public llvm::BasicBlockPass { +public: + MaskOpsCleanupPass(Module *m) + : BasicBlockPass(ID) { + Type *mt = LLVMTypes::MaskType; + + // Declare the __not, __and_not1, and __and_not2 functions that we + // expect the target to end up providing. 
+ notFunc = + dyn_cast(m->getOrInsertFunction("__not", mt, mt, NULL)); + assert(notFunc != NULL); + notFunc->addFnAttr(Attribute::NoUnwind); + notFunc->addFnAttr(Attribute::ReadNone); + + andNotFuncs[0] = + dyn_cast(m->getOrInsertFunction("__and_not1", mt, mt, mt, + NULL)); + assert(andNotFuncs[0] != NULL); + andNotFuncs[0]->addFnAttr(Attribute::NoUnwind); + andNotFuncs[0]->addFnAttr(Attribute::ReadNone); + + andNotFuncs[1] = + dyn_cast(m->getOrInsertFunction("__and_not2", mt, mt, mt, + NULL)); + assert(andNotFuncs[1] != NULL); + andNotFuncs[1]->addFnAttr(Attribute::NoUnwind); + andNotFuncs[1]->addFnAttr(Attribute::ReadNone); + } + + const char *getPassName() const { return "MaskOps Cleanup Pass"; } + bool runOnBasicBlock(llvm::BasicBlock &BB); + +private: + Value *lGetNotOperand(Value *v) const; + + Function *notFunc, *andNotFuncs[2]; + + static char ID; +}; + +char MaskOpsCleanupPass::ID = 0; + + +/** Returns true if the given value is a compile-time constant vector of + i1s with all elements 'true'. +*/ +static bool +lIsAllTrue(Value *v) { + if (ConstantVector *cv = dyn_cast(v)) { + ConstantInt *ci; + return (cv->getSplatValue() != NULL && + (ci = dyn_cast(cv->getSplatValue())) != NULL && + ci->isOne()); + } + +#ifndef LLVM_3_0 + if (ConstantDataVector *cdv = dyn_cast(v)) { + ConstantInt *ci; + return (cdv->getSplatValue() != NULL && + (ci = dyn_cast(cdv->getSplatValue())) != NULL && + ci->isOne()); + } +#endif + + return false; +} + + +/** Checks to see if the given value is the NOT of some other value. If + so, it returns the operand of the NOT; otherwise returns NULL. + */ +Value * +MaskOpsCleanupPass::lGetNotOperand(Value *v) const { + if (CallInst *ci = dyn_cast(v)) + if (ci->getCalledFunction() == notFunc) + // Direct call to __not() + return ci->getArgOperand(0); + + if (BinaryOperator *bop = dyn_cast(v)) + if (bop->getOpcode() == Instruction::Xor && + lIsAllTrue(bop->getOperand(1))) + // XOR of all-true vector. 
+ return bop->getOperand(0); + + return NULL; +} + + +bool +MaskOpsCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { + bool modifiedAny = false; + + restart: + for (BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { + BinaryOperator *bop = dyn_cast(&*iter); + if (bop == NULL) + continue; + + if (bop->getType() != LLVMTypes::MaskType) + continue; + + if (bop->getOpcode() == Instruction::Xor) { + // Check for XOR with all-true values + if (lIsAllTrue(bop->getOperand(1))) { + ArrayRef arg(bop->getOperand(0)); + CallInst *notCall = CallInst::Create(notFunc, arg, + bop->getName()); + ReplaceInstWithInst(iter, notCall); + modifiedAny = true; + goto restart; + } + } + else if (bop->getOpcode() == Instruction::And) { + // Check each of the operands to see if they have NOT applied + // to them. + for (int i = 0; i < 2; ++i) { + if (Value *notOp = lGetNotOperand(bop->getOperand(i))) { + // In notOp we have the target of the NOT operation; + // put it in its appropriate spot in the operand array. + // Copy in the other operand directly. + Value *args[2]; + args[i] = notOp; + args[i ^ 1] = bop->getOperand(i ^ 1); + ArrayRef argsRef(&args[0], 2); + + // Call the appropriate __and_not* function. + CallInst *andNotCall = + CallInst::Create(andNotFuncs[i], argsRef, bop->getName()); + + ReplaceInstWithInst(iter, andNotCall); + modifiedAny = true; + goto restart; + } + } + } + } + + return modifiedAny; +} + //===----------------------------------------------------------------------===// // External Interface declaration @@ -4506,6 +4655,7 @@ WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth, pm.add(createCFGSimplificationPass()); // clean up after lower invoke. 
pm.add(new SmearCleanupPass(module, vectorWidth)); pm.add(new BitcastCleanupPass); + pm.add(new MaskOpsCleanupPass(module)); pm.add(createDeadCodeEliminationPass()); // clean up after smear pass //CO pm.add(createPrintModulePass(&fos)); pm.add(new CWriter(fos, includeName, vectorWidth)); diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 4c696975..57eba63f 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -339,6 +339,24 @@ static FORCEINLINE __vec16_i1 __or(__vec16_i1 a, __vec16_i1 b) { return r; } +static FORCEINLINE __vec16_i1 __not(__vec16_i1 v) { + __vec16_i1 r; + r.v = ~v.v; + return r; +} + +static FORCEINLINE __vec16_i1 __and_not1(__vec16_i1 a, __vec16_i1 b) { + __vec16_i1 r; + r.v = ~a.v & b.v; + return r; +} + +static FORCEINLINE __vec16_i1 __and_not2(__vec16_i1 a, __vec16_i1 b) { + __vec16_i1 r; + r.v = a.v & ~b.v; + return r; +} + static FORCEINLINE __vec16_i1 __select(__vec16_i1 mask, __vec16_i1 a, __vec16_i1 b) { __vec16_i1 r; @@ -379,6 +397,7 @@ static FORCEINLINE __vec16_i1 __smear_i1(__vec16_i1, int v) { v, v, v, v, v, v, v, v); } + /////////////////////////////////////////////////////////////////////////// // int8 From 299ae186f1296d02ed645ab5f0889f944e3e63c9 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 18 May 2012 06:13:45 -0700 Subject: [PATCH 144/173] Expect support for half and transcendentals from all generic targets --- ispc.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ispc.cpp b/ispc.cpp index 05ca7c07..99155056 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -238,6 +238,8 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 4; t->maskingIsFree = true; t->maskBitCount = 1; + t->hasHalf = true; + t->hasTranscendentals = true; } else if (!strcasecmp(isa, "generic-8")) { t->isa = Target::GENERIC; @@ -245,6 +247,8 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->vectorWidth = 8; t->maskingIsFree = 
true; t->maskBitCount = 1; + t->hasHalf = true; + t->hasTranscendentals = true; } else if (!strcasecmp(isa, "generic-16")) { t->isa = Target::GENERIC; From 8d3ac3ac1e45d1e245c7e9e6dd0a41cc33b71462 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 18 May 2012 10:09:09 -0700 Subject: [PATCH 145/173] Fix build with LLVM ToT --- cbackend.cpp | 174 +-------------------------------------------------- 1 file changed, 1 insertion(+), 173 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index 612358da..671a21ce 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -337,8 +337,6 @@ namespace { bool IsVolatile, unsigned Alignment); private : - std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c); - void lowerIntrinsics(Function &F); /// Prints the definition of the intrinsic function F. Supports the /// intrinsics which need to be explicitly defined in the CBackend. @@ -3801,181 +3799,11 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, } } -//This converts the llvm constraint string to something gcc is expecting. -//TODO: work out platform independent constraints and factor those out -// of the per target tables -// handle multiple constraint codes -std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { - assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle"); - - // Grab the translation table from MCAsmInfo if it exists. - const MCAsmInfo *TargetAsm; - std::string Triple = TheModule->getTargetTriple(); - if (Triple.empty()) -#ifdef LLVM_3_0 - Triple = llvm::sys::getHostTriple(); -#else - Triple = llvm::sys::getDefaultTargetTriple(); -#endif - - std::string E; - if (const llvm::Target *Match = TargetRegistry::lookupTarget(Triple, E)) - TargetAsm = Match->createMCAsmInfo(Triple); - else - return c.Codes[0]; - - const char *const *table = TargetAsm->getAsmCBE(); - - // Search the translation table if it exists. 
- for (int i = 0; table && table[i]; i += 2) - if (c.Codes[0] == table[i]) { - delete TargetAsm; - return table[i+1]; - } - - // Default is identity. - delete TargetAsm; - return c.Codes[0]; -} - -//TODO: import logic from AsmPrinter.cpp -static std::string gccifyAsm(std::string asmstr) { - for (std::string::size_type i = 0; i != asmstr.size(); ++i) - if (asmstr[i] == '\n') - asmstr.replace(i, 1, "\\n"); - else if (asmstr[i] == '\t') - asmstr.replace(i, 1, "\\t"); - else if (asmstr[i] == '$') { - if (asmstr[i + 1] == '{') { - std::string::size_type a = asmstr.find_first_of(':', i + 1); - std::string::size_type b = asmstr.find_first_of('}', i + 1); - std::string n = "%" + - asmstr.substr(a + 1, b - a - 1) + - asmstr.substr(i + 2, a - i - 2); - asmstr.replace(i, b - i + 1, n); - i += n.size() - 1; - } else - asmstr.replace(i, 1, "%"); - } - else if (asmstr[i] == '%')//grr - { asmstr.replace(i, 1, "%%"); ++i;} - - return asmstr; -} //TODO: assumptions about what consume arguments from the call are likely wrong // handle communitivity void CWriter::visitInlineAsm(CallInst &CI) { - InlineAsm* as = cast(CI.getCalledValue()); - InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints(); - - std::vector > ResultVals; - if (CI.getType() == Type::getVoidTy(CI.getContext())) - ; - else if (StructType *ST = dyn_cast(CI.getType())) { - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) - ResultVals.push_back(std::make_pair(&CI, (int)i)); - } else { - ResultVals.push_back(std::make_pair(&CI, -1)); - } - - // Fix up the asm string for gcc and emit it. - Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n"; - Out << " :"; - - unsigned ValueCount = 0; - bool IsFirst = true; - - // Convert over all the output constraints. - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - - if (I->Type != InlineAsm::isOutput) { - ++ValueCount; - continue; // Ignore non-output constraints. 
- } - - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - // Unpack the dest. - Value *DestVal; - int DestValNo = -1; - - if (ValueCount < ResultVals.size()) { - DestVal = ResultVals[ValueCount].first; - DestValNo = ResultVals[ValueCount].second; - } else - DestVal = CI.getArgOperand(ValueCount-ResultVals.size()); - - if (I->isEarlyClobber) - C = "&"+C; - - Out << "\"=" << C << "\"(" << GetValueName(DestVal); - if (DestValNo != -1) - Out << ".field" << DestValNo; // Multiple retvals. - Out << ")"; - ++ValueCount; - } - - - // Convert over all the input constraints. - Out << "\n :"; - IsFirst = true; - ValueCount = 0; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isInput) { - ++ValueCount; - continue; // Ignore non-input constraints. - } - - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); - Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); - - Out << "\"" << C << "\"("; - if (!I->isIndirect) - writeOperand(SrcVal); - else - writeOperandDeref(SrcVal); - Out << ")"; - } - - // Convert over the clobber constraints. - IsFirst = true; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isClobber) - continue; // Ignore non-input constraints. 
- - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - Out << '\"' << C << '"'; - } - - Out << ")"; + assert(!"Inline assembly not supported"); } void CWriter::visitAllocaInst(AllocaInst &I) { From 72c41f104e1c8ed235a4ae32794565910b4f998b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 18 May 2012 10:44:45 -0700 Subject: [PATCH 146/173] Fix various malformed program crashes. --- ctx.cpp | 11 +++++++++++ expr.cpp | 10 +++++++++- ispc.cpp | 5 ++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index cb99ac91..e15ee72b 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -2206,6 +2206,17 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, if (resultPtrType != NULL) Assert(ptrRefType != NULL); + llvm::PointerType *llvmPtrType = + llvm::dyn_cast(fullBasePtr->getType()); + if (llvmPtrType != NULL) { + llvm::StructType *llvmStructType = + llvm::dyn_cast(llvmPtrType->getElementType()); + if (llvmStructType->isSized() == false) { + Assert(m->errorCount > 0); + return NULL; + } + } + // (Unfortunately) it's not required to pass a non-NULL ptrRefType, but // if we have one, regularize into a pointer type. const PointerType *ptrType = NULL; diff --git a/expr.cpp b/expr.cpp index 8236e8e4..99cd5bf5 100644 --- a/expr.cpp +++ b/expr.cpp @@ -4813,6 +4813,9 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *mask = NULL; if (lvalue == NULL) { + if (m->errorCount > 0) + return NULL; + // As in the array case, this may be a temporary that hasn't hit // memory; get the full value and stuff it into a temporary array // so that we can index from there... 
@@ -4889,6 +4892,10 @@ MemberExpr::GetLValue(FunctionEmitContext *ctx) const { llvm::Value *ptr = ctx->AddElementOffset(basePtr, elementNumber, exprLValueType, basePtr->getName().str().c_str()); + if (ptr == NULL) { + Assert(m->errorCount > 0); + return NULL; + } ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); @@ -6606,7 +6613,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { // The only legal type conversions for structs are to go from a // uniform to a varying instance of the same struct type. Assert(toStruct->IsVaryingType() && fromStruct->IsUniformType() && - Type::Equal(toStruct, fromStruct->GetAsVaryingType())); + Type::EqualIgnoringConst(toStruct, + fromStruct->GetAsVaryingType())); llvm::Value *origValue = expr->GetValue(ctx); if (!origValue) diff --git a/ispc.cpp b/ispc.cpp index 99155056..ac429cb9 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -520,7 +520,10 @@ Target::StructOffset(llvm::Type *type, int element, Assert(td != NULL); llvm::StructType *structType = llvm::dyn_cast(type); - Assert(structType != NULL); + if (structType == NULL || structType->isSized() == false) { + Assert(m->errorCount > 0); + return NULL; + } const llvm::StructLayout *sl = td->getStructLayout(structType); Assert(sl != NULL); From 22d584f302604192dc0d07f937475751fae0bf22 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 18 May 2012 11:56:11 -0700 Subject: [PATCH 147/173] Don't issue perf. warnings for various conversions with generic target. 
--- expr.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/expr.cpp b/expr.cpp index 99cd5bf5..4dac53ee 100644 --- a/expr.cpp +++ b/expr.cpp @@ -5911,7 +5911,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: case AtomicType::TYPE_UINT64: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from unsigned int to float is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to float @@ -6027,14 +6027,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from float to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from double to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int @@ -6107,7 +6107,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = exprVal; break; case AtomicType::TYPE_FLOAT: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from float to unsigned int is slow. 
" "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int @@ -6120,7 +6120,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from double to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int @@ -6195,7 +6195,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = exprVal; break; case AtomicType::TYPE_FLOAT: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from float to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int @@ -6206,7 +6206,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = ctx->TruncInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_DOUBLE: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from double to unsigned int is slow. " "Use \"int\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int @@ -6277,7 +6277,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = ctx->ZExtInst(exprVal, targetType, cOpName); break; case AtomicType::TYPE_FLOAT: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from float to unsigned int64 is slow. 
" "Use \"int64\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int @@ -6288,7 +6288,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = exprVal; break; case AtomicType::TYPE_DOUBLE: - if (fromType->IsVaryingType()) + if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC) PerformanceWarning(pos, "Conversion from double to unsigned int64 is slow. " "Use \"int64\" if possible"); cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int From 4d1eb94dfd5470402633fcb8ccba8d297f3ed732 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 18 May 2012 11:57:05 -0700 Subject: [PATCH 148/173] Fix bug in AddElementOffset() error checking. --- ctx.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctx.cpp b/ctx.cpp index e15ee72b..896f9346 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -2211,7 +2211,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, if (llvmPtrType != NULL) { llvm::StructType *llvmStructType = llvm::dyn_cast(llvmPtrType->getElementType()); - if (llvmStructType->isSized() == false) { + if (llvmStructType != NULL && llvmStructType->isSized() == false) { Assert(m->errorCount > 0); return NULL; } From 99f57cfda65076f39c4fdc7cfbac2ad2db132cd7 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 18 May 2012 12:00:11 -0700 Subject: [PATCH 149/173] Issue more sensible error message for varying pointers in exported functions. 
--- module.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/module.cpp b/module.cpp index 78cbe26f..47c21e34 100644 --- a/module.cpp +++ b/module.cpp @@ -554,8 +554,10 @@ static void lCheckForVaryingParameter(const Type *type, const std::string &name, SourcePos pos) { if (lRecursiveCheckValidParamType(type)) { - const Type *t = type->GetBaseType(); - if (CastType(t)) + if (CastType(type)) + Error(pos, "Varying pointer type parameter \"%s\" is illegal " + "in an exported function.", name.c_str()); + else if (CastType(type->GetBaseType())) Error(pos, "Struct parameter \"%s\" with varying member(s) is illegal " "in an exported function.", name.c_str()); else From 7dd4d6c75e102162902c06d67101fca81adf631b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 22 May 2012 15:53:14 -0700 Subject: [PATCH 150/173] Update for LLVM 3.2dev API change --- type.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/type.cpp b/type.cpp index f7edc485..aa6a6e77 100644 --- a/type.cpp +++ b/type.cpp @@ -2583,7 +2583,12 @@ ReferenceType::GetDIType(llvm::DIDescriptor scope) const { } llvm::DIType diTargetType = targetType->GetDIType(scope); +#if defined(LLVM_3_0) || defined(LLVM_3_1) return m->diBuilder->createReferenceType(diTargetType); +#else + return m->diBuilder->createReferenceType(llvm::dwarf::DW_TAG_reference_type, + diTargetType); +#endif } From 333f901187a7ba99a5bb1d33fbc66ba4ede465c2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 23 May 2012 14:19:50 -0700 Subject: [PATCH 151/173] Fix build with LLVM 3.2 dev top-of-tree --- type.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/type.cpp b/type.cpp index aa6a6e77..fc2f74f4 100644 --- a/type.cpp +++ b/type.cpp @@ -789,7 +789,11 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const { m->diBuilder->createEnumerationType(scope, name, diFile, pos.first_line, 32 /* size in bits */, 32 /* align in bits */, - elementArray); + elementArray +#if !defined(LLVM_3_0) && 
!defined(LLVM_3_1) + , llvm::DIType() +#endif + ); switch (variability.type) { From e8858150cbe016183c1cefd3b5be480c1cd13d4f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 23 May 2012 14:20:20 -0700 Subject: [PATCH 152/173] Allow redundant semicolons at global scope. (Ingo Wald) --- parse.yy | 1 + 1 file changed, 1 insertion(+) diff --git a/parse.yy b/parse.yy index 605d5d7d..f72a9f65 100644 --- a/parse.yy +++ b/parse.yy @@ -1843,6 +1843,7 @@ external_declaration for (unsigned int i = 0; i < $1->declarators.size(); ++i) lAddDeclaration($1->declSpecs, $1->declarators[i]); } + | ';' ; function_definition From 2c5a57e386e1dd7a28a5b00fa591a3b4d9e43d55 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 23 May 2012 14:29:17 -0700 Subject: [PATCH 153/173] Fix bugs related to varying pointers to functions that return void. --- ctx.cpp | 3 ++- expr.cpp | 14 +++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 896f9346..0554485f 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -3243,7 +3243,8 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // Now, do a masked store into the memory allocated to // accumulate the result using the call mask. 
- if (callResult != NULL) { + if (callResult != NULL && + callResult->getType() != LLVMTypes::VoidType) { Assert(resultPtr != NULL); StoreInst(callResult, resultPtr, callMask, returnType, PointerType::GetUniform(returnType)); diff --git a/expr.cpp b/expr.cpp index 4dac53ee..e5d0050c 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3586,11 +3586,15 @@ FunctionCallExpr::TypeCheck() { Assert(funcType->GetParameterDefault(i) != NULL); } - if (fptrType->IsVaryingType() && - funcType->GetReturnType()->IsUniformType()) { - Error(pos, "Illegal to call a varying function pointer that " - "points to a function with a uniform return type."); - return NULL; + if (fptrType->IsVaryingType()) { + const Type *retType = funcType->GetReturnType(); + if (Type::Equal(retType, AtomicType::Void) == false && + retType->IsUniformType()) { + Error(pos, "Illegal to call a varying function pointer that " + "points to a function with a uniform return type \"%s\".", + funcType->GetReturnType()->GetString().c_str()); + return NULL; + } } } From fd03ba7586bbb9e54ddc21b8c6f42e90c9bd4b88 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 24 May 2012 07:12:48 -0700 Subject: [PATCH 154/173] Export reference parameters as C++ references, not pointers. 
--- examples/deferred/main.cpp | 2 +- type.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/deferred/main.cpp b/examples/deferred/main.cpp index 88dab2d6..17bd3f42 100644 --- a/examples/deferred/main.cpp +++ b/examples/deferred/main.cpp @@ -87,7 +87,7 @@ int main(int argc, char** argv) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) - ispc::RenderStatic(&input->header, &input->arrays, + ispc::RenderStatic(input->header, input->arrays, VISUALIZE_LIGHT_COUNT, framebuffer.r, framebuffer.g, framebuffer.b); double mcycles = get_elapsed_mcycles() / nframes; diff --git a/type.cpp b/type.cpp index fc2f74f4..ea61109d 100644 --- a/type.cpp +++ b/type.cpp @@ -2554,7 +2554,7 @@ ReferenceType::GetCDeclaration(const std::string &name) const { } else { std::string ret; - ret += targetType->GetCDeclaration("") + std::string(" *"); + ret += targetType->GetCDeclaration("") + std::string(" &"); if (lShouldPrintName(name)) ret += name; return ret; From d943455e1023cd2f1bbe977a3d43ca34ac2564f4 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 10:28:01 -0700 Subject: [PATCH 155/173] Issue error on overloaded "export"ed functions. Issue #270. 
--- module.cpp | 12 ++++++++++++ tests_errors/export-multiple-name.ispc | 5 +++++ 2 files changed, 17 insertions(+) create mode 100644 tests_errors/export-multiple-name.ispc diff --git a/module.cpp b/module.cpp index 47c21e34..b5afc875 100644 --- a/module.cpp +++ b/module.cpp @@ -613,6 +613,18 @@ Module::AddFunctionDeclaration(const std::string &name, for (unsigned int i = 0; i < overloadFuncs.size(); ++i) { Symbol *overloadFunc = overloadFuncs[i]; + const FunctionType *overloadType = + CastType(overloadFunc->type); + if (overloadType == NULL) { + Assert(m->errorCount == 0); + continue; + } + + if (functionType->isExported || overloadType->isExported) + Error(pos, "Illegal to have \"export\" function with same name " + "as previously declared function (%s:%d).", + overloadFunc->pos.name, overloadFunc->pos.first_line); + // Check for a redeclaration of a function with the same // name and type if (Type::Equal(overloadFunc->type, functionType)) diff --git a/tests_errors/export-multiple-name.ispc b/tests_errors/export-multiple-name.ispc new file mode 100644 index 00000000..11a2c896 --- /dev/null +++ b/tests_errors/export-multiple-name.ispc @@ -0,0 +1,5 @@ +// Illegal to have "export" function with same name as previously declared function + +export void foo() { } + +export void foo(uniform int x) { } From 64807dfb3b369cb161ac74343b236821d08c669a Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 10:59:45 -0700 Subject: [PATCH 156/173] Add AssertPos() macro that provides rough source location in error It can sometimes be useful to know the general place we were in the program when an assertion hit; when the position is available / applicable, this macro is now used. Issue #268. 
--- ctx.cpp | 254 ++++++++++++++++++++++----------------------- decl.cpp | 16 +-- expr.cpp | 308 +++++++++++++++++++++++++++---------------------------- ispc.h | 24 +++-- parse.yy | 42 ++++---- stmt.cpp | 60 +++++------ util.cpp | 29 +++++- 7 files changed, 380 insertions(+), 353 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 0554485f..4e357873 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -284,7 +284,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, llvm::Constant *offFunc = m->module->getOrInsertFunction(buf, LLVMTypes::VoidType, NULL); - Assert(llvm::isa(offFunc)); + AssertPos(currentPos, llvm::isa(offFunc)); llvm::BasicBlock *offBB = llvm::BasicBlock::Create(*g->ctx, "entry", (llvm::Function *)offFunc, 0); @@ -302,18 +302,18 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, /* If debugging is enabled, tell the debug information emission code about this new function */ diFile = funcStartPos.GetDIFile(); - Assert(diFile.Verify()); + AssertPos(currentPos, diFile.Verify()); llvm::DIScope scope = llvm::DIScope(m->diBuilder->getCU()); - Assert(scope.Verify()); + AssertPos(currentPos, scope.Verify()); const FunctionType *functionType = function->GetType(); llvm::DIType diSubprogramType; if (functionType == NULL) - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); else { diSubprogramType = functionType->GetDIType(scope); - Assert(diSubprogramType.Verify()); + AssertPos(currentPos, diSubprogramType.Verify()); } std::string mangledName = llvmFunction->getName(); @@ -335,7 +335,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, #endif // !LLVM_3_0 flags, isOptimized, llvmFunction); - Assert(diSubprogram.Verify()); + AssertPos(currentPos, diSubprogram.Verify()); /* And start a scope representing the initial function scope */ StartScope(); @@ -344,8 +344,8 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, FunctionEmitContext::~FunctionEmitContext() { - 
Assert(controlFlowInfo.size() == 0); - Assert(debugScopes.size() == (m->diBuilder ? 1 : 0)); + AssertPos(currentPos, controlFlowInfo.size() == 0); + AssertPos(currentPos, debugScopes.size() == (m->diBuilder ? 1 : 0)); } @@ -439,7 +439,7 @@ FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *te void FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { - Assert(bblock != NULL); + AssertPos(currentPos, bblock != NULL); llvm::Value *any = Any(GetFullMask()); BranchInst(btrue, bfalse, any); // It's illegal to add any additional instructions to the basic block @@ -450,7 +450,7 @@ FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock * void FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { - Assert(bblock != NULL); + AssertPos(currentPos, bblock != NULL); llvm::Value *all = All(GetFullMask()); BranchInst(btrue, bfalse, all); // It's illegal to add any additional instructions to the basic block @@ -461,7 +461,7 @@ FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock * void FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { - Assert(bblock != NULL); + AssertPos(currentPos, bblock != NULL); // switch sense of true/false bblocks BranchIfMaskAny(bfalse, btrue); // It's illegal to add any additional instructions to the basic block @@ -486,7 +486,7 @@ void FunctionEmitContext::EndIf() { CFInfo *ci = popCFState(); // Make sure we match up with a Start{Uniform,Varying}If(). 
- Assert(ci->IsIf()); + AssertPos(currentPos, ci->IsIf()); // 'uniform' ifs don't change the mask so we only need to restore the // mask going into the if for 'varying' if statements @@ -575,7 +575,7 @@ FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct, void FunctionEmitContext::EndLoop() { CFInfo *ci = popCFState(); - Assert(ci->IsLoop()); + AssertPos(currentPos, ci->IsLoop()); if (!ci->IsUniform()) // If the loop had a 'uniform' test, then it didn't make any @@ -609,7 +609,7 @@ FunctionEmitContext::StartForeach() { void FunctionEmitContext::EndForeach() { CFInfo *ci = popCFState(); - Assert(ci->IsForeach()); + AssertPos(currentPos, ci->IsForeach()); } @@ -654,7 +654,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) { "for/while/do loops and \"switch\" statements."); return; } - Assert(controlFlowInfo.size() > 0); + AssertPos(currentPos, controlFlowInfo.size() > 0); if (bblock == NULL) return; @@ -664,7 +664,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) { ifsInCFAllUniform(CFInfo::Switch)) { // We know that all program instances are executing the break, so // just jump to the block immediately after the switch. - Assert(breakTarget != NULL); + AssertPos(currentPos, breakTarget != NULL); BranchInst(breakTarget); bblock = NULL; return; @@ -689,7 +689,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) { // break. 
In these cases, we need to update the mask of the lanes // that have executed a 'break' statement: // breakLanes = breakLanes | mask - Assert(breakLanesPtr != NULL); + AssertPos(currentPos, breakLanesPtr != NULL); llvm::Value *mask = GetInternalMask(); llvm::Value *breakMask = LoadInst(breakLanesPtr, "break_mask"); @@ -728,7 +728,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) { "for/while/do/foreach loops."); return; } - Assert(controlFlowInfo.size() > 0); + AssertPos(currentPos, controlFlowInfo.size() > 0); if (ifsInCFAllUniform(CFInfo::Loop) || GetInternalMask() == LLVMMaskAllOn) { // Similarly to 'break' statements, we can immediately jump to the @@ -744,7 +744,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) { else { // Otherwise update the stored value of which lanes have 'continue'd. // continueLanes = continueLanes | mask - Assert(continueLanesPtr); + AssertPos(currentPos, continueLanesPtr); llvm::Value *mask = GetInternalMask(); llvm::Value *continueMask = LoadInst(continueLanesPtr, "continue_mask"); @@ -772,7 +772,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) { */ bool FunctionEmitContext::ifsInCFAllUniform(int type) const { - Assert(controlFlowInfo.size() > 0); + AssertPos(currentPos, controlFlowInfo.size() > 0); // Go backwards through controlFlowInfo, since we add new nested scopes // to the back. Stop once we come to the first enclosing control flow // structure of the desired type. @@ -783,7 +783,7 @@ FunctionEmitContext::ifsInCFAllUniform(int type) const { return false; --i; } - Assert(i >= 0); // else we didn't find the expected control flow type! + AssertPos(currentPos, i >= 0); // else we didn't find the expected control flow type! 
return true; } @@ -791,7 +791,7 @@ FunctionEmitContext::ifsInCFAllUniform(int type) const { void FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) { llvm::Value *allDone = NULL; - Assert(continueLanesPtr != NULL); + AssertPos(currentPos, continueLanesPtr != NULL); if (breakLanesPtr == NULL) { // In a foreach loop, break and return are illegal, and // breakLanesPtr is NULL. In this case, the mask is guaranteed to @@ -884,7 +884,7 @@ FunctionEmitContext::StartSwitch(bool cfIsUniform, llvm::BasicBlock *bbBreak) { void FunctionEmitContext::EndSwitch() { - Assert(bblock != NULL); + AssertPos(currentPos, bblock != NULL); CFInfo *ci = popCFState(); if (ci->IsVarying() && bblock != NULL) @@ -903,7 +903,7 @@ FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask) { // Find the basic block for the case or default label immediately after // the current one in the switch statement--that's where we want to // jump if the mask is all off at this label. - Assert(nextBlocks->find(bblock) != nextBlocks->end()); + AssertPos(currentPos, nextBlocks->find(bblock) != nextBlocks->end()); llvm::BasicBlock *bbNext = nextBlocks->find(bblock)->second; // Jump to the next one of the mask is all off; otherwise jump to the @@ -917,11 +917,11 @@ FunctionEmitContext::addSwitchMaskCheck(llvm::Value *mask) { statement. */ llvm::Value * FunctionEmitContext::getMaskAtSwitchEntry() { - Assert(controlFlowInfo.size() > 0); + AssertPos(currentPos, controlFlowInfo.size() > 0); int i = controlFlowInfo.size() - 1; while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Switch) --i; - Assert(i != -1); + AssertPos(currentPos, i != -1); return controlFlowInfo[i]->savedMask; } @@ -936,7 +936,7 @@ FunctionEmitContext::EmitDefaultLabel(bool checkMask, SourcePos pos) { // If there's a default label in the switch, a basic block for it // should have been provided in the previous call to SwitchInst(). 
- Assert(defaultBlock != NULL); + AssertPos(currentPos, defaultBlock != NULL); if (bblock != NULL) // The previous case in the switch fell through, or we're in a @@ -998,13 +998,13 @@ FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos) { // Find the basic block for this case statement. llvm::BasicBlock *bbCase = NULL; - Assert(caseBlocks != NULL); + AssertPos(currentPos, caseBlocks != NULL); for (int i = 0; i < (int)caseBlocks->size(); ++i) if ((*caseBlocks)[i].first == value) { bbCase = (*caseBlocks)[i].second; break; } - Assert(bbCase != NULL); + AssertPos(currentPos, bbCase != NULL); if (bblock != NULL) // fall through from the previous case @@ -1047,7 +1047,7 @@ FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault, const std::map &bbNext) { // The calling code should have called StartSwitch() before calling // SwitchInst(). - Assert(controlFlowInfo.size() && + AssertPos(currentPos, controlFlowInfo.size() && controlFlowInfo.back()->IsSwitch()); switchExpr = expr; @@ -1066,7 +1066,7 @@ FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault, if (expr->getType() == LLVMTypes::Int32Type) s->addCase(LLVMInt32(bbCases[i].first), bbCases[i].second); else { - Assert(expr->getType() == LLVMTypes::Int64Type); + AssertPos(currentPos, expr->getType() == LLVMTypes::Int64Type); s->addCase(LLVMInt64(bbCases[i].first), bbCases[i].second); } } @@ -1085,7 +1085,7 @@ FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault, // anyone. 
std::map::const_iterator iter; iter = nextBlocks->find(NULL); - Assert(iter != nextBlocks->end()); + AssertPos(currentPos, iter != nextBlocks->end()); llvm::BasicBlock *bbFirst = iter->second; BranchInst(bbFirst); bblock = NULL; @@ -1282,10 +1282,10 @@ FunctionEmitContext::LaneMask(llvm::Value *v) { std::vector mm; m->symbolTable->LookupFunction("__movmsk", &mm); if (g->target.maskBitCount == 1) - Assert(mm.size() == 1); + AssertPos(currentPos, mm.size() == 1); else // There should be one with signed int signature, one unsigned int. - Assert(mm.size() == 2); + AssertPos(currentPos, mm.size() == 2); // We can actually call either one, since both are i32s as far as // LLVM's type system is concerned... llvm::Function *fmm = mm[0]->function; @@ -1337,7 +1337,7 @@ FunctionEmitContext::CreateBasicBlock(const char *name) { llvm::Value * FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { if (b == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1385,7 +1385,7 @@ lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) { void FunctionEmitContext::AddInstrumentationPoint(const char *note) { - Assert(note != NULL); + AssertPos(currentPos, note != NULL); if (!g->emitInstrumentation) return; @@ -1445,7 +1445,7 @@ FunctionEmitContext::StartScope() { m->diBuilder->createLexicalBlock(parentScope, diFile, currentPos.first_line, currentPos.first_column); - Assert(lexicalBlock.Verify()); + AssertPos(currentPos, lexicalBlock.Verify()); debugScopes.push_back(lexicalBlock); } } @@ -1454,7 +1454,7 @@ FunctionEmitContext::StartScope() { void FunctionEmitContext::EndScope() { if (m->diBuilder != NULL) { - Assert(debugScopes.size() > 0); + AssertPos(currentPos, debugScopes.size() > 0); debugScopes.pop_back(); } } @@ -1462,7 +1462,7 @@ FunctionEmitContext::EndScope() { llvm::DIScope FunctionEmitContext::GetDIScope() const { - Assert(debugScopes.size() > 0); + AssertPos(currentPos, debugScopes.size() > 0); return debugScopes.back(); } 
@@ -1474,7 +1474,7 @@ FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) { llvm::DIScope scope = GetDIScope(); llvm::DIType diType = sym->type->GetDIType(scope); - Assert(diType.Verify()); + AssertPos(currentPos, diType.Verify()); llvm::DIVariable var = m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable, scope, @@ -1483,7 +1483,7 @@ FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) { sym->pos.first_line, diType, true /* preserve through opts */); - Assert(var.Verify()); + AssertPos(currentPos, var.Verify()); llvm::Instruction *declareInst = m->diBuilder->insertDeclare(sym->storagePtr, var, bblock); AddDebugPos(declareInst, &sym->pos, &scope); @@ -1497,7 +1497,7 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) { llvm::DIScope scope = diSubprogram; llvm::DIType diType = sym->type->GetDIType(scope); - Assert(diType.Verify()); + AssertPos(currentPos, diType.Verify()); int flags = 0; llvm::DIVariable var = @@ -1510,7 +1510,7 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) { true /* preserve through opts */, flags, argNum+1); - Assert(var.Verify()); + AssertPos(currentPos, var.Verify()); llvm::Instruction *declareInst = m->diBuilder->insertDeclare(sym->storagePtr, var, bblock); AddDebugPos(declareInst, &sym->pos, &scope); @@ -1545,11 +1545,11 @@ FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1, const char *name) { if (v0 == NULL || v1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } - Assert(v0->getType() == v1->getType()); + AssertPos(currentPos, v0->getType() == v1->getType()); llvm::Type *type = v0->getType(); int arraySize = lArrayVectorWidth(type); if (arraySize == 0) { @@ -1577,7 +1577,7 @@ FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value * FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) { if (v == NULL) { - 
Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1632,11 +1632,11 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, llvm::Value *v0, llvm::Value *v1, const char *name) { if (v0 == NULL || v1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } - Assert(v0->getType() == v1->getType()); + AssertPos(currentPos, v0->getType() == v1->getType()); llvm::Type *type = v0->getType(); int arraySize = lArrayVectorWidth(type); if (arraySize == 0) { @@ -1663,7 +1663,7 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, llvm::Value * FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1697,7 +1697,7 @@ llvm::Value * FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1713,7 +1713,7 @@ FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, llvm::Value * FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1734,7 +1734,7 @@ llvm::Value * FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1750,7 +1750,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) return TruncInst(value, toType, name); else { - Assert(fromType->getScalarSizeInBits() < + AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits()); return ZExtInst(value, toType, name); } @@ -1766,7 +1766,7 
@@ llvm::Value * FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1782,7 +1782,7 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) return TruncInst(value, toType, name); else { - Assert(fromType->getScalarSizeInBits() < + AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits()); return ZExtInst(value, toType, name); } @@ -1799,7 +1799,7 @@ llvm::Instruction * FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1818,7 +1818,7 @@ llvm::Instruction * FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1838,7 +1838,7 @@ llvm::Instruction * FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1857,7 +1857,7 @@ llvm::Instruction * FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1876,7 +1876,7 @@ llvm::Instruction * FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, const char *name) { if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -1953,7 +1953,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index, // index must be varying for this method to be called. 
bool baseIsUniform = (llvm::isa(basePtr->getType())); - Assert(baseIsUniform == false || indexIsVarying == true); + AssertPos(currentPos, baseIsUniform == false || indexIsVarying == true); llvm::Value *varyingPtr = baseIsUniform ? SmearUniform(basePtr) : basePtr; // newPtr = ptr + offset @@ -2057,7 +2057,7 @@ llvm::Value * FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrRefType, const char *name) { if (basePtr == NULL || index == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -2067,11 +2067,11 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { ptrType = CastType(ptrRefType); - Assert(ptrType != NULL); + AssertPos(currentPos, ptrType != NULL); } if (ptrType->IsSlice()) { - Assert(llvm::isa(basePtr->getType())); + AssertPos(currentPos, llvm::isa(basePtr->getType())); llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1); if (ptrType->IsFrozenSlice() == false) { @@ -2099,9 +2099,9 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, // Double-check consistency between the given pointer type and its LLVM // type. 
if (ptrType->IsUniformType()) - Assert(llvm::isa(basePtr->getType())); + AssertPos(currentPos, llvm::isa(basePtr->getType())); else if (ptrType->IsVaryingType()) - Assert(llvm::isa(basePtr->getType())); + AssertPos(currentPos, llvm::isa(basePtr->getType())); bool indexIsVaryingType = llvm::isa(index->getType()); @@ -2127,7 +2127,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 llvm::Value *index1, const Type *ptrRefType, const char *name) { if (basePtr == NULL || index0 == NULL || index1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -2137,14 +2137,14 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { ptrType = CastType(ptrRefType); - Assert(ptrType != NULL); + AssertPos(currentPos, ptrType != NULL); } if (ptrType->IsSlice()) { // Similar to the 1D GEP implementation above, for non-frozen slice // pointers we do the two-step indexing calculation and then pass // the new major index on to a recursive GEP call. - Assert(llvm::isa(basePtr->getType())); + AssertPos(currentPos, llvm::isa(basePtr->getType())); llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1); if (ptrType->IsFrozenSlice() == false) { llvm::Value *newSliceOffset; @@ -2185,7 +2185,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0 // out the type of ptr0. 
const Type *baseType = ptrType->GetBaseType(); const SequentialType *st = CastType(baseType); - Assert(st != NULL); + AssertPos(currentPos, st != NULL); bool ptr0IsUniform = llvm::isa(ptr0->getType()); @@ -2204,7 +2204,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, const Type *ptrRefType, const char *name, const PointerType **resultPtrType) { if (resultPtrType != NULL) - Assert(ptrRefType != NULL); + AssertPos(currentPos, ptrRefType != NULL); llvm::PointerType *llvmPtrType = llvm::dyn_cast(fullBasePtr->getType()); @@ -2212,7 +2212,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, llvm::StructType *llvmStructType = llvm::dyn_cast(llvmPtrType->getElementType()); if (llvmStructType != NULL && llvmStructType->isSized() == false) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } } @@ -2226,7 +2226,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else ptrType = CastType(ptrRefType); - Assert(ptrType != NULL); + AssertPos(currentPos, ptrType != NULL); } // Similarly, we have to see if the pointer type is a struct to see if @@ -2237,7 +2237,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, llvm::isa(fullBasePtr->getType()); const PointerType *rpt; if (baseIsSlicePtr) { - Assert(ptrType != NULL); + AssertPos(currentPos, ptrType != NULL); // Update basePtr to just be the part that actually points to the // start of an soa<> struct for now; the element offset computation // doesn't change the slice offset, so we'll incorporate that into @@ -2250,10 +2250,10 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, // Return the pointer type of the result of this call, for callers that // want it. 
if (resultPtrType != NULL) { - Assert(ptrType != NULL); + AssertPos(currentPos, ptrType != NULL); const CollectionType *ct = CastType(ptrType->GetBaseType()); - Assert(ct != NULL); + AssertPos(currentPos, ct != NULL); *resultPtrType = new PointerType(ct->GetElementType(elementNum), ptrType->GetVariability(), ptrType->IsConstType(), @@ -2285,7 +2285,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, // type of the vector. const SequentialType *st = CastType(ptrType->GetBaseType()); - Assert(st != NULL); + AssertPos(currentPos, st != NULL); llvm::Value *size = g->target.SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock); llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ? @@ -2317,13 +2317,13 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, llvm::Value * FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { if (ptr == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } llvm::PointerType *pt = llvm::dyn_cast(ptr->getType()); - Assert(pt != NULL); + AssertPos(currentPos, pt != NULL); if (name == NULL) name = LLVMGetName(ptr, "_load"); @@ -2419,11 +2419,11 @@ llvm::Value * FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrRefType, const char *name) { if (ptr == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } - Assert(ptrRefType != NULL && mask != NULL); + AssertPos(currentPos, ptrRefType != NULL && mask != NULL); if (name == NULL) name = LLVMGetName(ptr, "_load"); @@ -2433,7 +2433,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { ptrType = CastType(ptrRefType); - Assert(ptrType != NULL); + AssertPos(currentPos, ptrType != NULL); } if (ptrType->IsUniformType()) { @@ -2476,7 +2476,7 @@ llvm::Value * FunctionEmitContext::gather(llvm::Value *ptr, 
const PointerType *ptrType, llvm::Value *mask, const char *name) { // We should have a varying pointer if we get here... - Assert(ptrType->IsVaryingType()); + AssertPos(currentPos, ptrType->IsVaryingType()); const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType(); llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); @@ -2534,13 +2534,13 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, funcName = g->target.is32Bit ? "__pseudo_gather32_16" : "__pseudo_gather64_16"; else { - Assert(llvmReturnType == LLVMTypes::Int8VectorType); + AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType); funcName = g->target.is32Bit ? "__pseudo_gather32_8" : "__pseudo_gather64_8"; } llvm::Function *gatherFunc = m->module->getFunction(funcName); - Assert(gatherFunc != NULL); + AssertPos(currentPos, gatherFunc != NULL); llvm::Value *call = CallInst(gatherFunc, NULL, ptr, mask, name); @@ -2591,7 +2591,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *name, int align, bool atEntryBlock) { if (llvmType == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -2600,7 +2600,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType, // We usually insert it right before the jump instruction at the // end of allocaBlock llvm::Instruction *retInst = allocaBlock->getTerminator(); - Assert(retInst); + AssertPos(currentPos, retInst); inst = new llvm::AllocaInst(llvmType, name ? 
name : "", retInst); } else @@ -2636,12 +2636,12 @@ void FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType, llvm::Value *mask) { if (value == NULL || ptr == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return; } - Assert(CastType(ptrType) != NULL); - Assert(ptrType->IsUniformType()); + AssertPos(currentPos, CastType(ptrType) != NULL); + AssertPos(currentPos, ptrType->IsUniformType()); const Type *valueType = ptrType->GetBaseType(); const CollectionType *collectionType = CastType(valueType); @@ -2662,7 +2662,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, // We must have a regular atomic, enumerator, or pointer type at this // point. - Assert(Type::IsBasicType(valueType)); + AssertPos(currentPos, Type::IsBasicType(valueType)); valueType = valueType->GetAsNonConstType(); // Figure out if we need a 8, 16, 32 or 64-bit masked store. @@ -2672,7 +2672,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, if (pt != NULL) { if (pt->IsSlice()) { // Masked store of (varying) slice pointer. - Assert(pt->IsVaryingType()); + AssertPos(currentPos, pt->IsVaryingType()); // First, extract the pointer from the slice struct and masked // store that. 
@@ -2742,7 +2742,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr, ptr = BitCastInst(ptr, LLVMTypes::Int8VectorPointerType, LLVMGetName(ptr, "_to_int8vecptr")); } - Assert(maskedStoreFunc != NULL); + AssertPos(currentPos, maskedStoreFunc != NULL); std::vector args; args.push_back(ptr); @@ -2764,8 +2764,8 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, const Type *valueType, const Type *origPt, llvm::Value *mask) { const PointerType *ptrType = CastType(origPt); - Assert(ptrType != NULL); - Assert(ptrType->IsVaryingType()); + AssertPos(currentPos, ptrType != NULL); + AssertPos(currentPos, ptrType->IsVaryingType()); const CollectionType *srcCollectionType = CastType(valueType); @@ -2780,7 +2780,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, // instances of the struct type, etc. const CollectionType *dstCollectionType = CastType(ptrType->GetBaseType()); - Assert(dstCollectionType != NULL); + AssertPos(currentPos, dstCollectionType != NULL); // Scatter the collection elements individually for (int i = 0; i < srcCollectionType->GetElementCount(); ++i) { @@ -2827,7 +2827,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, const PointerType *pt = CastType(valueType); // And everything should be a pointer or atomic from here on out... 
- Assert(pt != NULL || CastType(valueType) != NULL); + AssertPos(currentPos, pt != NULL || CastType(valueType) != NULL); llvm::Type *type = value->getType(); const char *funcName = NULL; @@ -2854,7 +2854,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, "__pseudo_scatter64_8"; llvm::Function *scatterFunc = m->module->getFunction(funcName); - Assert(scatterFunc != NULL); + AssertPos(currentPos, scatterFunc != NULL); AddInstrumentationPoint("scatter"); @@ -2873,7 +2873,7 @@ void FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) { if (value == NULL || ptr == NULL) { // may happen due to error elsewhere - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return; } @@ -2898,7 +2898,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, const Type *ptrRefType) { if (value == NULL || ptr == NULL) { // may happen due to error elsewhere - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return; } @@ -2907,7 +2907,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, ptrType = PointerType::GetUniform(ptrRefType->GetReferenceTarget()); else { ptrType = CastType(ptrRefType); - Assert(ptrType != NULL); + AssertPos(currentPos, ptrType != NULL); } // Figure out what kind of store we're doing here @@ -2926,7 +2926,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, maskedStore(value, ptr, ptrType, mask); } else { - Assert(ptrType->IsVaryingType()); + AssertPos(currentPos, ptrType->IsVaryingType()); // We have a varying ptr (an array of pointers), so it's time to // scatter scatter(value, ptr, valueType, ptrType, GetFullMask()); @@ -2940,7 +2940,7 @@ void FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask, const Type *valueType, const PointerType *ptrType) { - Assert(Type::EqualIgnoringConst(ptrType->GetBaseType()->GetAsUniformType(), + AssertPos(currentPos, 
Type::EqualIgnoringConst(ptrType->GetBaseType()->GetAsUniformType(), valueType)); const CollectionType *ct = CastType(valueType); @@ -2959,7 +2959,7 @@ FunctionEmitContext::storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, else { // We're finally at a leaf SOA array; apply the slice offset and // then we can do a final regular store - Assert(Type::IsBasicType(valueType)); + AssertPos(currentPos, Type::IsBasicType(valueType)); ptr = lFinalSliceOffset(this, ptr, &ptrType); StoreInst(value, ptr); } @@ -2972,7 +2972,7 @@ FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src, dest = BitCastInst(dest, LLVMTypes::VoidPointerType); src = BitCastInst(src, LLVMTypes::VoidPointerType); if (count->getType() != LLVMTypes::Int64Type) { - Assert(count->getType() == LLVMTypes::Int32Type); + AssertPos(currentPos, count->getType() == LLVMTypes::Int32Type); count = ZExtInst(count, LLVMTypes::Int64Type, "count_to_64"); } if (align == NULL) @@ -2983,8 +2983,8 @@ FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src, LLVMTypes::VoidType, LLVMTypes::VoidPointerType, LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::Int32Type, LLVMTypes::BoolType, NULL); - Assert(mcFunc != NULL); - Assert(llvm::isa(mcFunc)); + AssertPos(currentPos, mcFunc != NULL); + AssertPos(currentPos, llvm::isa(mcFunc)); std::vector args; args.push_back(dest); @@ -3008,7 +3008,7 @@ FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock, llvm::Value *test) { if (test == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return; } @@ -3021,7 +3021,7 @@ FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock, llvm::Value * FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) { if (v == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -3045,7 +3045,7 @@ llvm::Value * FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int 
elt, const char *name) { if (v == NULL || eltVal == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -3080,7 +3080,7 @@ llvm::Instruction * FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1, const char *name) { if (test == NULL || val0 == NULL || val1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -3123,7 +3123,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, const std::vector &args, const char *name) { if (func == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } @@ -3132,7 +3132,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // isn't the case for things like intrinsics, builtins, and extern "C" // functions from the application. Add the mask if it's needed. unsigned int calleeArgCount = lCalleeArgCount(func, funcType); - Assert(argVals.size() + 1 == calleeArgCount || + AssertPos(currentPos, argVals.size() + 1 == calleeArgCount || argVals.size() == calleeArgCount); if (argVals.size() + 1 == calleeArgCount) argVals.push_back(GetFullMask()); @@ -3197,7 +3197,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *currentMask = LoadInst(maskPtr); llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros_i32"); - Assert(cttz != NULL); + AssertPos(currentPos, cttz != NULL); llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask), "first_lane"); @@ -3245,12 +3245,12 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // accumulate the result using the call mask. 
if (callResult != NULL && callResult->getType() != LLVMTypes::VoidType) { - Assert(resultPtr != NULL); + AssertPos(currentPos, resultPtr != NULL); StoreInst(callResult, resultPtr, callMask, returnType, PointerType::GetUniform(returnType)); } else - Assert(resultPtr == NULL); + AssertPos(currentPos, resultPtr == NULL); // Update the mask to turn off the program instances for which // we just called the function. @@ -3310,7 +3310,7 @@ FunctionEmitContext::ReturnInst() { rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock); } else { - Assert(Type::Equal(function->GetReturnType(), AtomicType::Void)); + AssertPos(currentPos, Type::Equal(function->GetReturnType(), AtomicType::Void)); rinst = llvm::ReturnInst::Create(*g->ctx, bblock); } @@ -3325,25 +3325,25 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee, std::vector &argVals, llvm::Value *launchCount) { if (callee == NULL) { - Assert(m->errorCount > 0); + AssertPos(currentPos, m->errorCount > 0); return NULL; } launchedTasks = true; - Assert(llvm::isa(callee)); + AssertPos(currentPos, llvm::isa(callee)); llvm::Type *argType = (llvm::dyn_cast(callee))->arg_begin()->getType(); - Assert(llvm::PointerType::classof(argType)); + AssertPos(currentPos, llvm::PointerType::classof(argType)); llvm::PointerType *pt = llvm::dyn_cast(argType); - Assert(llvm::StructType::classof(pt->getElementType())); + AssertPos(currentPos, llvm::StructType::classof(pt->getElementType())); llvm::StructType *argStructType = static_cast(pt->getElementType()); - Assert(argStructType->getNumElements() == argVals.size() + 1); + AssertPos(currentPos, argStructType->getNumElements() == argVals.size() + 1); llvm::Function *falloc = m->module->getFunction("ISPCAlloc"); - Assert(falloc != NULL); + AssertPos(currentPos, falloc != NULL); llvm::Value *structSize = g->target.SizeOf(argStructType, bblock); if (structSize->getType() != LLVMTypes::Int64Type) // ISPCAlloc expects the size as an uint64_t, but on 32-bit @@ -3378,7 +3378,7 @@ 
FunctionEmitContext::LaunchInst(llvm::Value *callee, // argument block we just filled in llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType); llvm::Function *flaunch = m->module->getFunction("ISPCLaunch"); - Assert(flaunch != NULL); + AssertPos(currentPos, flaunch != NULL); std::vector args; args.push_back(launchGroupHandlePtr); args.push_back(fptr); @@ -3427,7 +3427,7 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType) { // This should only be called for varying pointers const PointerType *pt = CastType(ptrType); - Assert(pt && pt->IsVaryingType()); + AssertPos(currentPos, pt && pt->IsVaryingType()); const Type *baseType = ptrType->GetBaseType(); if (Type::IsBasicType(baseType) == false) @@ -3464,7 +3464,7 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, CFInfo * FunctionEmitContext::popCFState() { - Assert(controlFlowInfo.size() > 0); + AssertPos(currentPos, controlFlowInfo.size() > 0); CFInfo *ci = controlFlowInfo.back(); controlFlowInfo.pop_back(); @@ -3488,7 +3488,7 @@ FunctionEmitContext::popCFState() { loopMask = ci->savedLoopMask; } else { - Assert(ci->IsIf()); + AssertPos(currentPos, ci->IsIf()); // nothing to do } diff --git a/decl.cpp b/decl.cpp index 728206fd..7cf2b5fb 100644 --- a/decl.cpp +++ b/decl.cpp @@ -231,7 +231,7 @@ Declarator::InitFromDeclSpecs(DeclSpecs *ds) { InitFromType(baseType, ds); if (type == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -319,8 +319,8 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { if (kind == DK_BASE) { // All of the type qualifiers should be in the DeclSpecs for the // base declarator - Assert(typeQualifiers == 0); - Assert(child == NULL); + AssertPos(pos, typeQualifiers == 0); + AssertPos(pos, child == NULL); type = baseType; } else if (kind == DK_POINTER) { @@ -398,7 +398,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { Declaration *d = functionParams[i]; if (d == 
NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); continue; } if (d->declarators.size() == 0) { @@ -408,10 +408,10 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { d->declarators[0]->InitFromDeclSpecs(d->declSpecs); } - Assert(d->declarators.size() == 1); + AssertPos(pos, d->declarators.size() == 1); Declarator *decl = d->declarators[0]; if (decl == NULL || decl->type == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); continue; } @@ -446,7 +446,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { // significant problem.) const Type *targetType = at->GetElementType(); if (targetType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -525,7 +525,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { } if (child == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } diff --git a/expr.cpp b/expr.cpp index e5d0050c..3eaaa96f 100644 --- a/expr.cpp +++ b/expr.cpp @@ -148,7 +148,7 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType, static Expr * lArrayToPointer(Expr *expr) { - Assert(expr && CastType(expr->GetType())); + AssertPos(expr->pos, expr && CastType(expr->GetType())); Expr *zero = new ConstExpr(AtomicType::UniformInt32, 0, expr->pos); Expr *index = new IndexExpr(expr, zero, expr->pos); @@ -189,7 +189,7 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, bool failureOk, const char *errorMsgBase, SourcePos pos) { /* This function is way too long and complex. Is type conversion stuff always this messy, or can this be cleaned up somehow? 
*/ - Assert(failureOk || errorMsgBase != NULL); + AssertPos(pos, failureOk || errorMsgBase != NULL); if (toType == NULL || fromType == NULL) return false; @@ -465,7 +465,7 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, fromArrayType->GetElementType())) { // the case of different element counts should have returned // successfully earlier, yes?? - Assert(toArrayType->GetElementCount() != fromArrayType->GetElementCount()); + AssertPos(pos, toArrayType->GetElementCount() != fromArrayType->GetElementCount()); goto typecast_ok; } else if (Type::Equal(toArrayType->GetElementType(), @@ -521,7 +521,7 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, // enum -> atomic (integer, generally...) is always ok if (fromEnumType != NULL) { - Assert(toAtomicType != NULL || toVectorType != NULL); + AssertPos(pos, toAtomicType != NULL || toVectorType != NULL); goto typecast_ok; } @@ -643,7 +643,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, // pointer we have. llvm::Type *llvmType = symType->LLVMType(g->ctx); if (llvmType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -757,7 +757,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, collectionType ? collectionType->GetElementType(i) : symType->GetAsUniformType(); if (elementType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -776,7 +776,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr, // rest as zero. 
llvm::Type *llvmType = elementType->LLVMType(g->ctx); if (llvmType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -1082,7 +1082,7 @@ lEmitNegate(Expr *arg, SourcePos pos, FunctionEmitContext *ctx) { return ctx->BinaryOperator(llvm::Instruction::FSub, zero, argVal, LLVMGetName(argVal, "_negate")); else { - Assert(type->IsIntType()); + AssertPos(pos, type->IsIntType()); return ctx->BinaryOperator(llvm::Instruction::Sub, zero, argVal, LLVMGetName(argVal, "_negate")); } @@ -1217,7 +1217,7 @@ UnaryExpr::Optimize() { FATAL("unexpected type in UnaryExpr::Optimize() / BitNot case"); } case LogicalNot: { - Assert(Type::EqualIgnoringConst(type, AtomicType::UniformBool) || + AssertPos(pos, Type::EqualIgnoringConst(type, AtomicType::UniformBool) || Type::EqualIgnoringConst(type, AtomicType::VaryingBool)); bool v[ISPC_MAX_NVEC]; int count = constExpr->AsBool(v); @@ -1410,7 +1410,7 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0, break; case BinaryExpr::Sub: { if (CastType(type1) != NULL) { - Assert(Type::Equal(type0, type1)); + AssertPos(pos, Type::Equal(type0, type1)); if (ptrType->IsSlice()) { llvm::Value *p0 = ctx->ExtractInst(value0, 0); @@ -1421,7 +1421,7 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0, ctx, pos); int soaWidth = ptrType->GetBaseType()->GetSOAWidth(); - Assert(soaWidth > 0); + AssertPos(pos, soaWidth > 0); llvm::Value *soaScale = LLVMIntAsType(soaWidth, majorDelta->getType()); @@ -1509,7 +1509,7 @@ lEmitBinaryArith(BinaryExpr::Op op, llvm::Value *value0, llvm::Value *value1, return lEmitBinaryPointerArith(op, value0, value1, type0, type1, ctx, pos); else { - Assert(Type::EqualIgnoringConst(type0, type1)); + AssertPos(pos, Type::EqualIgnoringConst(type0, type1)); llvm::Instruction::BinaryOps inst; bool isFloatOp = type0->IsFloatType(); @@ -1632,7 +1632,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, const Type *type0 = arg0->GetType(), *type1 = arg1->GetType(); if 
(type0 == NULL || type1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -1650,17 +1650,17 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, // promote the uniform one to varying if (type0->IsUniformType() && type1->IsVaryingType()) { arg0 = TypeConvertExpr(arg0, AtomicType::VaryingBool, lOpString(op)); - Assert(arg0 != NULL); + AssertPos(pos, arg0 != NULL); } if (type1->IsUniformType() && type0->IsVaryingType()) { arg1 = TypeConvertExpr(arg1, AtomicType::VaryingBool, lOpString(op)); - Assert(arg1 != NULL); + AssertPos(pos, arg1 != NULL); } llvm::Value *value0 = arg0->GetValue(ctx); llvm::Value *value1 = arg1->GetValue(ctx); if (value0 == NULL || value1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -1668,7 +1668,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, return ctx->BinaryOperator(llvm::Instruction::And, value0, value1, "logical_and"); else { - Assert(op == BinaryExpr::LogicalOr); + AssertPos(pos, op == BinaryExpr::LogicalOr); return ctx->BinaryOperator(llvm::Instruction::Or, value0, value1, "logical_or"); } @@ -1686,7 +1686,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, // Evaluate the first operand llvm::Value *value0 = arg0->GetValue(ctx); if (value0 == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -1710,7 +1710,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, ctx->BranchInst(bbLogicalDone); } else { - Assert(op == BinaryExpr::LogicalAnd); + AssertPos(pos, op == BinaryExpr::LogicalAnd); // Conversely, for &&, if value0 is false, we skip evaluating // value1. 
@@ -1732,12 +1732,12 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, ctx->SetCurrentBasicBlock(bbEvalValue1); if (type1->IsUniformType() && retType->IsVaryingType()) { arg1 = TypeConvertExpr(arg1, AtomicType::VaryingBool, "logical op"); - Assert(arg1 != NULL); + AssertPos(pos, arg1 != NULL); } llvm::Value *value1 = arg1->GetValue(ctx); if (value1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } ctx->StoreInst(value1, retPtr); @@ -1759,7 +1759,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, // perform logical vector ops with its value. if (type1->IsUniformType()) { arg1 = TypeConvertExpr(arg1, AtomicType::VaryingBool, "logical op"); - Assert(arg1 != NULL); + AssertPos(pos, arg1 != NULL); type1 = arg1->GetType(); } @@ -1794,7 +1794,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, llvm::Value *value1 = arg1->GetValue(ctx); if (value1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -1812,7 +1812,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, ctx->BranchInst(bbLogicalDone); } else { - Assert(op == BinaryExpr::LogicalAnd); + AssertPos(pos, op == BinaryExpr::LogicalAnd); // If value0 is false for all currently running lanes, the // overall result must be false: this corresponds to checking @@ -1843,7 +1843,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, llvm::Value *value1 = arg1->GetValue(ctx); if (value1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -1875,7 +1875,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1, llvm::Value * BinaryExpr::GetValue(FunctionEmitContext *ctx) const { if (!arg0 || !arg1) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -1886,7 +1886,7 @@ BinaryExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *value0 = arg0->GetValue(ctx); llvm::Value *value1 = arg1->GetValue(ctx); if 
(value0 == NULL || value1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -1942,7 +1942,7 @@ BinaryExpr::GetType() const { // and will fail type checking and (int + ptr) should be canonicalized // into (ptr + int) by type checking. if (op == Add) - Assert(CastType(type1) == NULL); + AssertPos(pos, CastType(type1) == NULL); if (op == Comma) return arg1->GetType(); @@ -1967,14 +1967,14 @@ BinaryExpr::GetType() const { } // otherwise fall through for these... - Assert(op == Lt || op == Gt || op == Le || op == Ge || + AssertPos(pos, op == Lt || op == Gt || op == Le || op == Ge || op == Equal || op == NotEqual); } const Type *exprType = Type::MoreGeneralType(type0, type1, pos, lOpString(op)); // I don't think that MoreGeneralType should be able to fail after the // checks done in BinaryExpr::TypeCheck(). - Assert(exprType != NULL); + AssertPos(pos, exprType != NULL); switch (op) { case Add: @@ -2159,7 +2159,7 @@ BinaryExpr::Optimize() { std::vector rcpFuns; m->symbolTable->LookupFunction("rcp", &rcpFuns); if (rcpFuns.size() > 0) { - Assert(rcpFuns.size() == 2); + AssertPos(pos, rcpFuns.size() == 2); Expr *rcpSymExpr = new FunctionSymbolExpr("rcp", rcpFuns, pos); ExprList *args = new ExprList(arg1, arg1->pos); Expr *rcpCall = new FunctionCallExpr(rcpSymExpr, args, @@ -2189,7 +2189,7 @@ BinaryExpr::Optimize() { if (constArg0 == NULL || constArg1 == NULL) return this; - Assert(Type::EqualIgnoringConst(arg0->GetType(), arg1->GetType())); + AssertPos(pos, Type::EqualIgnoringConst(arg0->GetType(), arg1->GetType())); const Type *type = arg0->GetType()->GetAsNonConstType(); if (Type::Equal(type, AtomicType::UniformFloat) || Type::Equal(type, AtomicType::VaryingFloat)) { @@ -2280,12 +2280,12 @@ BinaryExpr::TypeCheck() { if (CastType(type0) != NULL) { arg0 = new RefDerefExpr(arg0, arg0->pos); type0 = arg0->GetType(); - Assert(type0 != NULL); + AssertPos(pos, type0 != NULL); } if (CastType(type1) != NULL) { arg1 = new 
RefDerefExpr(arg1, arg1->pos); type1 = arg1->GetType(); - Assert(type1 != NULL); + AssertPos(pos, type1 != NULL); } // Convert arrays to pointers to their first elements @@ -2362,7 +2362,7 @@ BinaryExpr::TypeCheck() { std::swap(pt0, pt1); } - Assert(pt0 != NULL); + AssertPos(pos, pt0 != NULL); if (PointerType::IsVoidPointer(pt0)) { Error(pos, "Illegal to perform pointer arithmetic " @@ -2383,7 +2383,7 @@ BinaryExpr::TypeCheck() { arg0 = TypeConvertExpr(arg0, type0->GetAsVaryingType(), "pointer addition"); offsetType = offsetType->GetAsVaryingType(); - Assert(arg0 != NULL); + AssertPos(pos, arg0 != NULL); } arg1 = TypeConvertExpr(arg1, offsetType, lOpString(op)); @@ -2721,13 +2721,13 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const { const Type *ptrType = lvalue->GetLValueType(); const Type *valueType = rvalue->GetType(); if (ptrType == NULL || valueType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } llvm::Value *value = rvalue->GetValue(ctx); if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -2748,7 +2748,7 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const { case XorAssign: case OrAssign: { // This should be caught during type checking - Assert(!CastType(type) && + AssertPos(pos, !CastType(type) && !CastType(type)); return lEmitOpAssign(op, lvalue, rvalue, type, baseSym, pos, ctx); } @@ -2841,7 +2841,7 @@ AssignExpr::TypeCheck() { const Type *lhsType = lvalue->GetType(); if (lhsType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -2992,7 +2992,7 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const { const Type *testType = test->GetType()->GetAsNonConstType(); // This should be taken care of during typechecking - Assert(Type::Equal(testType->GetBaseType(), AtomicType::UniformBool) || + AssertPos(pos, Type::Equal(testType->GetBaseType(), AtomicType::UniformBool) || Type::Equal(testType->GetBaseType(), 
AtomicType::VaryingBool)); const Type *type = expr1->GetType(); @@ -3032,7 +3032,7 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const { else if (CastType(testType) == NULL) { // the test is a varying bool type llvm::Value *testVal = test->GetValue(ctx); - Assert(testVal->getType() == LLVMTypes::MaskType); + AssertPos(pos, testVal->getType() == LLVMTypes::MaskType); llvm::Value *oldMask = ctx->GetInternalMask(); llvm::Value *fullMask = ctx->GetFullMask(); @@ -3095,8 +3095,8 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const { ctx->SetDebugPos(pos); const VectorType *vt = CastType(type); // Things that typechecking should have caught - Assert(vt != NULL); - Assert(CastType(testType) != NULL && + AssertPos(pos, vt != NULL); + AssertPos(pos, CastType(testType) != NULL && (CastType(testType)->GetElementCount() == vt->GetElementCount())); @@ -3137,7 +3137,7 @@ SelectExpr::GetType() const { CastType(testType)->GetElementCount() : 0; int expr1VecSize = CastType(expr1Type) != NULL ? CastType(expr1Type)->GetElementCount() : 0; - Assert(!(testVecSize != 0 && expr1VecSize != 0 && testVecSize != expr1VecSize)); + AssertPos(pos, !(testVecSize != 0 && expr1VecSize != 0 && testVecSize != expr1VecSize)); int vectorSize = std::max(testVecSize, expr1VecSize); return Type::MoreGeneralType(expr1Type, expr2Type, Union(expr1->pos, expr2->pos), @@ -3182,9 +3182,9 @@ SelectExpr::Optimize() { if (constExpr1 == NULL || constExpr2 == NULL) return this; - Assert(Type::Equal(constExpr1->GetType(), constExpr2->GetType())); + AssertPos(pos, Type::Equal(constExpr1->GetType(), constExpr2->GetType())); const Type *exprType = constExpr1->GetType()->GetAsNonConstType(); - Assert(exprType->IsVaryingType()); + AssertPos(pos, exprType->IsVaryingType()); // FIXME: it's annoying to have to have all of this replicated code. 
if (Type::Equal(exprType, AtomicType::VaryingInt32) || @@ -3348,12 +3348,12 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *callee = func->GetValue(ctx); if (callee == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } const FunctionType *ft = lGetFunctionType(func); - Assert(ft != NULL); + AssertPos(pos, ft != NULL); bool isVoidFunc = Type::Equal(ft->GetReturnType(), AtomicType::Void); // Automatically convert function call args to references if needed. @@ -3366,7 +3366,7 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { // Specifically, this can happen if there's an error earlier during // overload resolution. if ((int)callargs.size() > ft->GetNumParameters()) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -3433,7 +3433,7 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *retVal = NULL; ctx->SetDebugPos(pos); if (ft->isTask) { - Assert(launchCountExpr != NULL); + AssertPos(pos, launchCountExpr != NULL); llvm::Value *launchCount = launchCountExpr->GetValue(ctx); if (launchCount != NULL) ctx->LaunchInst(callee, argVals, launchCount); @@ -3527,7 +3527,7 @@ FunctionCallExpr::TypeCheck() { if (isLaunch) Error(pos, "\"launch\" expression illegal with non-\"task\"-" "qualified function."); - Assert(launchCountExpr == NULL); + AssertPos(pos, launchCountExpr == NULL); } } else { @@ -3583,7 +3583,7 @@ FunctionCallExpr::TypeCheck() { // Otherwise the parameter default saves us. It should // be there for sure, given the check right above the // for loop. - Assert(funcType->GetParameterDefault(i) != NULL); + AssertPos(pos, funcType->GetParameterDefault(i) != NULL); } if (fptrType->IsVaryingType()) { @@ -3715,7 +3715,7 @@ ExprList::GetConstant(const Type *type) const { // conversion machinery handle it. 
expr = TypeConvertExpr(exprs[i], elementType, "initializer list"); if (expr == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } // Re-establish const-ness if possible @@ -3734,13 +3734,13 @@ ExprList::GetConstant(const Type *type) const { for (int i = (int)exprs.size(); i < collectionType->GetElementCount(); ++i) { const Type *elementType = collectionType->GetElementType(i); if (elementType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } llvm::Type *llvmType = elementType->LLVMType(g->ctx); if (llvmType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -3751,7 +3751,7 @@ ExprList::GetConstant(const Type *type) const { if (CastType(type) != NULL) { llvm::StructType *llvmStructType = llvm::dyn_cast(collectionType->LLVMType(g->ctx)); - Assert(llvmStructType != NULL); + AssertPos(pos, llvmStructType != NULL); return llvm::ConstantStruct::get(llvmStructType, cv); } else { @@ -3762,11 +3762,11 @@ ExprList::GetConstant(const Type *type) const { return llvm::ConstantArray::get(lat, cv); else { // uniform short vector type - Assert(type->IsUniformType() && + AssertPos(pos, type->IsUniformType() && CastType(type) != NULL); llvm::VectorType *lvt = llvm::dyn_cast(lt); - Assert(lvt != NULL); + AssertPos(pos, lvt != NULL); // Uniform short vectors are stored as vectors of length // rounded up to the native vector width. 
So we add additional @@ -3900,7 +3900,7 @@ lVaryingStructHasUniformMember(const Type *type, SourcePos pos) { for (int i = 0; i < st->GetElementCount(); ++i) { const Type *eltType = st->GetElementType(i); if (eltType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); continue; } @@ -3932,7 +3932,7 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { if (baseExpr == NULL || index == NULL || ((indexType = index->GetType()) == NULL) || ((returnType = GetType()) == NULL)) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -3955,7 +3955,7 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { const Type *baseExprType = baseExpr->GetType(); llvm::Value *val = baseExpr->GetValue(ctx); if (baseExprType == NULL || val == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } ctx->SetDebugPos(pos); @@ -3965,7 +3965,7 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { // Get a pointer type to the underlying elements const SequentialType *st = CastType(baseExprType); - Assert(st != NULL); + AssertPos(pos, st != NULL); lvType = PointerType::GetUniform(st->GetElementType()); // And do the indexing calculation into the temporary array in memory @@ -3977,7 +3977,7 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { } else { Symbol *baseSym = GetBaseSymbol(); - Assert(baseSym != NULL); + AssertPos(pos, baseSym != NULL); mask = lMaskForSymbol(baseSym, ctx); } @@ -4007,7 +4007,7 @@ IndexExpr::GetType() const { const SequentialType *sequentialType = CastType(baseExprType->GetReferenceTarget()); // Typechecking should have caught this... 
- Assert(sequentialType != NULL); + AssertPos(pos, sequentialType != NULL); elementType = sequentialType->GetElementType(); } @@ -4120,14 +4120,14 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { const Type *baseExprType; if (baseExpr == NULL || index == NULL || ((baseExprType = baseExpr->GetType()) == NULL)) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } ctx->SetDebugPos(pos); llvm::Value *indexValue = index->GetValue(ctx); if (indexValue == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -4136,7 +4136,7 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { // We're indexing off of a pointer llvm::Value *basePtrValue = baseExpr->GetValue(ctx); if (basePtrValue == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } ctx->SetDebugPos(pos); @@ -4159,11 +4159,11 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { CastType(baseExprType)) { basePtr = baseExpr->GetLValue(ctx); basePtrType = CastType(baseExpr->GetLValueType()); - if (baseExpr->GetLValueType()) Assert(basePtrType != NULL); + if (baseExpr->GetLValueType()) AssertPos(pos, basePtrType != NULL); } else { baseExprType = baseExprType->GetReferenceTarget(); - Assert(CastType(baseExprType) || + AssertPos(pos, CastType(baseExprType) || CastType(baseExprType)); basePtr = baseExpr->GetValue(ctx); basePtrType = PointerType::GetUniform(baseExprType); @@ -4205,7 +4205,7 @@ IndexExpr::GetLValueType() const { const Type *refTarget = baseExprLValueType->GetReferenceTarget(); baseExprLValueType = PointerType::GetUniform(refTarget); } - Assert(CastType(baseExprLValueType) != NULL); + AssertPos(pos, CastType(baseExprLValueType) != NULL); // Find the type of thing that we're indexing into const Type *elementType; @@ -4216,7 +4216,7 @@ IndexExpr::GetLValueType() const { else { const PointerType *pt = CastType(baseExprLValueType->GetBaseType()); - Assert(pt != NULL); + AssertPos(pos, pt != NULL); 
elementType = pt->GetBaseType(); } @@ -4258,13 +4258,13 @@ IndexExpr::TypeCheck() { const Type *indexType; if (baseExpr == NULL || index == NULL || ((indexType = index->GetType()) == NULL)) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } const Type *baseExprType = baseExpr->GetType(); if (baseExprType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -4395,7 +4395,7 @@ StructMemberExpr::GetType() const { ((exprType = expr->GetType()) == NULL) || ((structType = getStructType()) == NULL) || ((lvalueType = GetLValueType()) == NULL)) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -4407,14 +4407,14 @@ StructMemberExpr::GetType() const { getCandidateNearMatches().c_str()); return NULL; } - Assert(Type::Equal(lvalueType->GetBaseType(), elementType)); + AssertPos(pos, Type::Equal(lvalueType->GetBaseType(), elementType)); bool isSlice = (CastType(lvalueType) && CastType(lvalueType)->IsSlice()); if (isSlice) { // FIXME: not true if we allow bound unif/varying for soa<> // structs?... - Assert(elementType->IsSOAType()); + AssertPos(pos, elementType->IsSOAType()); // If we're accessing a member of an soa structure via a uniform // slice pointer, then the result type is the uniform variant of @@ -4440,14 +4440,14 @@ StructMemberExpr::GetLValueType() const { return lvalueType; if (expr == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } const Type *exprLValueType = dereferenceExpr ? 
expr->GetType() : expr->GetLValueType(); if (exprLValueType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -4514,12 +4514,12 @@ StructMemberExpr::getStructType() const { structType = rt->GetReferenceTarget(); else { const PointerType *pt = CastType(type); - Assert(pt != NULL); + AssertPos(pos, pt != NULL); structType = pt->GetBaseType(); } const StructType *ret = CastType(structType); - Assert(ret != NULL); + AssertPos(pos, ret != NULL); return ret; } @@ -4557,11 +4557,11 @@ VectorMemberExpr::VectorMemberExpr(Expr *e, const char *id, SourcePos p, if (pt != NULL) exprVectorType = CastType(pt->GetBaseType()); else { - Assert(CastType(exprType) != NULL); + AssertPos(pos, CastType(exprType) != NULL); exprVectorType = CastType(exprType->GetReferenceTarget()); } - Assert(exprVectorType != NULL); + AssertPos(pos, exprVectorType != NULL); } memberType = new VectorType(exprVectorType->GetElementType(), identifier.length()); @@ -4586,7 +4586,7 @@ VectorMemberExpr::GetType() const { bool isSlice = (CastType(lvType) && CastType(lvType)->IsSlice()); if (isSlice) { -//CO Assert(type->IsSOAType()); +//CO AssertPos(pos, type->IsSOAType()); if (lvType->IsUniformType()) type = type->GetAsUniformType(); } @@ -4616,7 +4616,7 @@ VectorMemberExpr::GetLValueType() const { if (identifier.length() == 1) { if (expr == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -4630,7 +4630,7 @@ VectorMemberExpr::GetLValueType() const { vt = CastType(exprLValueType->GetReferenceTarget()); else vt = CastType(exprLValueType->GetBaseType()); - Assert(vt != NULL); + AssertPos(pos, vt != NULL); // we don't want to report that it's e.g. a pointer to a float<1>, // but a pointer to a float, etc. 
@@ -4683,7 +4683,7 @@ VectorMemberExpr::GetValue(FunctionEmitContext *ctx) const { } if (basePtr == NULL || basePtrType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -4825,7 +4825,7 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { // so that we can index from there... llvm::Value *val = expr->GetValue(ctx); if (!val) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } ctx->SetDebugPos(pos); @@ -4845,7 +4845,7 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { } else { Symbol *baseSym = GetBaseSymbol(); - Assert(baseSym != NULL); + AssertPos(pos, baseSym != NULL); mask = lMaskForSymbol(baseSym, ctx); } @@ -4897,7 +4897,7 @@ MemberExpr::GetLValue(FunctionEmitContext *ctx) const { exprLValueType, basePtr->getName().str().c_str()); if (ptr == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -4975,7 +4975,7 @@ ConstExpr::ConstExpr(const Type *t, int8_t i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformInt8->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt8->GetAsConstType())); int8Val[0] = i; } @@ -4984,7 +4984,7 @@ ConstExpr::ConstExpr(const Type *t, int8_t *i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformInt8->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt8->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingInt8->GetAsConstType())); for (int j = 0; j < Count(); ++j) int8Val[j] = i[j]; @@ -4995,7 +4995,7 @@ ConstExpr::ConstExpr(const Type *t, uint8_t u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt8->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt8->GetAsConstType())); uint8Val[0] = u; } @@ -5004,7 +5004,7 @@ ConstExpr::ConstExpr(const 
Type *t, uint8_t *u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt8->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt8->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingUInt8->GetAsConstType())); for (int j = 0; j < Count(); ++j) uint8Val[j] = u[j]; @@ -5015,7 +5015,7 @@ ConstExpr::ConstExpr(const Type *t, int16_t i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformInt16->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt16->GetAsConstType())); int16Val[0] = i; } @@ -5024,7 +5024,7 @@ ConstExpr::ConstExpr(const Type *t, int16_t *i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformInt16->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt16->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingInt16->GetAsConstType())); for (int j = 0; j < Count(); ++j) int16Val[j] = i[j]; @@ -5035,7 +5035,7 @@ ConstExpr::ConstExpr(const Type *t, uint16_t u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt16->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt16->GetAsConstType())); uint16Val[0] = u; } @@ -5044,7 +5044,7 @@ ConstExpr::ConstExpr(const Type *t, uint16_t *u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt16->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt16->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingUInt16->GetAsConstType())); for (int j = 0; j < Count(); ++j) uint16Val[j] = u[j]; @@ -5055,7 +5055,7 @@ ConstExpr::ConstExpr(const Type *t, int32_t i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, 
AtomicType::UniformInt32->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt32->GetAsConstType())); int32Val[0] = i; } @@ -5064,7 +5064,7 @@ ConstExpr::ConstExpr(const Type *t, int32_t *i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformInt32->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt32->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingInt32->GetAsConstType())); for (int j = 0; j < Count(); ++j) int32Val[j] = i[j]; @@ -5075,7 +5075,7 @@ ConstExpr::ConstExpr(const Type *t, uint32_t u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt32->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt32->GetAsConstType()) || (CastType(type) != NULL && type->IsUniformType())); uint32Val[0] = u; @@ -5086,7 +5086,7 @@ ConstExpr::ConstExpr(const Type *t, uint32_t *u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt32->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt32->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingUInt32->GetAsConstType()) || (CastType(type) != NULL)); for (int j = 0; j < Count(); ++j) @@ -5098,7 +5098,7 @@ ConstExpr::ConstExpr(const Type *t, float f, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformFloat->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformFloat->GetAsConstType())); floatVal[0] = f; } @@ -5107,7 +5107,7 @@ ConstExpr::ConstExpr(const Type *t, float *f, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformFloat->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformFloat->GetAsConstType()) || Type::Equal(type, 
AtomicType::VaryingFloat->GetAsConstType())); for (int j = 0; j < Count(); ++j) floatVal[j] = f[j]; @@ -5118,7 +5118,7 @@ ConstExpr::ConstExpr(const Type *t, int64_t i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformInt64->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt64->GetAsConstType())); int64Val[0] = i; } @@ -5127,7 +5127,7 @@ ConstExpr::ConstExpr(const Type *t, int64_t *i, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformInt64->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformInt64->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingInt64->GetAsConstType())); for (int j = 0; j < Count(); ++j) int64Val[j] = i[j]; @@ -5138,7 +5138,7 @@ ConstExpr::ConstExpr(const Type *t, uint64_t u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt64->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt64->GetAsConstType())); uint64Val[0] = u; } @@ -5147,7 +5147,7 @@ ConstExpr::ConstExpr(const Type *t, uint64_t *u, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformUInt64->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformUInt64->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingUInt64->GetAsConstType())); for (int j = 0; j < Count(); ++j) uint64Val[j] = u[j]; @@ -5158,7 +5158,7 @@ ConstExpr::ConstExpr(const Type *t, double f, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformDouble->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformDouble->GetAsConstType())); doubleVal[0] = f; } @@ -5167,7 +5167,7 @@ ConstExpr::ConstExpr(const Type *t, double *f, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - 
Assert(Type::Equal(type, AtomicType::UniformDouble->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformDouble->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingDouble->GetAsConstType())); for (int j = 0; j < Count(); ++j) doubleVal[j] = f[j]; @@ -5178,7 +5178,7 @@ ConstExpr::ConstExpr(const Type *t, bool b, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformBool->GetAsConstType())); + AssertPos(pos, Type::Equal(type, AtomicType::UniformBool->GetAsConstType())); boolVal[0] = b; } @@ -5187,7 +5187,7 @@ ConstExpr::ConstExpr(const Type *t, bool *b, SourcePos p) : Expr(p) { type = t; type = type->GetAsConstType(); - Assert(Type::Equal(type, AtomicType::UniformBool->GetAsConstType()) || + AssertPos(pos, Type::Equal(type, AtomicType::UniformBool->GetAsConstType()) || Type::Equal(type, AtomicType::VaryingBool->GetAsConstType())); for (int j = 0; j < Count(); ++j) boolVal[j] = b[j]; @@ -5301,7 +5301,7 @@ ConstExpr::getBasicType() const { if (at != NULL) return at->basicType; else { - Assert(CastType(type) != NULL); + AssertPos(pos, CastType(type) != NULL); return AtomicType::TYPE_UINT32; } } @@ -5653,7 +5653,7 @@ ConstExpr::GetConstant(const Type *type) const { // Caller shouldn't be trying to stuff a varying value here into a // constant type. if (type->IsUniformType()) - Assert(Count() == 1); + AssertPos(pos, Count() == 1); type = type->GetAsNonConstType(); if (Type::Equal(type, AtomicType::UniformBool) || @@ -5762,7 +5762,7 @@ ConstExpr::GetConstant(const Type *type) const { // appropriate type. 
llvm::Type *llvmType = type->LLVMType(g->ctx); if (llvmType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -6430,7 +6430,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { ctx->SetDebugPos(pos); const Type *toType = GetType(), *fromType = expr->GetType(); if (toType == NULL || fromType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -6481,11 +6481,11 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { } else { // Uniform -> varying pointer conversion - Assert(fromType->IsUniformType() && toType->IsVaryingType()); + AssertPos(pos, fromType->IsUniformType() && toType->IsVaryingType()); if (fromPointerType->IsSlice()) { // For slice pointers, we need to smear out both the // pointer and the offset vector - Assert(toPointerType->IsSlice()); + AssertPos(pos, toPointerType->IsSlice()); llvm::Value *ptr = ctx->ExtractInst(value, 0); llvm::Value *offset = ctx->ExtractInst(value, 1); ptr = ctx->PtrToIntInst(ptr); @@ -6502,7 +6502,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { } } else { - Assert(CastType(toType) != NULL); + AssertPos(pos, CastType(toType) != NULL); if (toType->IsBoolType()) { // convert pointer to bool llvm::Type *lfu = @@ -6554,21 +6554,21 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { // implicit array to pointer to first element Expr *arrayAsPtr = lArrayToPointer(expr); if (Type::EqualIgnoringConst(arrayAsPtr->GetType(), toPointerType) == false) { - Assert(PointerType::IsVoidPointer(toPointerType) || + AssertPos(pos, PointerType::IsVoidPointer(toPointerType) || Type::EqualIgnoringConst(arrayAsPtr->GetType()->GetAsVaryingType(), toPointerType) == true); arrayAsPtr = new TypeCastExpr(toPointerType, arrayAsPtr, pos); arrayAsPtr = ::TypeCheck(arrayAsPtr); - Assert(arrayAsPtr != NULL); + AssertPos(pos, arrayAsPtr != NULL); arrayAsPtr = ::Optimize(arrayAsPtr); - Assert(arrayAsPtr != NULL); + AssertPos(pos, arrayAsPtr != 
NULL); } - Assert(Type::EqualIgnoringConst(arrayAsPtr->GetType(), toPointerType)); + AssertPos(pos, Type::EqualIgnoringConst(arrayAsPtr->GetType(), toPointerType)); return arrayAsPtr->GetValue(ctx); } // This also should be caught during typechecking - Assert(!(toType->IsUniformType() && fromType->IsVaryingType())); + AssertPos(pos, !(toType->IsUniformType() && fromType->IsVaryingType())); if (toArrayType != NULL && fromArrayType != NULL) { // cast array pointer from [n x foo] to [0 x foo] if needed to be able @@ -6577,7 +6577,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { (toArrayType->GetElementCount() != fromArrayType->GetElementCount())) Warning(pos, "Type-converting array of length %d to length %d", fromArrayType->GetElementCount(), toArrayType->GetElementCount()); - Assert(Type::EqualIgnoringConst(toArrayType->GetBaseType(), + AssertPos(pos, Type::EqualIgnoringConst(toArrayType->GetBaseType(), fromArrayType->GetBaseType())); llvm::Value *v = expr->GetValue(ctx); llvm::Type *ptype = toType->LLVMType(g->ctx); @@ -6599,14 +6599,14 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { (toArray->GetElementCount() != fromArray->GetElementCount())) Warning(pos, "Type-converting array of length %d to length %d", fromArray->GetElementCount(), toArray->GetElementCount()); - Assert(Type::EqualIgnoringConst(toArray->GetBaseType(), + AssertPos(pos, Type::EqualIgnoringConst(toArray->GetBaseType(), fromArray->GetBaseType())); llvm::Value *v = expr->GetValue(ctx); llvm::Type *ptype = toType->LLVMType(g->ctx); return ctx->BitCastInst(v, ptype); //, "array_cast_0size"); } - Assert(Type::Equal(toTarget, fromTarget) || + AssertPos(pos, Type::Equal(toTarget, fromTarget) || Type::Equal(toTarget, fromTarget->GetAsConstType())); return expr->GetValue(ctx); } @@ -6616,7 +6616,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { if (toStruct && fromStruct) { // The only legal type conversions for structs are to go from a // uniform to a varying instance 
of the same struct type. - Assert(toStruct->IsVaryingType() && fromStruct->IsUniformType() && + AssertPos(pos, toStruct->IsVaryingType() && fromStruct->IsUniformType() && Type::EqualIgnoringConst(toStruct, fromStruct->GetAsVaryingType())); @@ -6630,7 +6630,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { const VectorType *fromVector = CastType(fromType); if (toVector && fromVector) { // this should be caught during typechecking - Assert(toVector->GetElementCount() == fromVector->GetElementCount()); + AssertPos(pos, toVector->GetElementCount() == fromVector->GetElementCount()); llvm::Value *exprVal = expr->GetValue(ctx); if (!exprVal) @@ -6672,7 +6672,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { const AtomicType *fromAtomic = CastType(fromType); // at this point, coming from an atomic type is all that's left... - Assert(fromAtomic != NULL); + AssertPos(pos, fromAtomic != NULL); if (toVector) { // scalar -> short vector conversion @@ -6700,7 +6700,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { else { const AtomicType *toAtomic = CastType(toType); // typechecking should ensure this is the case - Assert(toAtomic != NULL); + AssertPos(pos, toAtomic != NULL); return lTypeConvAtomic(ctx, exprVal, toAtomic, fromAtomic, pos); } @@ -6709,7 +6709,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { const Type * TypeCastExpr::GetType() const { - Assert(type->HasUnboundVariability() == false); + AssertPos(pos, type->HasUnboundVariability() == false); return type; } @@ -6972,7 +6972,7 @@ llvm::Value * ReferenceExpr::GetValue(FunctionEmitContext *ctx) const { ctx->SetDebugPos(pos); if (expr == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -6986,13 +6986,13 @@ ReferenceExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Type *llvmType; if ((type = expr->GetType()) == NULL || (llvmType = type->LLVMType(g->ctx)) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount 
> 0); return NULL; } value = expr->GetValue(ctx); if (value == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -7142,10 +7142,10 @@ const Type * PtrDerefExpr::GetType() const { const Type *type; if (expr == NULL || (type = expr->GetType()) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } - Assert(CastType(type) != NULL); + AssertPos(pos, CastType(type) != NULL); if (type->IsUniformType()) return type->GetBaseType(); @@ -7158,7 +7158,7 @@ Expr * PtrDerefExpr::TypeCheck() { const Type *type; if (expr == NULL || (type = expr->GetType()) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -7176,7 +7176,7 @@ int PtrDerefExpr::EstimateCost() const { const Type *type; if (expr == NULL || (type = expr->GetType()) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return 0; } @@ -7213,11 +7213,11 @@ const Type * RefDerefExpr::GetType() const { const Type *type; if (expr == NULL || (type = expr->GetType()) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } - Assert(CastType(type) != NULL); + AssertPos(pos, CastType(type) != NULL); return type->GetReferenceTarget(); } @@ -7226,14 +7226,14 @@ Expr * RefDerefExpr::TypeCheck() { const Type *type; if (expr == NULL || (type = expr->GetType()) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } // We only create RefDerefExprs internally for references in // expressions, so we should never create one with a non-reference // expression... 
- Assert(CastType(type) != NULL); + AssertPos(pos, CastType(type) != NULL); return this; } @@ -7298,7 +7298,7 @@ AddressOfExpr::GetType() const { else { t = expr->GetType(); if (t == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } return PointerType::GetUniform(t); @@ -7328,7 +7328,7 @@ Expr * AddressOfExpr::TypeCheck() { const Type *exprType; if (expr == NULL || (exprType = expr->GetType()) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -7361,7 +7361,7 @@ llvm::Constant * AddressOfExpr::GetConstant(const Type *type) const { const Type *exprType; if (expr == NULL || (exprType = expr->GetType()) == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -7518,7 +7518,7 @@ SymbolExpr::Optimize() { if (symbol == NULL) return NULL; else if (symbol->constValue != NULL) { - Assert(GetType()->IsConstType()); + AssertPos(pos, GetType()->IsConstType()); return new ConstExpr(symbol->constValue, pos); } else @@ -7638,7 +7638,7 @@ lPrintOverloadCandidates(SourcePos pos, const std::vector &funcs, const std::vector *argCouldBeNULL) { for (unsigned int i = 0; i < funcs.size(); ++i) { const FunctionType *ft = CastType(funcs[i]->type); - Assert(ft != NULL); + AssertPos(pos, ft != NULL); Error(funcs[i]->pos, "Candidate function: %s.", ft->GetString().c_str()); } @@ -7751,7 +7751,7 @@ FunctionSymbolExpr::getCandidateFunctions(int argCount) const { for (int i = 0; i < (int)candidateFunctions.size(); ++i) { const FunctionType *ft = CastType(candidateFunctions[i]->type); - Assert(ft != NULL); + AssertPos(pos, ft != NULL); // There's no way to match if the caller is passing more arguments // than this function instance takes. 
@@ -7896,7 +7896,7 @@ FunctionSymbolExpr::ResolveOverloads(SourcePos argPos, for (int i = 0; i < (int)actualCandidates.size(); ++i) { const FunctionType *ft = CastType(actualCandidates[i]->type); - Assert(ft != NULL); + AssertPos(pos, ft != NULL); candidateCosts.push_back(computeOverloadCost(ft, argTypes, argCouldBeNULL, argIsConstant)); @@ -8025,7 +8025,7 @@ NullPointerExpr::GetConstant(const Type *type) const { llvm::Type *llvmType = type->LLVMType(g->ctx); if (llvmType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -8090,7 +8090,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const { if (countExpr != NULL) { countValue = countExpr->GetValue(ctx); if (countValue == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } } @@ -8130,7 +8130,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const { "alloc_size64"); func = m->module->getFunction("__new_uniform"); } - Assert(func != NULL); + AssertPos(pos, func != NULL); // Make the call for the the actual allocation. llvm::Value *ptrValue = ctx->CallInst(func, NULL, allocSize, "new"); @@ -8211,7 +8211,7 @@ Expr * NewExpr::TypeCheck() { // It's illegal to call new with an undefined struct type if (allocType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } if (CastType(allocType) != NULL) { diff --git a/ispc.h b/ispc.h index bd170936..d0837110 100644 --- a/ispc.h +++ b/ispc.h @@ -58,16 +58,6 @@ #include #include -#define Assert(expr) \ - ((void)((expr) ? 
0 : __Assert (#expr, __FILE__, __LINE__))) -#define __Assert(expr, file, line) \ - ((void)fprintf(stderr, "%s:%u: Assertion failed: \"%s\"\n" \ - "***\n*** Please file a bug report at " \ - "https://github.com/ispc/ispc/issues\n*** (Including as much " \ - "information as you can about how to reproduce this error).\n" \ - "*** You have apparently encountered a bug in the compiler that " \ - "we'd like to fix!\n***\n", file, line, expr), abort(), 0) - /** @def ISPC_MAX_NVEC maximum vector size of any of the compliation targets. */ @@ -145,11 +135,25 @@ struct SourcePos { bool operator==(const SourcePos &p2) const; }; + /** Returns a SourcePos that encompasses the extent of both of the given extents. */ SourcePos Union(const SourcePos &p1, const SourcePos &p2); + +// Assert + +extern void DoAssert(const char *file, int line, const char *expr); +extern void DoAssertPos(SourcePos pos, const char *file, int line, const char *expr); + +#define Assert(expr) \ + ((void)((expr) ? 0 : ((void)DoAssert (__FILE__, __LINE__, #expr), 0))) + +#define AssertPos(pos, expr) \ + ((void)((expr) ? 0 : ((void)DoAssertPos (pos, __FILE__, __LINE__, #expr), 0))) + + /** @brief Structure that defines a compilation target This structure defines a compilation target for the ispc compiler. 
diff --git a/parse.yy b/parse.yy index f72a9f65..3eb4b2be 100644 --- a/parse.yy +++ b/parse.yy @@ -391,7 +391,7 @@ argument_expression_list { ExprList *argList = dynamic_cast($1); if (argList == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); argList = new ExprList(@3); } argList->exprs.push_back($3); @@ -623,13 +623,13 @@ declaration_statement : declaration { if ($1 == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); $$ = NULL; } else if ($1->declSpecs->storageClass == SC_TYPEDEF) { for (unsigned int i = 0; i < $1->declarators.size(); ++i) { if ($1->declarators[i] == NULL) - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); else m->AddTypeDef($1->declarators[i]->name, $1->declarators[i]->type, @@ -789,7 +789,7 @@ init_declarator_list { std::vector *dl = (std::vector *)$1; if (dl == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); dl = new std::vector; } if ($3 != NULL) @@ -918,7 +918,7 @@ struct_declaration_list { std::vector *sdl = (std::vector *)$1; if (sdl == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); sdl = new std::vector; } if ($2 != NULL) @@ -1013,7 +1013,7 @@ struct_declarator_list { std::vector *sdl = (std::vector *)$1; if (sdl == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); sdl = new std::vector; } if ($3 != NULL) @@ -1087,7 +1087,7 @@ enumerator_list { std::vector *symList = $1; if (symList == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); symList = new std::vector; } if ($3 != NULL) @@ -1487,7 +1487,7 @@ initializer_list { ExprList *exprList = $1; if (exprList == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); exprList = new ExprList(@3); } exprList->exprs.push_back($3); @@ -1558,7 +1558,7 @@ statement_list { StmtList *sl = (StmtList *)$1; if (sl == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); sl = new StmtList(@2); } sl->Add($2); @@ 
-1670,7 +1670,7 @@ foreach_dimension_list { std::vector *dv = $1; if (dv == NULL) { - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); dv = new std::vector; } if ($3 != NULL) @@ -1708,7 +1708,7 @@ iteration_statement { std::vector *dims = $3; if (dims == NULL) { - Assert(m->errorCount > 0); + AssertPos(@3, m->errorCount > 0); dims = new std::vector; } for (unsigned int i = 0; i < dims->size(); ++i) @@ -1718,7 +1718,7 @@ iteration_statement { std::vector *dims = $3; if (dims == NULL) { - Assert(m->errorCount > 0); + AssertPos(@3, m->errorCount > 0); dims = new std::vector; } @@ -1736,7 +1736,7 @@ iteration_statement { std::vector *dims = $3; if (dims == NULL) { - Assert(m->errorCount > 0); + AssertPos(@3, m->errorCount > 0); dims = new std::vector; } @@ -1747,7 +1747,7 @@ iteration_statement { std::vector *dims = $3; if (dims == NULL) { - Assert(m->errorCount > 0); + AssertPos(@3, m->errorCount > 0); dims = new std::vector; } @@ -1861,7 +1861,7 @@ function_definition $2->InitFromDeclSpecs($1); const FunctionType *funcType = CastType($2->type); if (funcType == NULL) - Assert(m->errorCount > 0); + AssertPos(@1, m->errorCount > 0); else { Stmt *code = $4; if (code == NULL) code = new StmtList(@4); @@ -2010,7 +2010,7 @@ lAddFunctionParams(Declarator *decl) { m->symbolTable->PushScope(); if (decl == NULL) { Assert(m->errorCount > 0); return; } @@ -2018,7 +2018,7 @@ lAddFunctionParams(Declarator *decl) { while (decl->kind != DK_FUNCTION && decl->child != NULL) decl = decl->child; if (decl->kind != DK_FUNCTION) { - Assert(m->errorCount > 0); + AssertPos(decl->pos, m->errorCount > 0); return; } @@ -2028,14 +2028,14 @@ lAddFunctionParams(Declarator *decl) { Assert(pdecl != NULL && pdecl->declarators.size() == 1); Declarator *declarator = pdecl->declarators[0]; if (declarator == NULL) - Assert(m->errorCount > 0); + AssertPos(decl->pos, m->errorCount > 0); else { Symbol *sym = new Symbol(declarator->name,
declarator->pos, declarator->type, declarator->storageClass); #ifndef NDEBUG bool ok = m->symbolTable->AddVariable(sym); if (ok == false) - Assert(m->errorCount > 0); + AssertPos(decl->pos, m->errorCount > 0); #else m->symbolTable->AddVariable(sym); #endif @@ -2189,7 +2189,7 @@ lFinalizeEnumeratorSymbols(std::vector &enums, if (enums[i]->constValue != NULL) { /* Already has a value, so first update nextVal with it. */ int count = enums[i]->constValue->AsUInt32(&nextVal); - Assert(count == 1); + AssertPos(enums[i]->pos, count == 1); ++nextVal; /* When the source file as being parsed, the ConstExpr for any @@ -2202,7 +2202,7 @@ lFinalizeEnumeratorSymbols(std::vector &enums, enums[i]->pos); castExpr = Optimize(castExpr); enums[i]->constValue = dynamic_cast(castExpr); - Assert(enums[i]->constValue != NULL); + AssertPos(enums[i]->pos, enums[i]->constValue != NULL); } else { enums[i]->constValue = new ConstExpr(enumType, nextVal++, diff --git a/stmt.cpp b/stmt.cpp index bcc57f4b..6a6f58e5 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -140,7 +140,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { for (unsigned int i = 0; i < vars.size(); ++i) { Symbol *sym = vars[i].sym; - Assert(sym != NULL); + AssertPos(pos, sym != NULL); if (sym->type == NULL) continue; Expr *initExpr = vars[i].init; @@ -191,7 +191,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::Type *llvmType = sym->type->LLVMType(g->ctx); if (llvmType == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -478,12 +478,12 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements"); // under varying control flow,, returns can't stop instruction // emission, so this better be non-NULL... 
- Assert(ctx->GetCurrentBasicBlock()); + AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock()); } if (falseStmts) { ctx->SetInternalMaskAndNot(oldMask, test); lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements"); - Assert(ctx->GetCurrentBasicBlock()); + AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock()); } } @@ -564,7 +564,7 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const { (costIsAcceptable || g->opt.disableCoherentControlFlow)) { ctx->StartVaryingIf(oldMask); emitMaskedTrueAndFalse(ctx, oldMask, ltest); - Assert(ctx->GetCurrentBasicBlock()); + AssertPos(pos, ctx->GetCurrentBasicBlock()); ctx->EndIf(); } else { @@ -587,7 +587,7 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest, // compiler see what's going on so that subsequent optimizations for // code emitted here can operate with the knowledge that the mask is // definitely all on (until it modifies the mask itself). - Assert(!g->opt.disableCoherentControlFlow); + AssertPos(pos, !g->opt.disableCoherentControlFlow); if (!g->opt.disableMaskAllOnOptimizations) ctx->SetInternalMask(LLVMMaskAllOn); llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); @@ -637,7 +637,7 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest, emitMaskedTrueAndFalse(ctx, LLVMMaskAllOn, ltest); // In this case, return/break/continue isn't allowed to jump and end // emission. 
- Assert(ctx->GetCurrentBasicBlock()); + AssertPos(pos, ctx->GetCurrentBasicBlock()); ctx->EndIf(); ctx->BranchInst(bDone); @@ -666,7 +666,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, // Emit statements for true ctx->SetCurrentBasicBlock(bRunTrue); lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements"); - Assert(ctx->GetCurrentBasicBlock()); + AssertPos(pos, ctx->GetCurrentBasicBlock()); ctx->BranchInst(bNext); ctx->SetCurrentBasicBlock(bNext); } @@ -683,7 +683,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, // Emit code for false ctx->SetCurrentBasicBlock(bRunFalse); lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements"); - Assert(ctx->GetCurrentBasicBlock()); + AssertPos(pos, ctx->GetCurrentBasicBlock()); ctx->BranchInst(bNext); ctx->SetCurrentBasicBlock(bNext); } @@ -837,7 +837,7 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->SetFunctionMask(LLVMMaskAllOn); if (bodyStmts) bodyStmts->EmitCode(ctx); - Assert(ctx->GetCurrentBasicBlock()); + AssertPos(pos, ctx->GetCurrentBasicBlock()); ctx->SetFunctionMask(oldFunctionMask); ctx->BranchInst(btest); @@ -845,7 +845,7 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->SetCurrentBasicBlock(bMixed); if (bodyStmts) bodyStmts->EmitCode(ctx); - Assert(ctx->GetCurrentBasicBlock()); + AssertPos(pos, ctx->GetCurrentBasicBlock()); ctx->BranchInst(btest); } else { @@ -986,7 +986,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const { // it and then jump into the loop test code. (Also start a new scope // since the initiailizer may be a declaration statement). 
if (init) { - Assert(dynamic_cast(init) == NULL); + AssertPos(pos, dynamic_cast(init) == NULL); ctx->StartScope(); init->EmitCode(ctx); } @@ -1015,7 +1015,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const { if (doCoherentCheck) Warning(test->pos, "Uniform condition supplied to cfor/cwhile " "statement."); - Assert(ltest->getType() == LLVMTypes::BoolType); + AssertPos(pos, ltest->getType() == LLVMTypes::BoolType); ctx->BranchInst(bloop, bexit, ltest); } else { @@ -1051,7 +1051,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->SetFunctionMask(LLVMMaskAllOn); if (stmts) stmts->EmitCode(ctx); - Assert(ctx->GetCurrentBasicBlock()); + AssertPos(pos, ctx->GetCurrentBasicBlock()); ctx->SetFunctionMask(oldFunctionMask); ctx->BranchInst(bstep); @@ -1364,8 +1364,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->SetFunctionMask(LLVMMaskAllOn); // This should be caught during typechecking - Assert(startExprs.size() == dimVariables.size() && - endExprs.size() == dimVariables.size()); + AssertPos(pos, startExprs.size() == dimVariables.size() && + endExprs.size() == dimVariables.size()); int nDims = (int)dimVariables.size(); /////////////////////////////////////////////////////////////////////// @@ -1704,7 +1704,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->SetContinueTarget(bbFullBodyContinue); ctx->AddInstrumentationPoint("foreach loop body (all on)"); stmts->EmitCode(ctx); - Assert(ctx->GetCurrentBasicBlock() != NULL); + AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL); ctx->BranchInst(bbFullBodyContinue); } ctx->SetCurrentBasicBlock(bbFullBodyContinue); { @@ -2094,7 +2094,7 @@ SwitchStmt::EmitCode(FunctionEmitContext *ctx) const { const Type *type; if (expr == NULL || ((type = expr->GetType()) == NULL)) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -2112,7 +2112,7 @@ SwitchStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::Value *exprValue = expr->GetValue(ctx); if (exprValue == 
NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } @@ -2342,7 +2342,7 @@ LabeledStmt::LabeledStmt(const char *n, Stmt *s, SourcePos p) void LabeledStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name); - Assert(bblock != NULL); + AssertPos(pos, bblock != NULL); // End the current basic block with a jump to our basic block and then // set things up for emission to continue there. Note that the current @@ -2597,7 +2597,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const { // Now we can emit code to call __do_print() llvm::Function *printFunc = m->module->getFunction("__do_print"); - Assert(printFunc); + AssertPos(pos, printFunc); llvm::Value *mask = ctx->GetFullMask(); // Set up the rest of the parameters to it @@ -2653,7 +2653,7 @@ AssertStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::Function *assertFunc = isUniform ? m->module->getFunction("__do_assert_uniform") : m->module->getFunction("__do_assert_varying"); - Assert(assertFunc != NULL); + AssertPos(pos, assertFunc != NULL); char *errorString; if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s\n", @@ -2721,18 +2721,18 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { const Type *exprType; if (expr == NULL || ((exprType = expr->GetType()) == NULL)) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } llvm::Value *exprValue = expr->GetValue(ctx); if (exprValue == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return; } // Typechecking should catch this - Assert(CastType(exprType) != NULL); + AssertPos(pos, CastType(exprType) != NULL); if (exprType->IsUniformType()) { // For deletion of a uniform pointer, we just need to cast the @@ -2741,7 +2741,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType, "ptr_to_void"); llvm::Function *func = 
m->module->getFunction("__delete_uniform"); - Assert(func != NULL); + AssertPos(pos, func != NULL); ctx->CallInst(func, NULL, exprValue, ""); } @@ -2751,7 +2751,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { // only need to extend to 64-bit values on 32-bit targets before // calling it. llvm::Function *func = m->module->getFunction("__delete_varying"); - Assert(func != NULL); + AssertPos(pos, func != NULL); if (g->target.is32Bit) exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType, "ptr_to_64"); @@ -2804,7 +2804,7 @@ DeleteStmt::EstimateCost() const { Stmt * CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) { if (iterSym == NULL) { - Assert(m->errorCount > 0); + AssertPos(pos, m->errorCount > 0); return NULL; } @@ -2831,11 +2831,11 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) { // First, call __movmsk(__mask)) to get the mask as a set of bits. // This should be hoisted out of the loop Symbol *maskSym = m->symbolTable->LookupVariable("__mask"); - Assert(maskSym != NULL); + AssertPos(pos, maskSym != NULL); Expr *maskVecExpr = new SymbolExpr(maskSym, pos); std::vector mmFuns; m->symbolTable->LookupFunction("__movmsk", &mmFuns); - Assert(mmFuns.size() == (g->target.maskBitCount == 32 ? 2 : 1)); + AssertPos(pos, mmFuns.size() == (g->target.maskBitCount == 32 ? 2 : 1)); FunctionSymbolExpr *movmskFunc = new FunctionSymbolExpr("__movmsk", mmFuns, pos); ExprList *movmskArgs = new ExprList(maskVecExpr, pos); diff --git a/util.cpp b/util.cpp index 95da62f2..3c0de598 100644 --- a/util.cpp +++ b/util.cpp @@ -420,18 +420,41 @@ PerformanceWarning(SourcePos p, const char *fmt, ...) 
{ } -void -FatalError(const char *file, int line, const char *message) { - fprintf(stderr, "%s(%d): FATAL ERROR: %s\n", file, line, message); +static void +lPrintBugText() { fprintf(stderr, "***\n" "*** Please file a bug report at https://github.com/ispc/ispc/issues\n" "*** (Including as much information as you can about how to " "reproduce this error).\n" "*** You have apparently encountered a bug in the compiler that we'd " "like to fix!\n***\n"); +} + + +void +FatalError(const char *file, int line, const char *message) { + fprintf(stderr, "%s(%d): FATAL ERROR: %s\n", file, line, message); + lPrintBugText(); abort(); } + +void +DoAssert(const char *file, int line, const char *expr) { + fprintf(stderr, "%s:%d: Assertion failed: \"%s\".\n", file, line, expr); + lPrintBugText(); + abort(); +} + + +void +DoAssertPos(SourcePos pos, const char *file, int line, const char *expr) { + Error(pos, "Assertion failed (%s:%d): \"%s\".", file, line, expr); + lPrintBugText(); + abort(); +} + + /////////////////////////////////////////////////////////////////////////// // http://en.wikipedia.org/wiki/Levenshtein_distance From 38cea6dc71c7a62e8e612b648f857bcb37a1aaa3 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 11:09:08 -0700 Subject: [PATCH 157/173] Issue error if "typedef" is inadvertently included in function definition. Issue #267.
--- parse.yy | 2 ++ tests_errors/func-def-with-typedef.ispc | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests_errors/func-def-with-typedef.ispc diff --git a/parse.yy b/parse.yy index 3eb4b2be..33e19dbc 100644 --- a/parse.yy +++ b/parse.yy @@ -1862,6 +1862,8 @@ function_definition const FunctionType *funcType = CastType($2->type); if (funcType == NULL) AssertPos(@1, m->errorCount > 0); + else if ($1->storageClass == SC_TYPEDEF) + Error(@1, "Illegal \"typedef\" provided with function definition."); else { Stmt *code = $4; if (code == NULL) code = new StmtList(@4); diff --git a/tests_errors/func-def-with-typedef.ispc b/tests_errors/func-def-with-typedef.ispc new file mode 100644 index 00000000..a750a9be --- /dev/null +++ b/tests_errors/func-def-with-typedef.ispc @@ -0,0 +1,4 @@ +// Illegal "typedef" provided with function definition + +typedef float foo(float a, float b) { } + From 90db01d0382e1d3841e0f65f6e08115131ad821b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 11:48:08 -0700 Subject: [PATCH 158/173] Represent MOVMSK'ed masks with int64s rather than int32s. This allows us to scale up to 64-wide execution. 
--- builtins/builtins.c | 8 +-- builtins/target-avx-x2.ll | 7 +-- builtins/target-avx.ll | 7 +-- builtins/target-generic-1.ll | 6 +-- builtins/target-generic-common.ll | 4 +- builtins/target-sse2-x2.ll | 7 +-- builtins/target-sse2.ll | 7 +-- builtins/target-sse4-x2.ll | 7 +-- builtins/target-sse4.ll | 7 +-- builtins/util.m4 | 90 ++++++++++++++++++------------- ctx.cpp | 23 ++++---- ctx.h | 2 +- docs/perfguide.rst | 4 +- examples/intrinsics/generic-16.h | 4 +- examples/intrinsics/sse4.h | 6 +-- ispc.h | 2 +- module.cpp | 2 +- opt.cpp | 33 ++++++------ stdlib.ispc | 15 +++--- stmt.cpp | 3 +- 20 files changed, 137 insertions(+), 107 deletions(-) diff --git a/builtins/builtins.c b/builtins/builtins.c index 36498e1a..8e1a5624 100644 --- a/builtins/builtins.c +++ b/builtins/builtins.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -70,7 +70,7 @@ typedef int Bool; putchar('['); \ for (int i = 0; i < width; ++i) { \ /* only print the value if the current lane is executing */ \ - if (mask & (1< @__min_varying_float(<16 x float>, declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone -define i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline { +define i64 @__movmsk(<16 x i32>) nounwind readnone alwaysinline { %floatmask = bitcast <16 x i32> %0 to <16 x float> %mask0 = shufflevector <16 x float> %floatmask, <16 x float> undef, <8 x i32> @@ -186,7 +186,8 @@ define i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline { %v1shift = shl i32 %v1, 8 %v = or i32 %v1shift, %v0 - ret i32 %v + %v64 = zext i32 %v to i64 + ret i64 %v64 } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 53659b7c..608d2dcd 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel 
Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -175,10 +175,11 @@ define <8 x float> @__min_varying_float(<8 x float>, declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone -define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { +define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { %floatmask = bitcast <8 x i32> %0 to <8 x float> %v = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask) nounwind readnone - ret i32 %v + %v64 = zext i32 %v to i64 + ret i64 %v64 } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index ad911e64..5ced9da9 100755 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -186,14 +186,14 @@ define void @__masked_store_blend_64(<1 x i64>* nocapture, <1 x i64>, ret void } -define i32 @__movmsk(<1 x i32>) nounwind readnone alwaysinline { +define i64 @__movmsk(<1 x i32>) nounwind readnone alwaysinline { %item = extractelement <1 x i32> %0, i32 0 %v = lshr i32 %item, 31 - ret i32 %v + %v64 = zext i32 %v to i64 + ret i64 %v64 } - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; rounding ;; diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 50daf23e..6bf90d95 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without @@ -201,7 +201,7 @@ declare @__svml_pow(, ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; reductions -declare i32 @__movmsk() nounwind readnone +declare i64 @__movmsk() nounwind readnone declare float @__reduce_add_float() nounwind readnone declare float @__reduce_min_float() nounwind readnone diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 2e6d1bdc..65d30939 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -295,7 +295,7 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline { declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone -define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { +define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { ; first do two 4-wide movmsk calls %floatmask = bitcast <8 x i32> %0 to <8 x float> %m0 = shufflevector <8 x float> %floatmask, <8 x float> undef, @@ -309,7 +309,8 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { ; of the second one %v1s = shl i32 %v1, 4 %v = or i32 %v0, %v1s - ret i32 %v + %v64 = zext i32 %v to i64 + ret i64 %v64 } define <4 x float> @__vec4_add_float(<4 x float> %v0, diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 21ffb267..e6eb7390 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without @@ -239,10 +239,11 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline { declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone -define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline { +define i64 @__movmsk(<4 x i32>) nounwind readnone alwaysinline { %floatmask = bitcast <4 x i32> %0 to <4 x float> %v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone - ret i32 %v + %v64 = zext i32 %v to i64 + ret i64 %v64 } define float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline { diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index 5a467ec2..1ac6b3e5 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -237,7 +237,7 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone -define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { +define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { ; first do two 4-wide movmsk calls %floatmask = bitcast <8 x i32> %0 to <8 x float> %m0 = shufflevector <8 x float> %floatmask, <8 x float> undef, @@ -251,7 +251,8 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline { ; of the second one %v1s = shl i32 %v1, 4 %v = or i32 %v0, %v1s - ret i32 %v + %v64 = zext i32 %v to i64 + ret i64 %v64 } define float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline { diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 9dfe9db7..98426b24 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights 
reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -271,10 +271,11 @@ define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alway declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone -define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline { +define i64 @__movmsk(<4 x i32>) nounwind readnone alwaysinline { %floatmask = bitcast <4 x i32> %0 to <4 x float> %v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone - ret i32 %v + %v64 = zext i32 %v to i64 + ret i64 %v64 } declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone diff --git a/builtins/util.m4 b/builtins/util.m4 index 023ca411..59185942 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -38,6 +38,18 @@ declare i1 @__is_compile_time_constant_uniform_int32(i32) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; It is a bit of a pain to compute this in m4 for 32 and 64-wide targets... 
+define(`ALL_ON_MASK', +`ifelse(WIDTH, `64', `-1', + WIDTH, `32', `4294967295', + `eval((1< %mask) declare i1 @__is_compile_time_constant_varying_int32() @@ -2096,12 +2108,12 @@ ok: define void @__do_assert_varying(i8 *%str, %test, - %mask) { + %mask) { %nottest = xor %test, < forloop(i, 1, eval(WIDTH-1), `MASK -1, ') MASK -1 > %nottest_and_mask = and %nottest, %mask - %mm = call i32 @__movmsk( %nottest_and_mask) - %all_ok = icmp eq i32 %mm, 0 + %mm = call i64 @__movmsk( %nottest_and_mask) + %all_ok = icmp eq i64 %mm, 0 br i1 %all_ok, label %ok, label %fail fail: @@ -2505,12 +2517,16 @@ define <$1 x $2> @__load_and_broadcast_$3(i8 *, <$1 x MASK> %mask) nounwind alwa define(`masked_load', ` define <$1 x $2> @__masked_load_$3(i8 *, <$1 x MASK> %mask) nounwind alwaysinline { entry: - %mm = call i32 @__movmsk(<$1 x MASK> %mask) + %mm = call i64 @__movmsk(<$1 x MASK> %mask) ; if the first lane and the last lane are on, then it is safe to do a vector load ; of the whole thing--what the lanes in the middle want turns out to not matter... - %mm_and = and i32 %mm, eval(1 | (1<<($1-1))) - %can_vload = icmp eq i32 %mm_and, eval(1 | (1<<($1-1))) + %mm_and_low = and i64 %mm, 1 + %mm_and_high = and i64 %mm, MASK_HIGH_BIT_ON + %mm_and_high_shift = lshr i64 %mm_and_high, eval(WIDTH-1) + %mm_and_low_i1 = trunc i64 %mm_and_low to i1 + %mm_and_high_shift_i1 = trunc i64 %mm_and_high_shift to i1 + %can_vload = and i1 %mm_and_low_i1, %mm_and_high_shift_i1 %fast32 = call i32 @__fast_masked_vload() %fast_i1 = trunc i32 %fast32 to i1 @@ -2529,9 +2545,10 @@ load: loop: ; loop over the lanes and see if each one is on... 
%lane = phi i32 [ 0, %entry ], [ %next_lane, %lane_done ] - %lanemask = shl i32 1, %lane - %mask_and = and i32 %mm, %lanemask - %do_lane = icmp ne i32 %mask_and, 0 + %lane64 = zext i32 %lane to i64 + %lanemask = shl i64 1, %lane64 + %mask_and = and i64 %mm, %lanemask + %do_lane = icmp ne i64 %mask_and, 0 br i1 %do_lane, label %load_lane, label %lane_done load_lane: @@ -2743,12 +2760,12 @@ define(`packed_load_and_store', ` define i32 @__packed_load_active(i32 * %startptr, * %val_ptr, %full_mask) nounwind alwaysinline { entry: - %mask = call i32 @__movmsk( %full_mask) + %mask = call i64 @__movmsk( %full_mask) %mask_known = call i1 @__is_compile_time_constant_mask( %full_mask) br i1 %mask_known, label %known_mask, label %unknown_mask known_mask: - %allon = icmp eq i32 %mask, eval((1 << WIDTH) -1) + %allon = icmp eq i64 %mask, ALL_ON_MASK br i1 %allon, label %all_on, label %unknown_mask all_on: @@ -2764,12 +2781,12 @@ unknown_mask: loop: %lane = phi i32 [ 0, %unknown_mask ], [ %nextlane, %loopend ] - %lanemask = phi i32 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ] + %lanemask = phi i64 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ] %offset = phi i32 [ 0, %unknown_mask ], [ %nextoffset, %loopend ] ; is the current lane on? - %and = and i32 %mask, %lanemask - %do_load = icmp eq i32 %and, %lanemask + %and = and i64 %mask, %lanemask + %do_load = icmp eq i64 %and, %lanemask br i1 %do_load, label %load, label %loopend load: @@ -2784,7 +2801,7 @@ load: loopend: %nextoffset = phi i32 [ %offset1, %load ], [ %offset, %loop ] %nextlane = add i32 %lane, 1 - %nextlanemask = mul i32 %lanemask, 2 + %nextlanemask = mul i64 %lanemask, 2 ; are we done yet? 
%test = icmp ne i32 %nextlane, WIDTH @@ -2795,14 +2812,14 @@ done: } define i32 @__packed_store_active(i32 * %startptr, %vals, - %full_mask) nounwind alwaysinline { + %full_mask) nounwind alwaysinline { entry: - %mask = call i32 @__movmsk( %full_mask) + %mask = call i64 @__movmsk( %full_mask) %mask_known = call i1 @__is_compile_time_constant_mask( %full_mask) br i1 %mask_known, label %known_mask, label %unknown_mask known_mask: - %allon = icmp eq i32 %mask, eval((1 << WIDTH) -1) + %allon = icmp eq i64 %mask, ALL_ON_MASK br i1 %allon, label %all_on, label %unknown_mask all_on: @@ -2815,12 +2832,12 @@ unknown_mask: loop: %lane = phi i32 [ 0, %unknown_mask ], [ %nextlane, %loopend ] - %lanemask = phi i32 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ] + %lanemask = phi i64 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ] %offset = phi i32 [ 0, %unknown_mask ], [ %nextoffset, %loopend ] ; is the current lane on? - %and = and i32 %mask, %lanemask - %do_store = icmp eq i32 %and, %lanemask + %and = and i64 %mask, %lanemask + %do_store = icmp eq i64 %and, %lanemask br i1 %do_store, label %store, label %loopend store: @@ -2833,7 +2850,7 @@ store: loopend: %nextoffset = phi i32 [ %offset1, %store ], [ %offset, %loop ] %nextlane = add i32 %lane, 1 - %nextlanemask = mul i32 %lanemask, 2 + %nextlanemask = mul i64 %lanemask, 2 ; are we done yet? 
%test = icmp ne i32 %nextlane, WIDTH @@ -2857,14 +2874,15 @@ define(`reduce_equal_aux', ` define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue, <$1 x MASK> %mask) nounwind alwaysinline { entry: - %mm = call i32 @__movmsk(<$1 x MASK> %mask) - %allon = icmp eq i32 %mm, eval((1<<$1)-1) + %mm = call i64 @__movmsk(<$1 x MASK> %mask) + %allon = icmp eq i64 %mm, ALL_ON_MASK br i1 %allon, label %check_neighbors, label %domixed domixed: ; First, figure out which lane is the first active one - %first = call i32 @llvm.cttz.i32(i32 %mm) - %baseval = extractelement <$1 x $2> %v, i32 %first + %first = call i64 @llvm.cttz.i64(i64 %mm) + %first32 = trunc i64 %first to i32 + %baseval = extractelement <$1 x $2> %v, i32 %first32 %basev1 = bitcast $2 %baseval to <1 x $2> ; get a vector that is that value smeared across all elements %basesmear = shufflevector <1 x $2> %basev1, <1 x $2> undef, @@ -2895,9 +2913,9 @@ check_neighbors: %eq = $5 eq <$1 x $2> %vec, %vr ifelse(MASK,i32, ` %eq32 = sext <$1 x i1> %eq to <$1 x i32> - %eqmm = call i32 @__movmsk(<$1 x i32> %eq32)', ` - %eqmm = call i32 @__movmsk(<$1 x MASK> %eq)') - %alleq = icmp eq i32 %eqmm, eval((1<<$1)-1) + %eqmm = call i64 @__movmsk(<$1 x i32> %eq32)', ` + %eqmm = call i64 @__movmsk(<$1 x MASK> %eq)') + %alleq = icmp eq i64 %eqmm, ALL_ON_MASK br i1 %alleq, label %all_equal, label %not_all_equal ', ` ; But for 64-bit elements, it turns out to be more efficient to just @@ -3010,14 +3028,14 @@ define(`per_lane', ` br label %pl_entry pl_entry: - %pl_mask = call i32 @__movmsk($2) + %pl_mask = call i64 @__movmsk($2) %pl_mask_known = call i1 @__is_compile_time_constant_mask($2) br i1 %pl_mask_known, label %pl_known_mask, label %pl_unknown_mask pl_known_mask: ;; the mask is known at compile time; see if it is something we can ;; handle more efficiently - %pl_is_allon = icmp eq i32 %pl_mask, eval((1<<$1)-1) + %pl_is_allon = icmp eq i64 %pl_mask, ALL_ON_MASK br i1 %pl_is_allon, label %pl_all_on, label %pl_unknown_mask pl_all_on: 
@@ -3039,11 +3057,11 @@ pl_unknown_mask: pl_loop: ;; Loop over each lane and see if we want to do the work for this lane %pl_lane = phi i32 [ 0, %pl_unknown_mask ], [ %pl_nextlane, %pl_loopend ] - %pl_lanemask = phi i32 [ 1, %pl_unknown_mask ], [ %pl_nextlanemask, %pl_loopend ] + %pl_lanemask = phi i64 [ 1, %pl_unknown_mask ], [ %pl_nextlanemask, %pl_loopend ] ; is the current lane on? if so, goto do work, otherwise to end of loop - %pl_and = and i32 %pl_mask, %pl_lanemask - %pl_doit = icmp eq i32 %pl_and, %pl_lanemask + %pl_and = and i64 %pl_mask, %pl_lanemask + %pl_doit = icmp eq i64 %pl_and, %pl_lanemask br i1 %pl_doit, label %pl_dolane, label %pl_loopend pl_dolane: @@ -3054,7 +3072,7 @@ pl_dolane: pl_loopend: %pl_nextlane = add i32 %pl_lane, 1 - %pl_nextlanemask = mul i32 %pl_lanemask, 2 + %pl_nextlanemask = mul i64 %pl_lanemask, 2 ; are we done yet? %pl_test = icmp ne i32 %pl_nextlane, $1 diff --git a/ctx.cpp b/ctx.cpp index 4e357873..11957ae2 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1254,16 +1254,19 @@ llvm::Value * FunctionEmitContext::Any(llvm::Value *mask) { llvm::Value *mmval = LaneMask(mask); return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, mmval, - LLVMInt32(0), LLVMGetName(mask, "_any")); + LLVMInt64(0), LLVMGetName(mask, "_any")); } llvm::Value * FunctionEmitContext::All(llvm::Value *mask) { llvm::Value *mmval = LaneMask(mask); + llvm::Value *allOnMaskValue = (g->target.vectorWidth == 64) ? 
+ LLVMInt64(~0ull) : + LLVMInt64((1ull << g->target.vectorWidth) - 1); + return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval, - LLVMInt32((1<target.vectorWidth)-1), - LLVMGetName(mask, "_all")); + allOnMaskValue, LLVMGetName(mask, "_all")); } @@ -1271,14 +1274,14 @@ llvm::Value * FunctionEmitContext::None(llvm::Value *mask) { llvm::Value *mmval = LaneMask(mask); return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval, - LLVMInt32(0), LLVMGetName(mask, "_none")); + LLVMInt64(0), LLVMGetName(mask, "_none")); } llvm::Value * FunctionEmitContext::LaneMask(llvm::Value *v) { // Call the target-dependent movmsk function to turn the vector mask - // into an i32 value + // into an i64 value std::vector mm; m->symbolTable->LookupFunction("__movmsk", &mm); if (g->target.maskBitCount == 1) @@ -1396,7 +1399,7 @@ FunctionEmitContext::AddInstrumentationPoint(const char *note) { args.push_back(lGetStringAsValue(bblock, note)); // arg 3: line number args.push_back(LLVMInt32(currentPos.first_line)); - // arg 4: current mask, movmsk'ed down to an int32 + // arg 4: current mask, movmsk'ed down to an int64 args.push_back(LaneMask(GetFullMask())); llvm::Function *finst = m->module->getFunction("ISPCInstrument"); @@ -3196,10 +3199,12 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // pointer to be called. 
llvm::Value *currentMask = LoadInst(maskPtr); llvm::Function *cttz = - m->module->getFunction("__count_trailing_zeros_i32"); + m->module->getFunction("__count_trailing_zeros_i64"); AssertPos(currentPos, cttz != NULL); - llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask), - "first_lane"); + llvm::Value *firstLane64 = CallInst(cttz, NULL, LaneMask(currentMask), + "first_lane64"); + llvm::Value *firstLane = + TruncInst(firstLane64, LLVMTypes::Int32Type, "first_lane32"); // Get the pointer to the function we're going to call this // time through: ftpr = func[firstLane] diff --git a/ctx.h b/ctx.h index 304f8af1..10a22115 100644 --- a/ctx.h +++ b/ctx.h @@ -276,7 +276,7 @@ public: llvm::Value *None(llvm::Value *mask); /** Given a boolean mask value of type LLVMTypes::MaskType, return an - i32 value wherein the i'th bit is on if and only if the i'th lane + i64 value wherein the i'th bit is on if and only if the i'th lane of the mask is on. */ llvm::Value *LaneMask(llvm::Value *mask); diff --git a/docs/perfguide.rst b/docs/perfguide.rst index 6e8555bf..b8e65893 100644 --- a/docs/perfguide.rst +++ b/docs/perfguide.rst @@ -624,7 +624,7 @@ gathers happen.) 
extern "C" { void ISPCInstrument(const char *fn, const char *note, - int line, int mask); + int line, uint64_t mask); } This function is passed the file name of the ``ispc`` file running, a short @@ -637,7 +637,7 @@ as follows: :: - ISPCInstrument("foo.ispc", "function entry", 55, 0xf); + ISPCInstrument("foo.ispc", "function entry", 55, 0xfull); This call indicates that at the currently executing program has just entered the function defined at line 55 of the file ``foo.ispc``, with a diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 57eba63f..80c2635c 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -311,8 +311,8 @@ INSERT_EXTRACT(__vec1_d, double) /////////////////////////////////////////////////////////////////////////// // mask ops -static FORCEINLINE uint32_t __movmsk(__vec16_i1 mask) { - return mask.v; +static FORCEINLINE uint64_t __movmsk(__vec16_i1 mask) { + return (uint64_t)mask.v; } static FORCEINLINE __vec16_i1 __equal(__vec16_i1 a, __vec16_i1 b) { diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 5fe22b78..9f301bb7 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -224,8 +224,8 @@ CAST_BITS_SCALAR(double, int64_t) /////////////////////////////////////////////////////////////////////////// // mask ops -static FORCEINLINE uint32_t __movmsk(__vec4_i1 mask) { - return _mm_movemask_ps(mask.v); +static FORCEINLINE uint64_t __movmsk(__vec4_i1 mask) { + return (uint64_t)_mm_movemask_ps(mask.v); } static FORCEINLINE __vec4_i1 __equal(__vec4_i1 a, __vec4_i1 b) { diff --git a/ispc.h b/ispc.h index d0837110..4cbbce7d 100644 --- a/ispc.h +++ b/ispc.h @@ -61,7 +61,7 @@ /** @def ISPC_MAX_NVEC maximum vector size of any of the compliation targets. 
*/ -#define ISPC_MAX_NVEC 32 +#define ISPC_MAX_NVEC 64 // Forward declarations of a number of widely-used LLVM types namespace llvm { diff --git a/module.cpp b/module.cpp index b5afc875..d16916be 100644 --- a/module.cpp +++ b/module.cpp @@ -1228,7 +1228,7 @@ Module::writeHeader(const char *fn) { if (g->emitInstrumentation) { fprintf(f, "#define ISPC_INSTRUMENTATION 1\n"); fprintf(f, "extern \"C\" {\n"); - fprintf(f, " void ISPCInstrument(const char *fn, const char *note, int line, int mask);\n"); + fprintf(f, " void ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask);\n"); fprintf(f, "}\n"); } diff --git a/opt.cpp b/opt.cpp index df4dd572..ce455d6f 100644 --- a/opt.cpp +++ b/opt.cpp @@ -269,12 +269,12 @@ lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name, execution mask, convert it to a bitvector where the 0th bit corresponds to the first vector value and so forth. */ -static uint32_t +static uint64_t lConstElementsToMask(const llvm::SmallVector &elements) { - Assert(elements.size() <= 32); + Assert(elements.size() <= 64); - uint32_t mask = 0; + uint64_t mask = 0; for (unsigned int i = 0; i < elements.size(); ++i) { llvm::APInt intMaskValue; // SSE has the "interesting" approach of encoding blending @@ -293,7 +293,7 @@ lConstElementsToMask(const llvm::SmallVector 0) - mask |= (1 << i); + mask |= (1ull << i); } return mask; } @@ -306,7 +306,7 @@ lConstElementsToMask(const llvm::SmallVector, we have 0b1001 = 9. 
*/ static bool -lGetMask(llvm::Value *factor, uint32_t *mask) { +lGetMask(llvm::Value *factor, uint64_t *mask) { #ifndef LLVM_3_0 llvm::ConstantDataVector *cdv = llvm::dyn_cast(factor); if (cdv != NULL) { @@ -364,7 +364,7 @@ enum MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN }; */ static MaskStatus lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) { - uint32_t bits; + uint64_t bits; if (lGetMask(mask, &bits) == false) return UNKNOWN; @@ -373,7 +373,7 @@ lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) { if (vecWidth == -1) vecWidth = g->target.vectorWidth; - Assert(vecWidth <= 32); + Assert(vecWidth <= 64); for (int i = 0; i < vecWidth; ++i) { if ((bits & (1ull << i)) == 0) @@ -601,12 +601,12 @@ private: instruction for this optimization pass. */ struct BlendInstruction { - BlendInstruction(llvm::Function *f, uint32_t ao, int o0, int o1, int of) + BlendInstruction(llvm::Function *f, uint64_t ao, int o0, int o1, int of) : function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) { } /** Function pointer for the blend instruction */ llvm::Function *function; /** Mask value for an "all on" mask for this instruction */ - uint32_t allOnMask; + uint64_t allOnMask; /** The operand number in the llvm CallInst corresponds to the first operand to blend with. */ int op0; @@ -728,7 +728,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } - uint32_t mask; + uint64_t mask; if (lGetMask(factor, &mask) == true) { llvm::Value *value = NULL; if (mask == 0) @@ -748,12 +748,13 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { } else if (matchesMaskInstruction(callInst->getCalledFunction())) { llvm::Value *factor = callInst->getArgOperand(0); - uint32_t mask; + uint64_t mask; if (lGetMask(factor, &mask) == true) { // If the vector-valued mask has a known value, replace it // with the corresponding integer mask from its elements // high bits. 
- llvm::Value *value = LLVMInt32(mask); + llvm::Value *value = (callInst->getType() == LLVMTypes::Int32Type) ? + LLVMInt32(mask) : LLVMInt64(mask); llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, value); modifiedAny = true; @@ -763,7 +764,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { else if (callInst->getCalledFunction() == avxMaskedLoad32 || callInst->getCalledFunction() == avxMaskedLoad64) { llvm::Value *factor = callInst->getArgOperand(1); - uint32_t mask; + uint64_t mask; if (lGetMask(factor, &mask) == true) { if (mask == 0) { // nothing being loaded, replace with undef value @@ -802,7 +803,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { callInst->getCalledFunction() == avxMaskedStore64) { // NOTE: mask is the 2nd parameter, not the 3rd one!! llvm::Value *factor = callInst->getArgOperand(1); - uint32_t mask; + uint64_t mask; if (lGetMask(factor, &mask) == true) { if (mask == 0) { // nothing actually being stored, just remove the inst @@ -931,7 +932,7 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { if (calledFunc == NULL || calledFunc != m->module->getFunction("__movmsk")) continue; - uint32_t mask; + uint64_t mask; if (lGetMask(callInst->getArgOperand(0), &mask) == true) { #if 0 fprintf(stderr, "mask %d\n", mask); @@ -939,7 +940,7 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { fprintf(stderr, "-----------\n"); #endif llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), - iter, LLVMInt32(mask)); + iter, LLVMInt64(mask)); modifiedAny = true; goto restart; } diff --git a/stdlib.ispc b/stdlib.ispc index 9b2fe17d..4cfcdea4 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -355,7 +355,8 @@ static inline uniform bool all(bool v) { #else int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask); #endif - return __movmsk(match) == (1 << programCount) - 1; + return __movmsk(match) == ((programCount == 64) ? 
~0ull : + ((1ull << programCount) - 1)); } __declspec(safe) @@ -388,14 +389,14 @@ __declspec(safe) static inline uniform int popcnt(bool v) { // As with any() and all(), only count across the active lanes #ifdef ISPC_TARGET_GENERIC - return __popcnt_int32(__movmsk(v & __mask)); + return __popcnt_int64(__movmsk(v & __mask)); #else - return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask)); + return __popcnt_int64(__movmsk(__sext_varying_bool(v) & __mask)); #endif } __declspec(safe) -static inline uniform int lanemask() { +static inline uniform unsigned int64 lanemask() { return __movmsk(__mask); } @@ -1615,12 +1616,12 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \ TA ret[programCount]; \ TA memVal; \ uniform int lastSwap; \ - uniform int mask = lanemask(); \ + uniform unsigned int64 mask = lanemask(); \ /* First, have the first running program instance (if any) perform \ the swap with memory with its value of "value"; record the \ value returned. */ \ for (; i < programCount; ++i) { \ - if ((mask & (1 << i)) == 0) \ + if ((mask & (1ull << i)) == 0) \ continue; \ memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \ lastSwap = i; \ @@ -1632,7 +1633,7 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \ current instance had executed a hardware atomic swap right before \ the last one that did a swap. 
*/ \ for (; i < programCount; ++i) { \ - if ((mask & (1 << i)) == 0) \ + if ((mask & (1ull << i)) == 0) \ continue; \ ret[lastSwap] = extract(value, i); \ lastSwap = i; \ diff --git a/stmt.cpp b/stmt.cpp index 6a6f58e5..11cc94ea 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2843,7 +2843,7 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) { pos); // Compute the per lane mask to test the mask bits against: (1 << iter) - ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, 1, + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt64, 1ll, iterSym->pos); Expr *shiftLaneExpr = new BinaryExpr(BinaryExpr::Shl, oneExpr, symExpr, pos); @@ -2863,4 +2863,3 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) { // And return a for loop that wires it all together. return new ForStmt(initStmt, testExpr, stepStmt, laneCheckIf, false, pos); } - From 449d956966c05cb71a81ca0cbd54422240af4099 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 11:50:39 -0700 Subject: [PATCH 159/173] Add support for generic-64 target. 
--- Makefile | 2 +- builtins.cpp | 9 ++++++++- builtins/target-generic-64.ll | 33 +++++++++++++++++++++++++++++++++ ispc.cpp | 9 +++++++++ ispc.vcxproj | 14 ++++++++++++++ 5 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 builtins/target-generic-64.ll diff --git a/Makefile b/Makefile index 23a2428a..0d61c611 100644 --- a/Makefile +++ b/Makefile @@ -85,7 +85,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \ - generic-16 generic-32 generic-1 + generic-16 generic-32 generic-64 generic-1 BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ builtins/dispatch.ll BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ diff --git a/builtins.cpp b/builtins.cpp index 14444f40..db55758a 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -854,6 +854,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod builtins_bitcode_generic_32_length, module, symbolTable); break; + case 64: + extern unsigned char builtins_bitcode_generic_64[]; + extern int builtins_bitcode_generic_64_length; + AddBitcodeToModule(builtins_bitcode_generic_64, + builtins_bitcode_generic_64_length, + module, symbolTable); + break; case 1: extern unsigned char builtins_bitcode_generic_1[]; extern int builtins_bitcode_generic_1_length; diff --git a/builtins/target-generic-64.ll b/builtins/target-generic-64.ll new file mode 100644 index 00000000..09443f8e --- /dev/null +++ b/builtins/target-generic-64.ll @@ -0,0 +1,33 @@ +;; Copyright (c) 2010-2012, Intel Corporation +;; All rights reserved. 
+;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +define(`WIDTH',`64') +include(`target-generic-common.ll') diff --git a/ispc.cpp b/ispc.cpp index ac429cb9..341206c6 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -268,6 +268,15 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->hasHalf = true; t->hasTranscendentals = true; } + else if (!strcasecmp(isa, "generic-64")) { + t->isa = Target::GENERIC; + t->nativeVectorWidth = 64; + t->vectorWidth = 64; + t->maskingIsFree = true; + t->maskBitCount = 1; + t->hasHalf = true; + t->hasTranscendentals = true; + } else if (!strcasecmp(isa, "generic-1")) { t->isa = Target::GENERIC; t->nativeVectorWidth = 1; diff --git a/ispc.vcxproj b/ispc.vcxproj index 34ef9373..6478df4e 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -30,6 +30,7 @@ + @@ -278,6 +279,19 @@ Building gen-bitcode-generic-32.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp + gen-bitcode-generic-64.cpp + builtins\util.m4;builtins\target-generic-common.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp + gen-bitcode-generic-64.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-64.cpp + Building gen-bitcode-generic-64.cpp + + Document From e8e9baa417ba9e10e8e426d4bbf465cb60c1c83a Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 12:14:58 -0700 Subject: [PATCH 160/173] Update test_static.cpp to handle up to 64-wide --- test_static.cpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/test_static.cpp b/test_static.cpp index a8ec4a79..e798f960 100644 --- a/test_static.cpp +++ b/test_static.cpp @@ -102,15 +102,21 @@ void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) { int main(int argc, char *argv[]) { int w = width(); - assert(w <= 16); + assert(w <= 64); - 
float returned_result[16]; - for (int i = 0; i < 16; ++i) + float returned_result[64]; + float vfloat[64]; + double vdouble[64]; + int vint[64], vint2[64]; + + for (int i = 0; i < 64; ++i) { returned_result[i] = -1e20; - float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; - double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; - int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 }; - int vint2[16] = { 5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}; + vfloat[i] = i+1; + vdouble[i] = i+1; + vint[i] = 2*(i+1); + vint2[i] = i+5; + } + float b = 5.; #if (TEST_SIG == 0) @@ -131,8 +137,8 @@ int main(int argc, char *argv[]) { #error "Unknown or unset TEST_SIG value" #endif - float expected_result[16]; - memset(expected_result, 0, 16*sizeof(float)); + float expected_result[64]; + memset(expected_result, 0, 64*sizeof(float)); result(expected_result); int errors = 0; From 7a2142075cede616ec6e99e255ad08686182f5b9 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 12:37:59 -0700 Subject: [PATCH 161/173] Add examples/intrinsics/generic-32.h implementation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Roughly 100 tests fail with this; all the tests need to be audited for assumptions that 16 is the widest width possible… --- examples/intrinsics/generic-32.h | 1688 ++++++++++++++++++++++++++++++ run_tests.py | 7 +- 2 files changed, 1693 insertions(+), 2 deletions(-) create mode 100644 examples/intrinsics/generic-32.h diff --git a/examples/intrinsics/generic-32.h b/examples/intrinsics/generic-32.h new file mode 100644 index 00000000..d7d044ef --- /dev/null +++ b/examples/intrinsics/generic-32.h @@ -0,0 +1,1688 @@ +/* + Copyright (c) 2010-2012, Intel Corporation + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include + +#ifdef _MSC_VER +#define FORCEINLINE __forceinline +#define PRE_ALIGN(x) /*__declspec(align(x))*/ +#define POST_ALIGN(x) +#define roundf(x) (floorf(x + .5f)) +#define round(x) (floor(x + .5)) +#else +#define FORCEINLINE __attribute__((always_inline)) +#define PRE_ALIGN(x) +#define POST_ALIGN(x) __attribute__ ((aligned(x))) +#endif + +typedef float __vec1_f; +typedef double __vec1_d; +typedef int8_t __vec1_i8; +typedef int16_t __vec1_i16; +typedef int32_t __vec1_i32; +typedef int64_t __vec1_i64; + +struct __vec32_i1 { + __vec32_i1() { } + __vec32_i1(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3, + uint32_t v4, uint32_t v5, uint32_t v6, uint32_t v7, + uint32_t v8, uint32_t v9, uint32_t v10, uint32_t v11, + uint32_t v12, uint32_t v13, uint32_t v14, uint32_t v15, + uint32_t v16, uint32_t v17, uint32_t v18, uint32_t v19, + uint32_t v20, uint32_t v21, uint32_t v22, uint32_t v23, + uint32_t v24, uint32_t v25, uint32_t v26, uint32_t v27, + uint32_t v28, uint32_t v29, uint32_t v30, uint32_t v31) { + v = ((v0 & 1) | + ((v1 & 1) << 1) | + ((v2 & 1) << 2) | + ((v3 & 1) << 3) | + ((v4 & 1) << 4) | + ((v5 & 1) << 5) | + ((v6 & 1) << 6) | + ((v7 & 1) << 7) | + ((v8 & 1) << 8) | + ((v9 & 1) << 9) | + ((v10 & 1) << 10) | + ((v11 & 1) << 11) | + ((v12 & 1) << 12) | + ((v13 & 1) << 13) | + ((v14 & 1) << 14) | + ((v15 & 1) << 15) | + ((v16 & 1) << 16) | + ((v17 & 1) << 17) | + ((v18 & 1) << 18) | + ((v19 & 1) << 19) | + ((v20 & 1) << 20) | + ((v21 & 1) << 21) | + ((v22 & 1) << 22) | + ((v23 & 1) << 23) | + ((v24 & 1) << 24) | + ((v25 & 1) << 25) | + ((v26 & 1) << 26) | + ((v27 & 1) << 27) | + ((v28 & 1) << 28) | + ((v29 & 1) << 29) | + ((v30 & 1) << 30) | + ((v31 & 1) << 31)); + } + + uint32_t v; +}; + + +template +struct vec32 { + vec32() { } + vec32(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, + T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, + T v24, T v25, T v26, T v27, T v28, T 
v29, T v30, T v31) { + v[0] = v0; v[1] = v1; v[2] = v2; v[3] = v3; + v[4] = v4; v[5] = v5; v[6] = v6; v[7] = v7; + v[8] = v8; v[9] = v9; v[10] = v10; v[11] = v11; + v[12] = v12; v[13] = v13; v[14] = v14; v[15] = v15; + v[16] = v16; v[17] = v17; v[18] = v18; v[19] = v19; + v[20] = v20; v[21] = v21; v[22] = v22; v[23] = v23; + v[24] = v24; v[25] = v25; v[26] = v26; v[27] = v27; + v[28] = v28; v[29] = v29; v[30] = v30; v[31] = v31; + } + T v[32]; +}; + +PRE_ALIGN(64) struct __vec32_f : public vec32 { + __vec32_f() { } + __vec32_f(float v0, float v1, float v2, float v3, + float v4, float v5, float v6, float v7, + float v8, float v9, float v10, float v11, + float v12, float v13, float v14, float v15, + float v16, float v17, float v18, float v19, + float v20, float v21, float v22, float v23, + float v24, float v25, float v26, float v27, + float v28, float v29, float v30, float v31) + : vec32(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31) { } +} POST_ALIGN(64); + +PRE_ALIGN(128) struct __vec32_d : public vec32 { + __vec32_d() { } + __vec32_d(double v0, double v1, double v2, double v3, + double v4, double v5, double v6, double v7, + double v8, double v9, double v10, double v11, + double v12, double v13, double v14, double v15, + double v16, double v17, double v18, double v19, + double v20, double v21, double v22, double v23, + double v24, double v25, double v26, double v27, + double v28, double v29, double v30, double v31) + : vec32(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31) { } + +} POST_ALIGN(128); + +PRE_ALIGN(16) struct __vec32_i8 : public vec32 { + __vec32_i8() { } + __vec32_i8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, 
int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, + int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, + int8_t v28, int8_t v29, int8_t v30, int8_t v31) + : vec32(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31) { } + +} POST_ALIGN(16); + +PRE_ALIGN(32) struct __vec32_i16 : public vec32 { + __vec32_i16() { } + __vec32_i16(int16_t v0, int16_t v1, int16_t v2, int16_t v3, + int16_t v4, int16_t v5, int16_t v6, int16_t v7, + int16_t v8, int16_t v9, int16_t v10, int16_t v11, + int16_t v12, int16_t v13, int16_t v14, int16_t v15, + int16_t v16, int16_t v17, int16_t v18, int16_t v19, + int16_t v20, int16_t v21, int16_t v22, int16_t v23, + int16_t v24, int16_t v25, int16_t v26, int16_t v27, + int16_t v28, int16_t v29, int16_t v30, int16_t v31) + : vec32(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31) { } + +} POST_ALIGN(32); + +PRE_ALIGN(64) struct __vec32_i32 : public vec32 { + __vec32_i32() { } + __vec32_i32(int32_t v0, int32_t v1, int32_t v2, int32_t v3, + int32_t v4, int32_t v5, int32_t v6, int32_t v7, + int32_t v8, int32_t v9, int32_t v10, int32_t v11, + int32_t v12, int32_t v13, int32_t v14, int32_t v15, + int32_t v16, int32_t v17, int32_t v18, int32_t v19, + int32_t v20, int32_t v21, int32_t v22, int32_t v23, + int32_t v24, int32_t v25, int32_t v26, int32_t v27, + int32_t v28, int32_t v29, int32_t v30, int32_t v31) + : vec32(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31) { } + +} POST_ALIGN(64); + +static inline int32_t __extract_element(__vec32_i32, int); + +PRE_ALIGN(128) struct __vec32_i64 : public vec32 { + __vec32_i64() { } + __vec32_i64(int64_t v0, int64_t v1, int64_t v2, int64_t v3, + 
int64_t v4, int64_t v5, int64_t v6, int64_t v7, + int64_t v8, int64_t v9, int64_t v10, int64_t v11, + int64_t v12, int64_t v13, int64_t v14, int64_t v15, + int64_t v16, int64_t v17, int64_t v18, int64_t v19, + int64_t v20, int64_t v21, int64_t v22, int64_t v23, + int64_t v24, int64_t v25, int64_t v26, int64_t v27, + int64_t v28, int64_t v29, int64_t v30, int64_t v31) + : vec32(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31) { } + +} POST_ALIGN(128); + +/////////////////////////////////////////////////////////////////////////// +// macros... + +#define UNARY_OP(TYPE, NAME, OP) \ +static FORCEINLINE TYPE NAME(TYPE v) { \ + TYPE ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = OP(v.v[i]); \ + return ret; \ +} + +#define BINARY_OP(TYPE, NAME, OP) \ +static FORCEINLINE TYPE NAME(TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = a.v[i] OP b.v[i]; \ + return ret; \ +} + +#define BINARY_OP_CAST(TYPE, CAST, NAME, OP) \ +static FORCEINLINE TYPE NAME(TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = (CAST)(a.v[i]) OP (CAST)(b.v[i]); \ + return ret; \ +} + +#define BINARY_OP_FUNC(TYPE, NAME, FUNC) \ +static FORCEINLINE TYPE NAME(TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = FUNC(a.v[i], b.v[i]); \ + return ret; \ +} + +#define CMP_OP(TYPE, CAST, NAME, OP) \ +static FORCEINLINE __vec32_i1 NAME(TYPE a, TYPE b) { \ + __vec32_i1 ret; \ + ret.v = 0; \ + for (int i = 0; i < 32; ++i) \ + ret.v |= ((CAST)(a.v[i]) OP (CAST)(b.v[i])) << i; \ + return ret; \ +} + +#define INSERT_EXTRACT(VTYPE, STYPE) \ +static FORCEINLINE STYPE __extract_element(VTYPE v, int index) { \ + return ((STYPE *)&v)[index]; \ +} \ +static FORCEINLINE void __insert_element(VTYPE *v, int index, STYPE val) { \ + ((STYPE *)v)[index] = val; \ +} + +#define LOAD_STORE(VTYPE, STYPE) \ +static FORCEINLINE VTYPE 
__load(VTYPE *p, int align) { \ + STYPE *ptr = (STYPE *)p; \ + VTYPE ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = ptr[i]; \ + return ret; \ +} \ +static FORCEINLINE void __store(VTYPE *p, VTYPE v, int align) { \ + STYPE *ptr = (STYPE *)p; \ + for (int i = 0; i < 32; ++i) \ + ptr[i] = v.v[i]; \ +} + +#define REDUCE_ADD(TYPE, VTYPE, NAME) \ +static FORCEINLINE TYPE NAME(VTYPE v) { \ + TYPE ret = v.v[0]; \ + for (int i = 1; i < 32; ++i) \ + ret = ret + v.v[i]; \ + return ret; \ +} + +#define REDUCE_MINMAX(TYPE, VTYPE, NAME, OP) \ +static FORCEINLINE TYPE NAME(VTYPE v) { \ + TYPE ret = v.v[0]; \ + for (int i = 1; i < 32; ++i) \ + ret = (ret OP (TYPE)v.v[i]) ? ret : (TYPE)v.v[i]; \ + return ret; \ +} + +#define SELECT(TYPE) \ +static FORCEINLINE TYPE __select(__vec32_i1 mask, TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = (mask.v & (1<v &= ~(1 << index); + else + vec->v |= (1 << index); +} + +static FORCEINLINE __vec32_i1 __load(__vec32_i1 *p, int align) { + uint16_t *ptr = (uint16_t *)p; + __vec32_i1 r; + r.v = *ptr; + return r; +} + +static FORCEINLINE void __store(__vec32_i1 *p, __vec32_i1 v, int align) { + uint16_t *ptr = (uint16_t *)p; + *ptr = v.v; +} + +static FORCEINLINE __vec32_i1 __smear_i1(__vec32_i1, int v) { + return __vec32_i1(v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v); +} + + +/////////////////////////////////////////////////////////////////////////// +// int8 + +BINARY_OP(__vec32_i8, __add, +) +BINARY_OP(__vec32_i8, __sub, -) +BINARY_OP(__vec32_i8, __mul, *) + +BINARY_OP(__vec32_i8, __or, |) +BINARY_OP(__vec32_i8, __and, &) +BINARY_OP(__vec32_i8, __xor, ^) +BINARY_OP(__vec32_i8, __shl, <<) + +BINARY_OP_CAST(__vec32_i8, uint8_t, __udiv, /) +BINARY_OP_CAST(__vec32_i8, int8_t, __sdiv, /) + +BINARY_OP_CAST(__vec32_i8, uint8_t, __urem, %) +BINARY_OP_CAST(__vec32_i8, int8_t, __srem, %) +BINARY_OP_CAST(__vec32_i8, uint8_t, __lshr, >>) +BINARY_OP_CAST(__vec32_i8, 
int8_t, __ashr, >>) + +SHIFT_UNIFORM(__vec32_i8, uint8_t, __lshr, >>) +SHIFT_UNIFORM(__vec32_i8, int8_t, __ashr, >>) +SHIFT_UNIFORM(__vec32_i8, int8_t, __shl, <<) + +CMP_OP(__vec32_i8, int8_t, __equal, ==) +CMP_OP(__vec32_i8, int8_t, __not_equal, !=) +CMP_OP(__vec32_i8, uint8_t, __unsigned_less_equal, <=) +CMP_OP(__vec32_i8, int8_t, __signed_less_equal, <=) +CMP_OP(__vec32_i8, uint8_t, __unsigned_greater_equal, >=) +CMP_OP(__vec32_i8, int8_t, __signed_greater_equal, >=) +CMP_OP(__vec32_i8, uint8_t, __unsigned_less_than, <) +CMP_OP(__vec32_i8, int8_t, __signed_less_than, <) +CMP_OP(__vec32_i8, uint8_t, __unsigned_greater_than, >) +CMP_OP(__vec32_i8, int8_t, __signed_greater_than, >) + +SELECT(__vec32_i8) +INSERT_EXTRACT(__vec32_i8, int8_t) +SMEAR(__vec32_i8, i8, int8_t) +BROADCAST(__vec32_i8, i8, int8_t) +ROTATE(__vec32_i8, i8, int8_t) +SHUFFLES(__vec32_i8, i8, int8_t) +LOAD_STORE(__vec32_i8, int8_t) + +/////////////////////////////////////////////////////////////////////////// +// int16 + +BINARY_OP(__vec32_i16, __add, +) +BINARY_OP(__vec32_i16, __sub, -) +BINARY_OP(__vec32_i16, __mul, *) + +BINARY_OP(__vec32_i16, __or, |) +BINARY_OP(__vec32_i16, __and, &) +BINARY_OP(__vec32_i16, __xor, ^) +BINARY_OP(__vec32_i16, __shl, <<) + +BINARY_OP_CAST(__vec32_i16, uint16_t, __udiv, /) +BINARY_OP_CAST(__vec32_i16, int16_t, __sdiv, /) + +BINARY_OP_CAST(__vec32_i16, uint16_t, __urem, %) +BINARY_OP_CAST(__vec32_i16, int16_t, __srem, %) +BINARY_OP_CAST(__vec32_i16, uint16_t, __lshr, >>) +BINARY_OP_CAST(__vec32_i16, int16_t, __ashr, >>) + +SHIFT_UNIFORM(__vec32_i16, uint16_t, __lshr, >>) +SHIFT_UNIFORM(__vec32_i16, int16_t, __ashr, >>) +SHIFT_UNIFORM(__vec32_i16, int16_t, __shl, <<) + +CMP_OP(__vec32_i16, int16_t, __equal, ==) +CMP_OP(__vec32_i16, int16_t, __not_equal, !=) +CMP_OP(__vec32_i16, uint16_t, __unsigned_less_equal, <=) +CMP_OP(__vec32_i16, int16_t, __signed_less_equal, <=) +CMP_OP(__vec32_i16, uint16_t, __unsigned_greater_equal, >=) +CMP_OP(__vec32_i16, int16_t, 
__signed_greater_equal, >=) +CMP_OP(__vec32_i16, uint16_t, __unsigned_less_than, <) +CMP_OP(__vec32_i16, int16_t, __signed_less_than, <) +CMP_OP(__vec32_i16, uint16_t, __unsigned_greater_than, >) +CMP_OP(__vec32_i16, int16_t, __signed_greater_than, >) + +SELECT(__vec32_i16) +INSERT_EXTRACT(__vec32_i16, int16_t) +SMEAR(__vec32_i16, i16, int16_t) +BROADCAST(__vec32_i16, i16, int16_t) +ROTATE(__vec32_i16, i16, int16_t) +SHUFFLES(__vec32_i16, i16, int16_t) +LOAD_STORE(__vec32_i16, int16_t) + +/////////////////////////////////////////////////////////////////////////// +// int32 + +BINARY_OP(__vec32_i32, __add, +) +BINARY_OP(__vec32_i32, __sub, -) +BINARY_OP(__vec32_i32, __mul, *) + +BINARY_OP(__vec32_i32, __or, |) +BINARY_OP(__vec32_i32, __and, &) +BINARY_OP(__vec32_i32, __xor, ^) +BINARY_OP(__vec32_i32, __shl, <<) + +BINARY_OP_CAST(__vec32_i32, uint32_t, __udiv, /) +BINARY_OP_CAST(__vec32_i32, int32_t, __sdiv, /) + +BINARY_OP_CAST(__vec32_i32, uint32_t, __urem, %) +BINARY_OP_CAST(__vec32_i32, int32_t, __srem, %) +BINARY_OP_CAST(__vec32_i32, uint32_t, __lshr, >>) +BINARY_OP_CAST(__vec32_i32, int32_t, __ashr, >>) + +SHIFT_UNIFORM(__vec32_i32, uint32_t, __lshr, >>) +SHIFT_UNIFORM(__vec32_i32, int32_t, __ashr, >>) +SHIFT_UNIFORM(__vec32_i32, int32_t, __shl, <<) + +CMP_OP(__vec32_i32, int32_t, __equal, ==) +CMP_OP(__vec32_i32, int32_t, __not_equal, !=) +CMP_OP(__vec32_i32, uint32_t, __unsigned_less_equal, <=) +CMP_OP(__vec32_i32, int32_t, __signed_less_equal, <=) +CMP_OP(__vec32_i32, uint32_t, __unsigned_greater_equal, >=) +CMP_OP(__vec32_i32, int32_t, __signed_greater_equal, >=) +CMP_OP(__vec32_i32, uint32_t, __unsigned_less_than, <) +CMP_OP(__vec32_i32, int32_t, __signed_less_than, <) +CMP_OP(__vec32_i32, uint32_t, __unsigned_greater_than, >) +CMP_OP(__vec32_i32, int32_t, __signed_greater_than, >) + +SELECT(__vec32_i32) +INSERT_EXTRACT(__vec32_i32, int32_t) +SMEAR(__vec32_i32, i32, int32_t) +BROADCAST(__vec32_i32, i32, int32_t) +ROTATE(__vec32_i32, i32, int32_t) 
+SHUFFLES(__vec32_i32, i32, int32_t) +LOAD_STORE(__vec32_i32, int32_t) + +/////////////////////////////////////////////////////////////////////////// +// int64 + +BINARY_OP(__vec32_i64, __add, +) +BINARY_OP(__vec32_i64, __sub, -) +BINARY_OP(__vec32_i64, __mul, *) + +BINARY_OP(__vec32_i64, __or, |) +BINARY_OP(__vec32_i64, __and, &) +BINARY_OP(__vec32_i64, __xor, ^) +BINARY_OP(__vec32_i64, __shl, <<) + +BINARY_OP_CAST(__vec32_i64, uint64_t, __udiv, /) +BINARY_OP_CAST(__vec32_i64, int64_t, __sdiv, /) + +BINARY_OP_CAST(__vec32_i64, uint64_t, __urem, %) +BINARY_OP_CAST(__vec32_i64, int64_t, __srem, %) +BINARY_OP_CAST(__vec32_i64, uint64_t, __lshr, >>) +BINARY_OP_CAST(__vec32_i64, int64_t, __ashr, >>) + +SHIFT_UNIFORM(__vec32_i64, uint64_t, __lshr, >>) +SHIFT_UNIFORM(__vec32_i64, int64_t, __ashr, >>) +SHIFT_UNIFORM(__vec32_i64, int64_t, __shl, <<) + +CMP_OP(__vec32_i64, int64_t, __equal, ==) +CMP_OP(__vec32_i64, int64_t, __not_equal, !=) +CMP_OP(__vec32_i64, uint64_t, __unsigned_less_equal, <=) +CMP_OP(__vec32_i64, int64_t, __signed_less_equal, <=) +CMP_OP(__vec32_i64, uint64_t, __unsigned_greater_equal, >=) +CMP_OP(__vec32_i64, int64_t, __signed_greater_equal, >=) +CMP_OP(__vec32_i64, uint64_t, __unsigned_less_than, <) +CMP_OP(__vec32_i64, int64_t, __signed_less_than, <) +CMP_OP(__vec32_i64, uint64_t, __unsigned_greater_than, >) +CMP_OP(__vec32_i64, int64_t, __signed_greater_than, >) + +SELECT(__vec32_i64) +INSERT_EXTRACT(__vec32_i64, int64_t) +SMEAR(__vec32_i64, i64, int64_t) +BROADCAST(__vec32_i64, i64, int64_t) +ROTATE(__vec32_i64, i64, int64_t) +SHUFFLES(__vec32_i64, i64, int64_t) +LOAD_STORE(__vec32_i64, int64_t) + +/////////////////////////////////////////////////////////////////////////// +// float + +BINARY_OP(__vec32_f, __add, +) +BINARY_OP(__vec32_f, __sub, -) +BINARY_OP(__vec32_f, __mul, *) +BINARY_OP(__vec32_f, __div, /) + +CMP_OP(__vec32_f, float, __equal, ==) +CMP_OP(__vec32_f, float, __not_equal, !=) +CMP_OP(__vec32_f, float, __less_than, <) 
+CMP_OP(__vec32_f, float, __less_equal, <=) +CMP_OP(__vec32_f, float, __greater_than, >) +CMP_OP(__vec32_f, float, __greater_equal, >=) + +static FORCEINLINE __vec32_i1 __ordered(__vec32_f a, __vec32_f b) { + __vec32_i1 ret; + ret.v = 0; + for (int i = 0; i < 32; ++i) + ret.v |= ((a.v[i] == a.v[i]) && (b.v[i] == b.v[i])) ? (1 << i) : 0; + return ret; +} + +#if 0 + case Instruction::FRem: intrinsic = "__frem"; break; +#endif + +SELECT(__vec32_f) +INSERT_EXTRACT(__vec32_f, float) +SMEAR(__vec32_f, float, float) +BROADCAST(__vec32_f, float, float) +ROTATE(__vec32_f, float, float) +SHUFFLES(__vec32_f, float, float) +LOAD_STORE(__vec32_f, float) + +static FORCEINLINE float __exp_uniform_float(float v) { + return expf(v); +} + +static FORCEINLINE __vec32_f __exp_varying_float(__vec32_f v) { + __vec32_f ret; + for (int i = 0; i < 32; ++i) + ret.v[i] = expf(v.v[i]); + return ret; +} + +static FORCEINLINE float __log_uniform_float(float v) { + return logf(v); +} + +static FORCEINLINE __vec32_f __log_varying_float(__vec32_f v) { + __vec32_f ret; + for (int i = 0; i < 32; ++i) + ret.v[i] = logf(v.v[i]); + return ret; +} + +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} + +static FORCEINLINE __vec32_f __pow_varying_float(__vec32_f a, __vec32_f b) { + __vec32_f ret; + for (int i = 0; i < 32; ++i) + ret.v[i] = powf(a.v[i], b.v[i]); + return ret; +} + +static FORCEINLINE int __intbits(float v) { + union { + float f; + int i; + } u; + u.f = v; + return u.i; +} + +static FORCEINLINE float __floatbits(int v) { + union { + float f; + int i; + } u; + u.i = v; + return u.f; +} + +static FORCEINLINE float __half_to_float_uniform(int16_t h) { + static const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift + + int32_t o = ((int32_t)(h & 0x7fff)) << 13; // exponent/mantissa bits + uint32_t exp = shifted_exp & o; // just the exponent + o += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp == 
shifted_exp) // Inf/NaN? + o += (128 - 16) << 23; // extra exp adjust + else if (exp == 0) { // Zero/Denormal? + o += 1 << 23; // extra exp adjust + o = __intbits(__floatbits(o) - __floatbits(113 << 23)); // renormalize + } + + o |= ((int32_t)(h & 0x8000)) << 16; // sign bit + return __floatbits(o); +} + + +static FORCEINLINE __vec32_f __half_to_float_varying(__vec32_i16 v) { + __vec32_f ret; + for (int i = 0; i < 32; ++i) + ret.v[i] = __half_to_float_uniform(v.v[i]); + return ret; +} + + +static FORCEINLINE int16_t __float_to_half_uniform(float f) { + uint32_t sign_mask = 0x80000000u; + int32_t o; + + int32_t fint = __intbits(f); + int32_t sign = fint & sign_mask; + fint ^= sign; + + int32_t f32infty = 255 << 23; + o = (fint > f32infty) ? 0x7e00 : 0x7c00; + + // (De)normalized number or zero + // update fint unconditionally to save the blending; we don't need it + // anymore for the Inf/NaN case anyway. + const uint32_t round_mask = ~0xfffu; + const int32_t magic = 15 << 23; + const int32_t f16infty = 31 << 23; + + int32_t fint2 = __intbits(__floatbits(fint & round_mask) * __floatbits(magic)) - round_mask; + fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed + + if (fint < f32infty) + o = fint2 >> 13; // Take the bits! 
+ + return (o | (sign >> 16)); +} + + +static FORCEINLINE __vec32_i16 __float_to_half_varying(__vec32_f v) { + __vec32_i16 ret; + for (int i = 0; i < 32; ++i) + ret.v[i] = __float_to_half_uniform(v.v[i]); + return ret; +} + + +/////////////////////////////////////////////////////////////////////////// +// double + +BINARY_OP(__vec32_d, __add, +) +BINARY_OP(__vec32_d, __sub, -) +BINARY_OP(__vec32_d, __mul, *) +BINARY_OP(__vec32_d, __div, /) + +CMP_OP(__vec32_d, double, __equal, ==) +CMP_OP(__vec32_d, double, __not_equal, !=) +CMP_OP(__vec32_d, double, __less_than, <) +CMP_OP(__vec32_d, double, __less_equal, <=) +CMP_OP(__vec32_d, double, __greater_than, >) +CMP_OP(__vec32_d, double, __greater_equal, >=) + +static FORCEINLINE __vec32_i1 __ordered(__vec32_d a, __vec32_d b) { + __vec32_i1 ret; + ret.v = 0; + for (int i = 0; i < 32; ++i) + ret.v |= ((a.v[i] == a.v[i]) && (b.v[i] == b.v[i])) ? (1 << i) : 0; + return ret; +} + +#if 0 + case Instruction::FRem: intrinsic = "__frem"; break; +#endif + +SELECT(__vec32_d) +INSERT_EXTRACT(__vec32_d, double) +SMEAR(__vec32_d, double, double) +BROADCAST(__vec32_d, double, double) +ROTATE(__vec32_d, double, double) +SHUFFLES(__vec32_d, double, double) +LOAD_STORE(__vec32_d, double) + +/////////////////////////////////////////////////////////////////////////// +// casts + + +#define CAST(TO, STO, FROM, SFROM, FUNC) \ +static FORCEINLINE TO FUNC(TO, FROM val) { \ + TO ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = (STO)((SFROM)(val.v[i])); \ + return ret; \ +} + +// sign extension conversions +CAST(__vec32_i64, int64_t, __vec32_i32, int32_t, __cast_sext) +CAST(__vec32_i64, int64_t, __vec32_i16, int16_t, __cast_sext) +CAST(__vec32_i64, int64_t, __vec32_i8, int8_t, __cast_sext) +CAST(__vec32_i32, int32_t, __vec32_i16, int16_t, __cast_sext) +CAST(__vec32_i32, int32_t, __vec32_i8, int8_t, __cast_sext) +CAST(__vec32_i16, int16_t, __vec32_i8, int8_t, __cast_sext) + +#define CAST_SEXT_I1(TYPE) \ +static FORCEINLINE TYPE 
__cast_sext(TYPE, __vec32_i1 v) { \ + TYPE ret; \ + for (int i = 0; i < 32; ++i) { \ + ret.v[i] = 0; \ + if (v.v & (1 << i)) \ + ret.v[i] = ~ret.v[i]; \ + } \ + return ret; \ +} + +CAST_SEXT_I1(__vec32_i8) +CAST_SEXT_I1(__vec32_i16) +CAST_SEXT_I1(__vec32_i32) +CAST_SEXT_I1(__vec32_i64) + +// zero extension +CAST(__vec32_i64, uint64_t, __vec32_i32, uint32_t, __cast_zext) +CAST(__vec32_i64, uint64_t, __vec32_i16, uint16_t, __cast_zext) +CAST(__vec32_i64, uint64_t, __vec32_i8, uint8_t, __cast_zext) +CAST(__vec32_i32, uint32_t, __vec32_i16, uint16_t, __cast_zext) +CAST(__vec32_i32, uint32_t, __vec32_i8, uint8_t, __cast_zext) +CAST(__vec32_i16, uint16_t, __vec32_i8, uint8_t, __cast_zext) + +#define CAST_ZEXT_I1(TYPE) \ +static FORCEINLINE TYPE __cast_zext(TYPE, __vec32_i1 v) { \ + TYPE ret; \ + for (int i = 0; i < 32; ++i) \ + ret.v[i] = (v.v & (1 << i)) ? 1 : 0; \ + return ret; \ +} + +CAST_ZEXT_I1(__vec32_i8) +CAST_ZEXT_I1(__vec32_i16) +CAST_ZEXT_I1(__vec32_i32) +CAST_ZEXT_I1(__vec32_i64) + +// truncations +CAST(__vec32_i32, int32_t, __vec32_i64, int64_t, __cast_trunc) +CAST(__vec32_i16, int16_t, __vec32_i64, int64_t, __cast_trunc) +CAST(__vec32_i8, int8_t, __vec32_i64, int64_t, __cast_trunc) +CAST(__vec32_i16, int16_t, __vec32_i32, int32_t, __cast_trunc) +CAST(__vec32_i8, int8_t, __vec32_i32, int32_t, __cast_trunc) +CAST(__vec32_i8, int8_t, __vec32_i16, int16_t, __cast_trunc) + +// signed int to float/double +CAST(__vec32_f, float, __vec32_i8, int8_t, __cast_sitofp) +CAST(__vec32_f, float, __vec32_i16, int16_t, __cast_sitofp) +CAST(__vec32_f, float, __vec32_i32, int32_t, __cast_sitofp) +CAST(__vec32_f, float, __vec32_i64, int64_t, __cast_sitofp) +CAST(__vec32_d, double, __vec32_i8, int8_t, __cast_sitofp) +CAST(__vec32_d, double, __vec32_i16, int16_t, __cast_sitofp) +CAST(__vec32_d, double, __vec32_i32, int32_t, __cast_sitofp) +CAST(__vec32_d, double, __vec32_i64, int64_t, __cast_sitofp) + +// unsigned int to float/double +CAST(__vec32_f, float, __vec32_i8, uint8_t, 
__cast_uitofp) +CAST(__vec32_f, float, __vec32_i16, uint16_t, __cast_uitofp) +CAST(__vec32_f, float, __vec32_i32, uint32_t, __cast_uitofp) +CAST(__vec32_f, float, __vec32_i64, uint64_t, __cast_uitofp) +CAST(__vec32_d, double, __vec32_i8, uint8_t, __cast_uitofp) +CAST(__vec32_d, double, __vec32_i16, uint16_t, __cast_uitofp) +CAST(__vec32_d, double, __vec32_i32, uint32_t, __cast_uitofp) +CAST(__vec32_d, double, __vec32_i64, uint64_t, __cast_uitofp) + +static FORCEINLINE __vec32_f __cast_uitofp(__vec32_f, __vec32_i1 v) { + __vec32_f ret; + for (int i = 0; i < 32; ++i) + ret.v[i] = (v.v & (1 << i)) ? 1. : 0.; + return ret; +} + +// float/double to signed int +CAST(__vec32_i8, int8_t, __vec32_f, float, __cast_fptosi) +CAST(__vec32_i16, int16_t, __vec32_f, float, __cast_fptosi) +CAST(__vec32_i32, int32_t, __vec32_f, float, __cast_fptosi) +CAST(__vec32_i64, int64_t, __vec32_f, float, __cast_fptosi) +CAST(__vec32_i8, int8_t, __vec32_d, double, __cast_fptosi) +CAST(__vec32_i16, int16_t, __vec32_d, double, __cast_fptosi) +CAST(__vec32_i32, int32_t, __vec32_d, double, __cast_fptosi) +CAST(__vec32_i64, int64_t, __vec32_d, double, __cast_fptosi) + +// float/double to unsigned int +CAST(__vec32_i8, uint8_t, __vec32_f, float, __cast_fptoui) +CAST(__vec32_i16, uint16_t, __vec32_f, float, __cast_fptoui) +CAST(__vec32_i32, uint32_t, __vec32_f, float, __cast_fptoui) +CAST(__vec32_i64, uint64_t, __vec32_f, float, __cast_fptoui) +CAST(__vec32_i8, uint8_t, __vec32_d, double, __cast_fptoui) +CAST(__vec32_i16, uint16_t, __vec32_d, double, __cast_fptoui) +CAST(__vec32_i32, uint32_t, __vec32_d, double, __cast_fptoui) +CAST(__vec32_i64, uint64_t, __vec32_d, double, __cast_fptoui) + +// float/double conversions +CAST(__vec32_f, float, __vec32_d, double, __cast_fptrunc) +CAST(__vec32_d, double, __vec32_f, float, __cast_fpext) + +typedef union { + int32_t i32; + float f; + int64_t i64; + double d; +} BitcastUnion; + +#define CAST_BITS(TO, TO_ELT, FROM, FROM_ELT) \ +static FORCEINLINE TO 
__cast_bits(TO, FROM val) { \ + TO r; \ + for (int i = 0; i < 32; ++i) { \ + BitcastUnion u; \ + u.FROM_ELT = val.v[i]; \ + r.v[i] = u.TO_ELT; \ + } \ + return r; \ +} + +CAST_BITS(__vec32_f, f, __vec32_i32, i32) +CAST_BITS(__vec32_i32, i32, __vec32_f, f) +CAST_BITS(__vec32_d, d, __vec32_i64, i64) +CAST_BITS(__vec32_i64, i64, __vec32_d, d) + +#define CAST_BITS_SCALAR(TO, FROM) \ +static FORCEINLINE TO __cast_bits(TO, FROM v) { \ + union { \ + TO to; \ + FROM from; \ + } u; \ + u.from = v; \ + return u.to; \ +} + +CAST_BITS_SCALAR(uint32_t, float) +CAST_BITS_SCALAR(int32_t, float) +CAST_BITS_SCALAR(float, uint32_t) +CAST_BITS_SCALAR(float, int32_t) +CAST_BITS_SCALAR(uint64_t, double) +CAST_BITS_SCALAR(int64_t, double) +CAST_BITS_SCALAR(double, uint64_t) +CAST_BITS_SCALAR(double, int64_t) + +/////////////////////////////////////////////////////////////////////////// +// various math functions + +static FORCEINLINE void __fastmath() { +} + +static FORCEINLINE float __round_uniform_float(float v) { + return roundf(v); +} + +static FORCEINLINE float __floor_uniform_float(float v) { + return floorf(v); +} + +static FORCEINLINE float __ceil_uniform_float(float v) { + return ceilf(v); +} + +static FORCEINLINE double __round_uniform_double(double v) { + return round(v); +} + +static FORCEINLINE double __floor_uniform_double(double v) { + return floor(v); +} + +static FORCEINLINE double __ceil_uniform_double(double v) { + return ceil(v); +} + +UNARY_OP(__vec32_f, __round_varying_float, roundf) +UNARY_OP(__vec32_f, __floor_varying_float, floorf) +UNARY_OP(__vec32_f, __ceil_varying_float, ceilf) +UNARY_OP(__vec32_d, __round_varying_double, round) +UNARY_OP(__vec32_d, __floor_varying_double, floor) +UNARY_OP(__vec32_d, __ceil_varying_double, ceil) + +// min/max + +static FORCEINLINE float __min_uniform_float(float a, float b) { return (ab) ? a : b; } +static FORCEINLINE double __min_uniform_double(double a, double b) { return (ab) ? 
a : b; } + +static FORCEINLINE int32_t __min_uniform_int32(int32_t a, int32_t b) { return (ab) ? a : b; } +static FORCEINLINE int32_t __min_uniform_uint32(uint32_t a, uint32_t b) { return (ab) ? a : b; } + +static FORCEINLINE int64_t __min_uniform_int64(int64_t a, int64_t b) { return (ab) ? a : b; } +static FORCEINLINE int64_t __min_uniform_uint64(uint64_t a, uint64_t b) { return (ab) ? a : b; } + + +BINARY_OP_FUNC(__vec32_f, __max_varying_float, __max_uniform_float) +BINARY_OP_FUNC(__vec32_f, __min_varying_float, __min_uniform_float) +BINARY_OP_FUNC(__vec32_d, __max_varying_double, __max_uniform_double) +BINARY_OP_FUNC(__vec32_d, __min_varying_double, __min_uniform_double) + +BINARY_OP_FUNC(__vec32_i32, __max_varying_int32, __max_uniform_int32) +BINARY_OP_FUNC(__vec32_i32, __min_varying_int32, __min_uniform_int32) +BINARY_OP_FUNC(__vec32_i32, __max_varying_uint32, __max_uniform_uint32) +BINARY_OP_FUNC(__vec32_i32, __min_varying_uint32, __min_uniform_uint32) + +BINARY_OP_FUNC(__vec32_i64, __max_varying_int64, __max_uniform_int64) +BINARY_OP_FUNC(__vec32_i64, __min_varying_int64, __min_uniform_int64) +BINARY_OP_FUNC(__vec32_i64, __max_varying_uint64, __max_uniform_uint64) +BINARY_OP_FUNC(__vec32_i64, __min_varying_uint64, __min_uniform_uint64) + +// sqrt/rsqrt/rcp + +static FORCEINLINE float __rsqrt_uniform_float(float v) { + return 1.f / sqrtf(v); +} + +static FORCEINLINE float __rcp_uniform_float(float v) { + return 1.f / v; +} + +static FORCEINLINE float __sqrt_uniform_float(float v) { + return sqrtf(v); +} + +static FORCEINLINE double __sqrt_uniform_double(double v) { + return sqrt(v); +} + +UNARY_OP(__vec32_f, __rcp_varying_float, __rcp_uniform_float) +UNARY_OP(__vec32_f, __rsqrt_varying_float, __rsqrt_uniform_float) +UNARY_OP(__vec32_f, __sqrt_varying_float, __sqrt_uniform_float) +UNARY_OP(__vec32_d, __sqrt_varying_double, __sqrt_uniform_double) + +/////////////////////////////////////////////////////////////////////////// +// bit ops + +static FORCEINLINE 
int32_t __popcnt_int32(uint32_t v) { + int count = 0; + for (; v != 0; v >>= 1) + count += (v & 1); + return count; +} + +static FORCEINLINE int32_t __popcnt_int64(uint64_t v) { + int count = 0; + for (; v != 0; v >>= 1) + count += (v & 1); + return count; +} + +static FORCEINLINE int32_t __count_trailing_zeros_i32(uint32_t v) { + if (v == 0) + return 32; + + int count = 0; + while ((v & 1) == 0) { + ++count; + v >>= 1; + } + return count; +} + +static FORCEINLINE int64_t __count_trailing_zeros_i64(uint64_t v) { + if (v == 0) + return 64; + + int count = 0; + while ((v & 1) == 0) { + ++count; + v >>= 1; + } + return count; +} + +static FORCEINLINE int32_t __count_leading_zeros_i32(uint32_t v) { + if (v == 0) + return 32; + + int count = 0; + while ((v & (1<<31)) == 0) { + ++count; + v <<= 1; + } + return count; +} + +static FORCEINLINE int64_t __count_leading_zeros_i64(uint64_t v) { + if (v == 0) + return 64; + + int count = 0; + while ((v & (1ull<<63)) == 0) { + ++count; + v <<= 1; + } + return count; +} + +/////////////////////////////////////////////////////////////////////////// +// reductions + +REDUCE_ADD(float, __vec32_f, __reduce_add_float) +REDUCE_MINMAX(float, __vec32_f, __reduce_min_float, <) +REDUCE_MINMAX(float, __vec32_f, __reduce_max_float, >) + +REDUCE_ADD(double, __vec32_d, __reduce_add_double) +REDUCE_MINMAX(double, __vec32_d, __reduce_min_double, <) +REDUCE_MINMAX(double, __vec32_d, __reduce_max_double, >) + +REDUCE_ADD(uint32_t, __vec32_i32, __reduce_add_int32) +REDUCE_MINMAX(int32_t, __vec32_i32, __reduce_min_int32, <) +REDUCE_MINMAX(int32_t, __vec32_i32, __reduce_max_int32, >) + +REDUCE_ADD(uint32_t, __vec32_i32, __reduce_add_uint32) +REDUCE_MINMAX(uint32_t, __vec32_i32, __reduce_min_uint32, <) +REDUCE_MINMAX(uint32_t, __vec32_i32, __reduce_max_uint32, >) + +REDUCE_ADD(uint64_t, __vec32_i64, __reduce_add_int64) +REDUCE_MINMAX(int64_t, __vec32_i64, __reduce_min_int64, <) +REDUCE_MINMAX(int64_t, __vec32_i64, __reduce_max_int64, >) + 
+REDUCE_ADD(uint64_t, __vec32_i64, __reduce_add_uint64) +REDUCE_MINMAX(uint64_t, __vec32_i64, __reduce_min_uint64, <) +REDUCE_MINMAX(uint64_t, __vec32_i64, __reduce_max_uint64, >) + +/////////////////////////////////////////////////////////////////////////// +// masked load/store + +static FORCEINLINE __vec32_i8 __masked_load_8(void *p, + __vec32_i1 mask) { + __vec32_i8 ret; + int8_t *ptr = (int8_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & (1 << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE __vec32_i16 __masked_load_16(void *p, + __vec32_i1 mask) { + __vec32_i16 ret; + int16_t *ptr = (int16_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & (1 << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE __vec32_i32 __masked_load_32(void *p, + __vec32_i1 mask) { + __vec32_i32 ret; + int32_t *ptr = (int32_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & (1 << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE __vec32_i64 __masked_load_64(void *p, + __vec32_i1 mask) { + __vec32_i64 ret; + int64_t *ptr = (int64_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & (1 << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE void __masked_store_8(void *p, __vec32_i8 val, + __vec32_i1 mask) { + int8_t *ptr = (int8_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & (1 << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_16(void *p, __vec32_i16 val, + __vec32_i1 mask) { + int16_t *ptr = (int16_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & (1 << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_32(void *p, __vec32_i32 val, + __vec32_i1 mask) { + int32_t *ptr = (int32_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & (1 << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_64(void *p, __vec32_i64 val, + __vec32_i1 mask) { + int64_t *ptr = (int64_t *)p; + for (int i = 0; i < 32; ++i) + if ((mask.v & 
(1 << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_blend_8(void *p, __vec32_i8 val, + __vec32_i1 mask) { + __masked_store_8(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_16(void *p, __vec32_i16 val, + __vec32_i1 mask) { + __masked_store_16(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_32(void *p, __vec32_i32 val, + __vec32_i1 mask) { + __masked_store_32(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_64(void *p, __vec32_i64 val, + __vec32_i1 mask) { + __masked_store_64(p, val, mask); +} + +/////////////////////////////////////////////////////////////////////////// +// gather/scatter + +// offsets * offsetScale is in bytes (for all of these) + +#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \ +static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \ + uint32_t scale, OTYPE constOffset, \ + __vec32_i1 mask) { \ + VTYPE ret; \ + int8_t *base = (int8_t *)b; \ + for (int i = 0; i < 32; ++i) \ + if ((mask.v & (1 << i)) != 0) { \ + STYPE *ptr = (STYPE *)(base + scale * varyingOffset.v[i] + \ + constOffset.v[i]); \ + ret.v[i] = *ptr; \ + } \ + return ret; \ +} + + +GATHER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __gather_base_offsets32_i8) +GATHER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __gather_base_offsets64_i8) +GATHER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __gather_base_offsets32_i16) +GATHER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __gather_base_offsets64_i16) +GATHER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __gather_base_offsets32_i32) +GATHER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __gather_base_offsets64_i32) +GATHER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __gather_base_offsets32_i64) +GATHER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __gather_base_offsets64_i64) + +#define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \ +static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec32_i1 mask) { \ + VTYPE ret; \ + 
for (int i = 0; i < 32; ++i) \ + if ((mask.v & (1 << i)) != 0) { \ + STYPE *ptr = (STYPE *)ptrs.v[i]; \ + ret.v[i] = *ptr; \ + } \ + return ret; \ +} + +GATHER_GENERAL(__vec32_i8, int8_t, __vec32_i32, __gather32_i8) +GATHER_GENERAL(__vec32_i8, int8_t, __vec32_i64, __gather64_i8) +GATHER_GENERAL(__vec32_i16, int16_t, __vec32_i32, __gather32_i16) +GATHER_GENERAL(__vec32_i16, int16_t, __vec32_i64, __gather64_i16) +GATHER_GENERAL(__vec32_i32, int32_t, __vec32_i32, __gather32_i32) +GATHER_GENERAL(__vec32_i32, int32_t, __vec32_i64, __gather64_i32) +GATHER_GENERAL(__vec32_i64, int64_t, __vec32_i32, __gather32_i64) +GATHER_GENERAL(__vec32_i64, int64_t, __vec32_i64, __gather64_i64) + +// scatter + +#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \ +static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \ + uint32_t scale, OTYPE constOffset, \ + VTYPE val, __vec32_i1 mask) { \ + int8_t *base = (int8_t *)b; \ + for (int i = 0; i < 32; ++i) \ + if ((mask.v & (1 << i)) != 0) { \ + STYPE *ptr = (STYPE *)(base + scale * varyingOffset.v[i] + \ + constOffset.v[i]); \ + *ptr = val.v[i]; \ + } \ +} + + +SCATTER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __scatter_base_offsets32_i8) +SCATTER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __scatter_base_offsets64_i8) +SCATTER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __scatter_base_offsets32_i16) +SCATTER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __scatter_base_offsets64_i16) +SCATTER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __scatter_base_offsets32_i32) +SCATTER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __scatter_base_offsets64_i32) +SCATTER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __scatter_base_offsets32_i64) +SCATTER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __scatter_base_offsets64_i64) + +#define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \ +static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec32_i1 mask) { \ + VTYPE ret; \ + for (int i = 0; i < 32; ++i) \ + if 
((mask.v & (1 << i)) != 0) { \ + STYPE *ptr = (STYPE *)ptrs.v[i]; \ + *ptr = val.v[i]; \ + } \ +} + +SCATTER_GENERAL(__vec32_i8, int8_t, __vec32_i32, __scatter32_i8) +SCATTER_GENERAL(__vec32_i8, int8_t, __vec32_i64, __scatter64_i8) +SCATTER_GENERAL(__vec32_i16, int16_t, __vec32_i32, __scatter32_i16) +SCATTER_GENERAL(__vec32_i16, int16_t, __vec32_i64, __scatter64_i16) +SCATTER_GENERAL(__vec32_i32, int32_t, __vec32_i32, __scatter32_i32) +SCATTER_GENERAL(__vec32_i32, int32_t, __vec32_i64, __scatter64_i32) +SCATTER_GENERAL(__vec32_i64, int64_t, __vec32_i32, __scatter32_i64) +SCATTER_GENERAL(__vec32_i64, int64_t, __vec32_i64, __scatter64_i64) + +/////////////////////////////////////////////////////////////////////////// +// packed load/store + +static FORCEINLINE int32_t __packed_load_active(int32_t *ptr, __vec32_i32 *val, + __vec32_i1 mask) { + int count = 0; + for (int i = 0; i < 32; ++i) { + if ((mask.v & (1 << i)) != 0) { + val->v[i] = *ptr++; + ++count; + } + } + return count; +} + + +static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec32_i32 val, + __vec32_i1 mask) { + int count = 0; + for (int i = 0; i < 32; ++i) { + if ((mask.v & (1 << i)) != 0) { + *ptr++ = val.v[i]; + ++count; + } + } + return count; +} + +static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, + __vec32_i32 *val, + __vec32_i1 mask) { + int count = 0; + for (int i = 0; i < 32; ++i) { + if ((mask.v & (1 << i)) != 0) { + val->v[i] = *ptr++; + ++count; + } + } + return count; +} + + +static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, + __vec32_i32 val, + __vec32_i1 mask) { + int count = 0; + for (int i = 0; i < 32; ++i) { + if ((mask.v & (1 << i)) != 0) { + *ptr++ = val.v[i]; + ++count; + } + } + return count; +} + + +/////////////////////////////////////////////////////////////////////////// +// aos/soa + +static FORCEINLINE void __soa_to_aos3_float(__vec32_f v0, __vec32_f v1, __vec32_f v2, + float *ptr) { + for (int i = 0; i < 32; ++i) { + *ptr++ = 
__extract_element(v0, i); + *ptr++ = __extract_element(v1, i); + *ptr++ = __extract_element(v2, i); + } +} + +static FORCEINLINE void __aos_to_soa3_float(float *ptr, __vec32_f *out0, __vec32_f *out1, + __vec32_f *out2) { + for (int i = 0; i < 32; ++i) { + __insert_element(out0, i, *ptr++); + __insert_element(out1, i, *ptr++); + __insert_element(out2, i, *ptr++); + } +} + +static FORCEINLINE void __soa_to_aos4_float(__vec32_f v0, __vec32_f v1, __vec32_f v2, + __vec32_f v3, float *ptr) { + for (int i = 0; i < 32; ++i) { + *ptr++ = __extract_element(v0, i); + *ptr++ = __extract_element(v1, i); + *ptr++ = __extract_element(v2, i); + *ptr++ = __extract_element(v3, i); + } +} + +static FORCEINLINE void __aos_to_soa4_float(float *ptr, __vec32_f *out0, __vec32_f *out1, + __vec32_f *out2, __vec32_f *out3) { + for (int i = 0; i < 32; ++i) { + __insert_element(out0, i, *ptr++); + __insert_element(out1, i, *ptr++); + __insert_element(out2, i, *ptr++); + __insert_element(out3, i, *ptr++); + } +} + +/////////////////////////////////////////////////////////////////////////// +// prefetch + +static FORCEINLINE void __prefetch_read_uniform_1(unsigned char *) { +} + +static FORCEINLINE void __prefetch_read_uniform_2(unsigned char *) { +} + +static FORCEINLINE void __prefetch_read_uniform_3(unsigned char *) { +} + +static FORCEINLINE void __prefetch_read_uniform_nt(unsigned char *) { +} + +/////////////////////////////////////////////////////////////////////////// +// atomics + +static FORCEINLINE uint32_t __atomic_add(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedAdd((LONG volatile *)p, v) - v; +#else + return __sync_fetch_and_add(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_sub(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedAdd((LONG volatile *)p, -v) + v; +#else + return __sync_fetch_and_sub(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_and(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedAnd((LONG volatile 
*)p, v); +#else + return __sync_fetch_and_and(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_or(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedOr((LONG volatile *)p, v); +#else + return __sync_fetch_and_or(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_xor(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedXor((LONG volatile *)p, v); +#else + return __sync_fetch_and_xor(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_min(uint32_t *p, uint32_t v) { + int32_t old, min; + do { + old = *((volatile int32_t *)p); + min = (old < (int32_t)v) ? old : (int32_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_max(uint32_t *p, uint32_t v) { + int32_t old, max; + do { + old = *((volatile int32_t *)p); + max = (old > (int32_t)v) ? old : (int32_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_umin(uint32_t *p, uint32_t v) { + uint32_t old, min; + do { + old = *((volatile uint32_t *)p); + min = (old < v) ? old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_umax(uint32_t *p, uint32_t v) { + uint32_t old, max; + do { + old = *((volatile uint32_t *)p); + max = (old > v) ? 
old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_xchg(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedExchange((LONG volatile *)p, v); +#else + return __sync_lock_test_and_set(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_cmpxchg(uint32_t *p, uint32_t cmpval, + uint32_t newval) { +#ifdef _MSC_VER + return InterlockedCompareExchange((LONG volatile *)p, newval, cmpval); +#else + return __sync_val_compare_and_swap(p, cmpval, newval); +#endif +} + +static FORCEINLINE uint64_t __atomic_add(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedAdd64((LONGLONG volatile *)p, v) - v; /* InterlockedAdd64 returns the new value; undo the add to get the old one */ +#else + return __sync_fetch_and_add(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_sub(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedAdd64((LONGLONG volatile *)p, -v) + v; /* new value + v == old value */ +#else + return __sync_fetch_and_sub(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_and(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedAnd64((LONGLONG volatile *)p, v); /* already returns the original value, like the 32-bit version */ +#else + return __sync_fetch_and_and(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_or(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedOr64((LONGLONG volatile *)p, v); /* already returns the original value */ +#else + return __sync_fetch_and_or(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_xor(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedXor64((LONGLONG volatile *)p, v); /* already returns the original value */ +#else + return __sync_fetch_and_xor(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_min(uint64_t *p, uint64_t v) { + int64_t old, min; + do { + old = *((volatile int64_t *)p); + min = (old < (int64_t)v) ?
old : (int64_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_max(uint64_t *p, uint64_t v) { + int64_t old, max; + do { + old = *((volatile int64_t *)p); + max = (old > (int64_t)v) ? old : (int64_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_umin(uint64_t *p, uint64_t v) { + uint64_t old, min; + do { + old = *((volatile uint64_t *)p); + min = (old < v) ? old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_umax(uint64_t *p, uint64_t v) { + uint64_t old, max; + do { + old = *((volatile uint64_t *)p); + max = (old > v) ? 
old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_xchg(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedExchange64((LONGLONG volatile *)p, v); +#else + return __sync_lock_test_and_set(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_cmpxchg(uint64_t *p, uint64_t cmpval, + uint64_t newval) { +#ifdef _MSC_VER + return InterlockedCompareExchange64((LONGLONG volatile *)p, newval, cmpval); +#else + return __sync_val_compare_and_swap(p, cmpval, newval); +#endif +} diff --git a/run_tests.py b/run_tests.py index 6bfadf05..03fda1ad 100755 --- a/run_tests.py +++ b/run_tests.py @@ -76,8 +76,11 @@ if is_generic_target and options.include_file == None: sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n") options.include_file = "examples/intrinsics/generic-16.h" elif options.target == "generic-32": - sys.stderr.write("No generics #include specified and no default available for \"generic-32\" target.\n") - sys.exit(1) + sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-32.h\n") + options.include_file = "examples/intrinsics/generic-32.h" + elif options.target == "generic-64": + sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-64.h\n") + options.include_file = "examples/intrinsics/generic-64.h" if options.compiler_exe == None: if is_windows: From 6c7bcf00e73bf8ab125e4643c86080b30ce8135e Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 14:27:19 -0700 Subject: [PATCH 162/173] Add examples/intrinsics/generic-64.h. 
--- examples/intrinsics/generic-64.h | 1817 ++++++++++++++++++++++++++++++ 1 file changed, 1817 insertions(+) create mode 100644 examples/intrinsics/generic-64.h diff --git a/examples/intrinsics/generic-64.h b/examples/intrinsics/generic-64.h new file mode 100644 index 00000000..08ae20d0 --- /dev/null +++ b/examples/intrinsics/generic-64.h @@ -0,0 +1,1817 @@ +/* + Copyright (c) 2010-2012, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include + +#ifdef _MSC_VER +#define FORCEINLINE __forceinline +#define PRE_ALIGN(x) /*__declspec(align(x))*/ +#define POST_ALIGN(x) +#define roundf(x) (floorf(x + .5f)) +#define round(x) (floor(x + .5)) +#else +#define FORCEINLINE __attribute__((always_inline)) +#define PRE_ALIGN(x) +#define POST_ALIGN(x) __attribute__ ((aligned(x))) +#endif + +typedef float __vec1_f; +typedef double __vec1_d; +typedef int8_t __vec1_i8; +typedef int16_t __vec1_i16; +typedef int32_t __vec1_i32; +typedef int64_t __vec1_i64; + +struct __vec64_i1 { + __vec64_i1() { } + __vec64_i1(uint64_t v0, uint64_t v1, uint64_t v2, uint64_t v3, + uint64_t v4, uint64_t v5, uint64_t v6, uint64_t v7, + uint64_t v8, uint64_t v9, uint64_t v10, uint64_t v11, + uint64_t v12, uint64_t v13, uint64_t v14, uint64_t v15, + uint64_t v16, uint64_t v17, uint64_t v18, uint64_t v19, + uint64_t v20, uint64_t v21, uint64_t v22, uint64_t v23, + uint64_t v24, uint64_t v25, uint64_t v26, uint64_t v27, + uint64_t v28, uint64_t v29, uint64_t v30, uint64_t v31, + uint64_t v32, uint64_t v33, uint64_t v34, uint64_t v35, + uint64_t v36, uint64_t v37, uint64_t v38, uint64_t v39, + uint64_t v40, uint64_t v41, uint64_t v42, uint64_t v43, + uint64_t v44, uint64_t v45, uint64_t v46, uint64_t v47, + uint64_t v48, uint64_t v49, uint64_t v50, uint64_t v51, + uint64_t v52, uint64_t v53, uint64_t v54, uint64_t v55, + uint64_t v56, uint64_t v57, uint64_t v58, uint64_t v59, + uint64_t v60, uint64_t v61, uint64_t v62, uint64_t v63) { + v = ((v0 & 1) | + ((v1 & 1) << 1) | + ((v2 & 1) << 2) | + ((v3 & 1) << 3) | + ((v4 & 1) << 4) | + ((v5 & 1) << 5) | + ((v6 & 1) << 6) | + ((v7 & 1) << 7) | + ((v8 & 1) << 8) | + ((v9 & 1) << 9) | + ((v10 & 1) << 10) | + ((v11 & 1) << 11) | + ((v12 & 1) << 12) | + ((v13 & 1) << 13) | + ((v14 & 1) << 14) | + ((v15 & 1) << 15) | + ((v16 & 1) << 16) | + ((v17 & 1) << 17) | + ((v18 & 1) << 18) | + ((v19 & 1) << 19) | + ((v20 & 1) << 20) | + ((v21 & 1) << 21) | + ((v22 & 1) << 22) | + ((v23 & 1) 
<< 23) | + ((v24 & 1) << 24) | + ((v25 & 1) << 25) | + ((v26 & 1) << 26) | + ((v27 & 1) << 27) | + ((v28 & 1) << 28) | + ((v29 & 1) << 29) | + ((v30 & 1) << 30) | + ((v31 & 1) << 31) | + ((v32 & 1) << 32) | + ((v33 & 1) << 33) | + ((v34 & 1) << 34) | + ((v35 & 1) << 35) | + ((v36 & 1) << 36) | + ((v37 & 1) << 37) | + ((v38 & 1) << 38) | + ((v39 & 1) << 39) | + ((v40 & 1) << 40) | + ((v41 & 1) << 41) | + ((v42 & 1) << 42) | + ((v43 & 1) << 43) | + ((v44 & 1) << 44) | + ((v45 & 1) << 45) | + ((v46 & 1) << 46) | + ((v47 & 1) << 47) | + ((v48 & 1) << 48) | + ((v49 & 1) << 49) | + ((v50 & 1) << 50) | + ((v51 & 1) << 51) | + ((v52 & 1) << 52) | + ((v53 & 1) << 53) | + ((v54 & 1) << 54) | + ((v55 & 1) << 55) | + ((v56 & 1) << 56) | + ((v57 & 1) << 57) | + ((v58 & 1) << 58) | + ((v59 & 1) << 59) | + ((v60 & 1) << 60) | + ((v61 & 1) << 61) | + ((v62 & 1) << 62) | + ((v63 & 1) << 63)); + } + + uint64_t v; +}; + + +template +struct vec64 { + vec64() { } + vec64(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, + T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, + T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31, + T v32, T v33, T v34, T v35, T v36, T v37, T v38, T v39, + T v40, T v41, T v42, T v43, T v44, T v45, T v46, T v47, + T v48, T v49, T v50, T v51, T v52, T v53, T v54, T v55, + T v56, T v57, T v58, T v59, T v60, T v61, T v62, T v63) { + v[0] = v0; v[1] = v1; v[2] = v2; v[3] = v3; + v[4] = v4; v[5] = v5; v[6] = v6; v[7] = v7; + v[8] = v8; v[9] = v9; v[10] = v10; v[11] = v11; + v[12] = v12; v[13] = v13; v[14] = v14; v[15] = v15; + v[16] = v16; v[17] = v17; v[18] = v18; v[19] = v19; + v[20] = v20; v[21] = v21; v[22] = v22; v[23] = v23; + v[24] = v24; v[25] = v25; v[26] = v26; v[27] = v27; + v[28] = v28; v[29] = v29; v[30] = v30; v[31] = v31; + v[32] = v32; v[33] = v33; v[34] = v34; v[35] = v35; + v[36] = v36; v[37] = v37; v[38] = v38; v[39] = v39; + v[40] = v40; v[41] = v41; v[42] = v42; v[43] = v43; + 
v[44] = v44; v[45] = v45; v[46] = v46; v[47] = v47; + v[48] = v48; v[49] = v49; v[50] = v50; v[51] = v51; + v[52] = v52; v[53] = v53; v[54] = v54; v[55] = v55; + v[56] = v56; v[57] = v57; v[58] = v58; v[59] = v59; + v[60] = v60; v[61] = v61; v[62] = v62; v[63] = v63; + } + T v[64]; +}; + +PRE_ALIGN(64) struct __vec64_f : public vec64 { + __vec64_f() { } + __vec64_f(float v0, float v1, float v2, float v3, + float v4, float v5, float v6, float v7, + float v8, float v9, float v10, float v11, + float v12, float v13, float v14, float v15, + float v16, float v17, float v18, float v19, + float v20, float v21, float v22, float v23, + float v24, float v25, float v26, float v27, + float v28, float v29, float v30, float v31, + float v32, float v33, float v34, float v35, + float v36, float v37, float v38, float v39, + float v40, float v41, float v42, float v43, + float v44, float v45, float v46, float v47, + float v48, float v49, float v50, float v51, + float v52, float v53, float v54, float v55, + float v56, float v57, float v58, float v59, + float v60, float v61, float v62, float v63) + : vec64(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50, v51, v52, v53, v54, v55, + v56, v57, v58, v59, v60, v61, v62, v63) { } + +} POST_ALIGN(64); + +PRE_ALIGN(128) struct __vec64_d : public vec64 { + __vec64_d() { } + __vec64_d(double v0, double v1, double v2, double v3, + double v4, double v5, double v6, double v7, + double v8, double v9, double v10, double v11, + double v12, double v13, double v14, double v15, + double v16, double v17, double v18, double v19, + double v20, double v21, double v22, double v23, + double v24, double v25, double v26, double v27, + double v28, double v29, double v30, double v31, + double v32, double v33, double v34, double v35, + double v36, double v37, 
double v38, double v39, + double v40, double v41, double v42, double v43, + double v44, double v45, double v46, double v47, + double v48, double v49, double v50, double v51, + double v52, double v53, double v54, double v55, + double v56, double v57, double v58, double v59, + double v60, double v61, double v62, double v63) + : vec64(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50, v51, v52, v53, v54, v55, + v56, v57, v58, v59, v60, v61, v62, v63) { } + +} POST_ALIGN(128); + +PRE_ALIGN(16) struct __vec64_i8 : public vec64 { + __vec64_i8() { } + __vec64_i8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, + int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, + int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, + int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, + int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, + int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, + int8_t v60, int8_t v61, int8_t v62, int8_t v63) + : vec64(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50, v51, v52, v53, v54, v55, + v56, v57, v58, v59, v60, v61, v62, v63) { } + +} POST_ALIGN(16); + +PRE_ALIGN(32) struct __vec64_i16 : public vec64 { + __vec64_i16() { } + 
__vec64_i16(int16_t v0, int16_t v1, int16_t v2, int16_t v3, + int16_t v4, int16_t v5, int16_t v6, int16_t v7, + int16_t v8, int16_t v9, int16_t v10, int16_t v11, + int16_t v12, int16_t v13, int16_t v14, int16_t v15, + int16_t v16, int16_t v17, int16_t v18, int16_t v19, + int16_t v20, int16_t v21, int16_t v22, int16_t v23, + int16_t v24, int16_t v25, int16_t v26, int16_t v27, + int16_t v28, int16_t v29, int16_t v30, int16_t v31, + int16_t v32, int16_t v33, int16_t v34, int16_t v35, + int16_t v36, int16_t v37, int16_t v38, int16_t v39, + int16_t v40, int16_t v41, int16_t v42, int16_t v43, + int16_t v44, int16_t v45, int16_t v46, int16_t v47, + int16_t v48, int16_t v49, int16_t v50, int16_t v51, + int16_t v52, int16_t v53, int16_t v54, int16_t v55, + int16_t v56, int16_t v57, int16_t v58, int16_t v59, + int16_t v60, int16_t v61, int16_t v62, int16_t v63) + : vec64(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50, v51, v52, v53, v54, v55, + v56, v57, v58, v59, v60, v61, v62, v63) { } + +} POST_ALIGN(32); + +PRE_ALIGN(64) struct __vec64_i32 : public vec64 { + __vec64_i32() { } + __vec64_i32(int32_t v0, int32_t v1, int32_t v2, int32_t v3, + int32_t v4, int32_t v5, int32_t v6, int32_t v7, + int32_t v8, int32_t v9, int32_t v10, int32_t v11, + int32_t v12, int32_t v13, int32_t v14, int32_t v15, + int32_t v16, int32_t v17, int32_t v18, int32_t v19, + int32_t v20, int32_t v21, int32_t v22, int32_t v23, + int32_t v24, int32_t v25, int32_t v26, int32_t v27, + int32_t v28, int32_t v29, int32_t v30, int32_t v31, + int32_t v32, int32_t v33, int32_t v34, int32_t v35, + int32_t v36, int32_t v37, int32_t v38, int32_t v39, + int32_t v40, int32_t v41, int32_t v42, int32_t v43, + int32_t v44, int32_t v45, int32_t v46, int32_t v47, + int32_t v48, int32_t v49, int32_t v50, int32_t v51, 
+ int32_t v52, int32_t v53, int32_t v54, int32_t v55, + int32_t v56, int32_t v57, int32_t v58, int32_t v59, + int32_t v60, int32_t v61, int32_t v62, int32_t v63) + : vec64(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50, v51, v52, v53, v54, v55, + v56, v57, v58, v59, v60, v61, v62, v63) { } + +} POST_ALIGN(64); + +static inline int32_t __extract_element(__vec64_i32, int); + +PRE_ALIGN(128) struct __vec64_i64 : public vec64 { + __vec64_i64() { } + __vec64_i64(int64_t v0, int64_t v1, int64_t v2, int64_t v3, + int64_t v4, int64_t v5, int64_t v6, int64_t v7, + int64_t v8, int64_t v9, int64_t v10, int64_t v11, + int64_t v12, int64_t v13, int64_t v14, int64_t v15, + int64_t v16, int64_t v17, int64_t v18, int64_t v19, + int64_t v20, int64_t v21, int64_t v22, int64_t v23, + int64_t v24, int64_t v25, int64_t v26, int64_t v27, + int64_t v28, int64_t v29, int64_t v30, int64_t v31, + int64_t v32, int64_t v33, int64_t v34, int64_t v35, + int64_t v36, int64_t v37, int64_t v38, int64_t v39, + int64_t v40, int64_t v41, int64_t v42, int64_t v43, + int64_t v44, int64_t v45, int64_t v46, int64_t v47, + int64_t v48, int64_t v49, int64_t v50, int64_t v51, + int64_t v52, int64_t v53, int64_t v54, int64_t v55, + int64_t v56, int64_t v57, int64_t v58, int64_t v59, + int64_t v60, int64_t v61, int64_t v62, int64_t v63) + : vec64(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50, v51, v52, v53, v54, v55, + v56, v57, v58, v59, v60, v61, v62, v63) { } + +} POST_ALIGN(128); + +/////////////////////////////////////////////////////////////////////////// +// macros... 
+ +#define UNARY_OP(TYPE, NAME, OP) \ +static FORCEINLINE TYPE NAME(TYPE v) { \ + TYPE ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = OP(v.v[i]); \ + return ret; \ +} + +#define BINARY_OP(TYPE, NAME, OP) \ +static FORCEINLINE TYPE NAME(TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = a.v[i] OP b.v[i]; \ + return ret; \ +} + +#define BINARY_OP_CAST(TYPE, CAST, NAME, OP) \ +static FORCEINLINE TYPE NAME(TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = (CAST)(a.v[i]) OP (CAST)(b.v[i]); \ + return ret; \ +} + +#define BINARY_OP_FUNC(TYPE, NAME, FUNC) \ +static FORCEINLINE TYPE NAME(TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = FUNC(a.v[i], b.v[i]); \ + return ret; \ +} + +#define CMP_OP(TYPE, CAST, NAME, OP) \ +static FORCEINLINE __vec64_i1 NAME(TYPE a, TYPE b) { \ + __vec64_i1 ret; \ + ret.v = 0; \ + for (int i = 0; i < 64; ++i) \ + ret.v |= (uint64_t)((CAST)(a.v[i]) OP (CAST)(b.v[i])) << i; /* widen to 64 bits before shifting so lanes 32..63 land in the mask */ \ + return ret; \ +} + +#define INSERT_EXTRACT(VTYPE, STYPE) \ +static FORCEINLINE STYPE __extract_element(VTYPE v, int index) { \ + return ((STYPE *)&v)[index]; \ +} \ +static FORCEINLINE void __insert_element(VTYPE *v, int index, STYPE val) { \ + ((STYPE *)v)[index] = val; \ +} + +#define LOAD_STORE(VTYPE, STYPE) \ +static FORCEINLINE VTYPE __load(VTYPE *p, int align) { \ + STYPE *ptr = (STYPE *)p; \ + VTYPE ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = ptr[i]; \ + return ret; \ +} \ +static FORCEINLINE void __store(VTYPE *p, VTYPE v, int align) { \ + STYPE *ptr = (STYPE *)p; \ + for (int i = 0; i < 64; ++i) \ + ptr[i] = v.v[i]; \ +} + +#define REDUCE_ADD(TYPE, VTYPE, NAME) \ +static FORCEINLINE TYPE NAME(VTYPE v) { \ + TYPE ret = v.v[0]; \ + for (int i = 1; i < 64; ++i) \ + ret = ret + v.v[i]; \ + return ret; \ +} + +#define REDUCE_MINMAX(TYPE, VTYPE, NAME, OP) \ +static FORCEINLINE TYPE NAME(VTYPE v) { \ + TYPE ret = v.v[0]; \ + for (int i = 1; i < 64; ++i) \ + ret = (ret OP
(TYPE)v.v[i]) ? ret : (TYPE)v.v[i]; \ + return ret; \ +} + +#define SELECT(TYPE) \ +static FORCEINLINE TYPE __select(__vec64_i1 mask, TYPE a, TYPE b) { \ + TYPE ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = (mask.v & (1<v &= ~(1ull << index); /* 64-bit one: index may be 31..63 */ + else + vec->v |= (1ull << index); +} + +static FORCEINLINE __vec64_i1 __load(__vec64_i1 *p, int align) { + uint64_t *ptr = (uint64_t *)p; /* the mask is 64 bits wide; read all of it */ + __vec64_i1 r; + r.v = *ptr; + return r; +} + +static FORCEINLINE void __store(__vec64_i1 *p, __vec64_i1 v, int align) { + uint64_t *ptr = (uint64_t *)p; /* write all 64 mask bits */ + *ptr = v.v; +} + +static FORCEINLINE __vec64_i1 __smear_i1(__vec64_i1, int v) { + return __vec64_i1(v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v); +} + + +/////////////////////////////////////////////////////////////////////////// +// int8 + +BINARY_OP(__vec64_i8, __add, +) +BINARY_OP(__vec64_i8, __sub, -) +BINARY_OP(__vec64_i8, __mul, *) + +BINARY_OP(__vec64_i8, __or, |) +BINARY_OP(__vec64_i8, __and, &) +BINARY_OP(__vec64_i8, __xor, ^) +BINARY_OP(__vec64_i8, __shl, <<) + +BINARY_OP_CAST(__vec64_i8, uint8_t, __udiv, /) +BINARY_OP_CAST(__vec64_i8, int8_t, __sdiv, /) + +BINARY_OP_CAST(__vec64_i8, uint8_t, __urem, %) +BINARY_OP_CAST(__vec64_i8, int8_t, __srem, %) +BINARY_OP_CAST(__vec64_i8, uint8_t, __lshr, >>) +BINARY_OP_CAST(__vec64_i8, int8_t, __ashr, >>) + +SHIFT_UNIFORM(__vec64_i8, uint8_t, __lshr, >>) +SHIFT_UNIFORM(__vec64_i8, int8_t, __ashr, >>) +SHIFT_UNIFORM(__vec64_i8, int8_t, __shl, <<) + +CMP_OP(__vec64_i8, int8_t, __equal, ==) +CMP_OP(__vec64_i8, int8_t, __not_equal, !=) +CMP_OP(__vec64_i8, uint8_t, __unsigned_less_equal, <=) +CMP_OP(__vec64_i8, int8_t, __signed_less_equal, <=) +CMP_OP(__vec64_i8, uint8_t, __unsigned_greater_equal, >=) +CMP_OP(__vec64_i8, int8_t, __signed_greater_equal, >=) +CMP_OP(__vec64_i8, uint8_t, __unsigned_less_than, <) +CMP_OP(__vec64_i8,
int8_t, __signed_less_than, <) +CMP_OP(__vec64_i8, uint8_t, __unsigned_greater_than, >) +CMP_OP(__vec64_i8, int8_t, __signed_greater_than, >) + +SELECT(__vec64_i8) +INSERT_EXTRACT(__vec64_i8, int8_t) +SMEAR(__vec64_i8, i8, int8_t) +BROADCAST(__vec64_i8, i8, int8_t) +ROTATE(__vec64_i8, i8, int8_t) +SHUFFLES(__vec64_i8, i8, int8_t) +LOAD_STORE(__vec64_i8, int8_t) + +/////////////////////////////////////////////////////////////////////////// +// int16 + +BINARY_OP(__vec64_i16, __add, +) +BINARY_OP(__vec64_i16, __sub, -) +BINARY_OP(__vec64_i16, __mul, *) + +BINARY_OP(__vec64_i16, __or, |) +BINARY_OP(__vec64_i16, __and, &) +BINARY_OP(__vec64_i16, __xor, ^) +BINARY_OP(__vec64_i16, __shl, <<) + +BINARY_OP_CAST(__vec64_i16, uint16_t, __udiv, /) +BINARY_OP_CAST(__vec64_i16, int16_t, __sdiv, /) + +BINARY_OP_CAST(__vec64_i16, uint16_t, __urem, %) +BINARY_OP_CAST(__vec64_i16, int16_t, __srem, %) +BINARY_OP_CAST(__vec64_i16, uint16_t, __lshr, >>) +BINARY_OP_CAST(__vec64_i16, int16_t, __ashr, >>) + +SHIFT_UNIFORM(__vec64_i16, uint16_t, __lshr, >>) +SHIFT_UNIFORM(__vec64_i16, int16_t, __ashr, >>) +SHIFT_UNIFORM(__vec64_i16, int16_t, __shl, <<) + +CMP_OP(__vec64_i16, int16_t, __equal, ==) +CMP_OP(__vec64_i16, int16_t, __not_equal, !=) +CMP_OP(__vec64_i16, uint16_t, __unsigned_less_equal, <=) +CMP_OP(__vec64_i16, int16_t, __signed_less_equal, <=) +CMP_OP(__vec64_i16, uint16_t, __unsigned_greater_equal, >=) +CMP_OP(__vec64_i16, int16_t, __signed_greater_equal, >=) +CMP_OP(__vec64_i16, uint16_t, __unsigned_less_than, <) +CMP_OP(__vec64_i16, int16_t, __signed_less_than, <) +CMP_OP(__vec64_i16, uint16_t, __unsigned_greater_than, >) +CMP_OP(__vec64_i16, int16_t, __signed_greater_than, >) + +SELECT(__vec64_i16) +INSERT_EXTRACT(__vec64_i16, int16_t) +SMEAR(__vec64_i16, i16, int16_t) +BROADCAST(__vec64_i16, i16, int16_t) +ROTATE(__vec64_i16, i16, int16_t) +SHUFFLES(__vec64_i16, i16, int16_t) +LOAD_STORE(__vec64_i16, int16_t) + 
+/////////////////////////////////////////////////////////////////////////// +// int32 + +BINARY_OP(__vec64_i32, __add, +) +BINARY_OP(__vec64_i32, __sub, -) +BINARY_OP(__vec64_i32, __mul, *) + +BINARY_OP(__vec64_i32, __or, |) +BINARY_OP(__vec64_i32, __and, &) +BINARY_OP(__vec64_i32, __xor, ^) +BINARY_OP(__vec64_i32, __shl, <<) + +BINARY_OP_CAST(__vec64_i32, uint32_t, __udiv, /) +BINARY_OP_CAST(__vec64_i32, int32_t, __sdiv, /) + +BINARY_OP_CAST(__vec64_i32, uint32_t, __urem, %) +BINARY_OP_CAST(__vec64_i32, int32_t, __srem, %) +BINARY_OP_CAST(__vec64_i32, uint32_t, __lshr, >>) +BINARY_OP_CAST(__vec64_i32, int32_t, __ashr, >>) + +SHIFT_UNIFORM(__vec64_i32, uint32_t, __lshr, >>) +SHIFT_UNIFORM(__vec64_i32, int32_t, __ashr, >>) +SHIFT_UNIFORM(__vec64_i32, int32_t, __shl, <<) + +CMP_OP(__vec64_i32, int32_t, __equal, ==) +CMP_OP(__vec64_i32, int32_t, __not_equal, !=) +CMP_OP(__vec64_i32, uint32_t, __unsigned_less_equal, <=) +CMP_OP(__vec64_i32, int32_t, __signed_less_equal, <=) +CMP_OP(__vec64_i32, uint32_t, __unsigned_greater_equal, >=) +CMP_OP(__vec64_i32, int32_t, __signed_greater_equal, >=) +CMP_OP(__vec64_i32, uint32_t, __unsigned_less_than, <) +CMP_OP(__vec64_i32, int32_t, __signed_less_than, <) +CMP_OP(__vec64_i32, uint32_t, __unsigned_greater_than, >) +CMP_OP(__vec64_i32, int32_t, __signed_greater_than, >) + +SELECT(__vec64_i32) +INSERT_EXTRACT(__vec64_i32, int32_t) +SMEAR(__vec64_i32, i32, int32_t) +BROADCAST(__vec64_i32, i32, int32_t) +ROTATE(__vec64_i32, i32, int32_t) +SHUFFLES(__vec64_i32, i32, int32_t) +LOAD_STORE(__vec64_i32, int32_t) + +/////////////////////////////////////////////////////////////////////////// +// int64 + +BINARY_OP(__vec64_i64, __add, +) +BINARY_OP(__vec64_i64, __sub, -) +BINARY_OP(__vec64_i64, __mul, *) + +BINARY_OP(__vec64_i64, __or, |) +BINARY_OP(__vec64_i64, __and, &) +BINARY_OP(__vec64_i64, __xor, ^) +BINARY_OP(__vec64_i64, __shl, <<) + +BINARY_OP_CAST(__vec64_i64, uint64_t, __udiv, /) +BINARY_OP_CAST(__vec64_i64, int64_t, __sdiv, /) 
+ +BINARY_OP_CAST(__vec64_i64, uint64_t, __urem, %) +BINARY_OP_CAST(__vec64_i64, int64_t, __srem, %) +BINARY_OP_CAST(__vec64_i64, uint64_t, __lshr, >>) +BINARY_OP_CAST(__vec64_i64, int64_t, __ashr, >>) + +SHIFT_UNIFORM(__vec64_i64, uint64_t, __lshr, >>) +SHIFT_UNIFORM(__vec64_i64, int64_t, __ashr, >>) +SHIFT_UNIFORM(__vec64_i64, int64_t, __shl, <<) + +CMP_OP(__vec64_i64, int64_t, __equal, ==) +CMP_OP(__vec64_i64, int64_t, __not_equal, !=) +CMP_OP(__vec64_i64, uint64_t, __unsigned_less_equal, <=) +CMP_OP(__vec64_i64, int64_t, __signed_less_equal, <=) +CMP_OP(__vec64_i64, uint64_t, __unsigned_greater_equal, >=) +CMP_OP(__vec64_i64, int64_t, __signed_greater_equal, >=) +CMP_OP(__vec64_i64, uint64_t, __unsigned_less_than, <) +CMP_OP(__vec64_i64, int64_t, __signed_less_than, <) +CMP_OP(__vec64_i64, uint64_t, __unsigned_greater_than, >) +CMP_OP(__vec64_i64, int64_t, __signed_greater_than, >) + +SELECT(__vec64_i64) +INSERT_EXTRACT(__vec64_i64, int64_t) +SMEAR(__vec64_i64, i64, int64_t) +BROADCAST(__vec64_i64, i64, int64_t) +ROTATE(__vec64_i64, i64, int64_t) +SHUFFLES(__vec64_i64, i64, int64_t) +LOAD_STORE(__vec64_i64, int64_t) + +/////////////////////////////////////////////////////////////////////////// +// float + +BINARY_OP(__vec64_f, __add, +) +BINARY_OP(__vec64_f, __sub, -) +BINARY_OP(__vec64_f, __mul, *) +BINARY_OP(__vec64_f, __div, /) + +CMP_OP(__vec64_f, float, __equal, ==) +CMP_OP(__vec64_f, float, __not_equal, !=) +CMP_OP(__vec64_f, float, __less_than, <) +CMP_OP(__vec64_f, float, __less_equal, <=) +CMP_OP(__vec64_f, float, __greater_than, >) +CMP_OP(__vec64_f, float, __greater_equal, >=) + +static FORCEINLINE __vec64_i1 __ordered(__vec64_f a, __vec64_f b) { + __vec64_i1 ret; + ret.v = 0; + for (int i = 0; i < 64; ++i) + ret.v |= ((a.v[i] == a.v[i]) && (b.v[i] == b.v[i])) ? 
(1ull << i) : 0; + return ret; +} + +#if 0 + case Instruction::FRem: intrinsic = "__frem"; break; +#endif + +SELECT(__vec64_f) +INSERT_EXTRACT(__vec64_f, float) +SMEAR(__vec64_f, float, float) +BROADCAST(__vec64_f, float, float) +ROTATE(__vec64_f, float, float) +SHUFFLES(__vec64_f, float, float) +LOAD_STORE(__vec64_f, float) + +static FORCEINLINE float __exp_uniform_float(float v) { + return expf(v); +} + +static FORCEINLINE __vec64_f __exp_varying_float(__vec64_f v) { + __vec64_f ret; + for (int i = 0; i < 64; ++i) + ret.v[i] = expf(v.v[i]); + return ret; +} + +static FORCEINLINE float __log_uniform_float(float v) { + return logf(v); +} + +static FORCEINLINE __vec64_f __log_varying_float(__vec64_f v) { + __vec64_f ret; + for (int i = 0; i < 64; ++i) + ret.v[i] = logf(v.v[i]); + return ret; +} + +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} + +static FORCEINLINE __vec64_f __pow_varying_float(__vec64_f a, __vec64_f b) { + __vec64_f ret; + for (int i = 0; i < 64; ++i) + ret.v[i] = powf(a.v[i], b.v[i]); + return ret; +} + +static FORCEINLINE int __intbits(float v) { + union { + float f; + int i; + } u; + u.f = v; + return u.i; +} + +static FORCEINLINE float __floatbits(int v) { + union { + float f; + int i; + } u; + u.i = v; + return u.f; +} + +static FORCEINLINE float __half_to_float_uniform(int16_t h) { + static const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift + + int32_t o = ((int32_t)(h & 0x7fff)) << 13; // exponent/mantissa bits + uint32_t exp = shifted_exp & o; // just the exponent + o += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp == shifted_exp) // Inf/NaN? + o += (128 - 16) << 23; // extra exp adjust + else if (exp == 0) { // Zero/Denormal? 
+ o += 1 << 23; // extra exp adjust + o = __intbits(__floatbits(o) - __floatbits(113 << 23)); // renormalize + } + + o |= ((int32_t)(h & 0x8000)) << 16; // sign bit + return __floatbits(o); +} + + +static FORCEINLINE __vec64_f __half_to_float_varying(__vec64_i16 v) { + __vec64_f ret; + for (int i = 0; i < 64; ++i) + ret.v[i] = __half_to_float_uniform(v.v[i]); + return ret; +} + + +static FORCEINLINE int16_t __float_to_half_uniform(float f) { + uint32_t sign_mask = 0x80000000u; + int32_t o; + + int32_t fint = __intbits(f); + int32_t sign = fint & sign_mask; + fint ^= sign; + + int32_t f32infty = 255 << 23; + o = (fint > f32infty) ? 0x7e00 : 0x7c00; + + // (De)normalized number or zero + // update fint unconditionally to save the blending; we don't need it + // anymore for the Inf/NaN case anyway. + const uint32_t round_mask = ~0xfffu; + const int32_t magic = 15 << 23; + const int32_t f16infty = 31 << 23; + + int32_t fint2 = __intbits(__floatbits(fint & round_mask) * __floatbits(magic)) - round_mask; + fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed + + if (fint < f32infty) + o = fint2 >> 13; // Take the bits! 
+ + return (o | (sign >> 16)); +} + + +static FORCEINLINE __vec64_i16 __float_to_half_varying(__vec64_f v) { + __vec64_i16 ret; + for (int i = 0; i < 64; ++i) + ret.v[i] = __float_to_half_uniform(v.v[i]); + return ret; +} + + +/////////////////////////////////////////////////////////////////////////// +// double + +BINARY_OP(__vec64_d, __add, +) +BINARY_OP(__vec64_d, __sub, -) +BINARY_OP(__vec64_d, __mul, *) +BINARY_OP(__vec64_d, __div, /) + +CMP_OP(__vec64_d, double, __equal, ==) +CMP_OP(__vec64_d, double, __not_equal, !=) +CMP_OP(__vec64_d, double, __less_than, <) +CMP_OP(__vec64_d, double, __less_equal, <=) +CMP_OP(__vec64_d, double, __greater_than, >) +CMP_OP(__vec64_d, double, __greater_equal, >=) + +static FORCEINLINE __vec64_i1 __ordered(__vec64_d a, __vec64_d b) { + __vec64_i1 ret; + ret.v = 0; + for (int i = 0; i < 64; ++i) + ret.v |= ((a.v[i] == a.v[i]) && (b.v[i] == b.v[i])) ? (1ull << i) : 0; + return ret; +} + +#if 0 + case Instruction::FRem: intrinsic = "__frem"; break; +#endif + +SELECT(__vec64_d) +INSERT_EXTRACT(__vec64_d, double) +SMEAR(__vec64_d, double, double) +BROADCAST(__vec64_d, double, double) +ROTATE(__vec64_d, double, double) +SHUFFLES(__vec64_d, double, double) +LOAD_STORE(__vec64_d, double) + +/////////////////////////////////////////////////////////////////////////// +// casts + + +#define CAST(TO, STO, FROM, SFROM, FUNC) \ +static FORCEINLINE TO FUNC(TO, FROM val) { \ + TO ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = (STO)((SFROM)(val.v[i])); \ + return ret; \ +} + +// sign extension conversions +CAST(__vec64_i64, int64_t, __vec64_i32, int32_t, __cast_sext) +CAST(__vec64_i64, int64_t, __vec64_i16, int16_t, __cast_sext) +CAST(__vec64_i64, int64_t, __vec64_i8, int8_t, __cast_sext) +CAST(__vec64_i32, int32_t, __vec64_i16, int16_t, __cast_sext) +CAST(__vec64_i32, int32_t, __vec64_i8, int8_t, __cast_sext) +CAST(__vec64_i16, int16_t, __vec64_i8, int8_t, __cast_sext) + +#define CAST_SEXT_I1(TYPE) \ +static FORCEINLINE TYPE 
__cast_sext(TYPE, __vec64_i1 v) { \ + TYPE ret; \ + for (int i = 0; i < 64; ++i) { \ + ret.v[i] = 0; \ + if (v.v & (1ull << i)) \ + ret.v[i] = ~ret.v[i]; \ + } \ + return ret; \ +} + +CAST_SEXT_I1(__vec64_i8) +CAST_SEXT_I1(__vec64_i16) +CAST_SEXT_I1(__vec64_i32) +CAST_SEXT_I1(__vec64_i64) + +// zero extension +CAST(__vec64_i64, uint64_t, __vec64_i32, uint32_t, __cast_zext) +CAST(__vec64_i64, uint64_t, __vec64_i16, uint16_t, __cast_zext) +CAST(__vec64_i64, uint64_t, __vec64_i8, uint8_t, __cast_zext) +CAST(__vec64_i32, uint32_t, __vec64_i16, uint16_t, __cast_zext) +CAST(__vec64_i32, uint32_t, __vec64_i8, uint8_t, __cast_zext) +CAST(__vec64_i16, uint16_t, __vec64_i8, uint8_t, __cast_zext) + +#define CAST_ZEXT_I1(TYPE) \ +static FORCEINLINE TYPE __cast_zext(TYPE, __vec64_i1 v) { \ + TYPE ret; \ + for (int i = 0; i < 64; ++i) \ + ret.v[i] = (v.v & (1ull << i)) ? 1 : 0; /* 64-bit one: i reaches 63, as in CAST_SEXT_I1 */ \ + return ret; \ +} + +CAST_ZEXT_I1(__vec64_i8) +CAST_ZEXT_I1(__vec64_i16) +CAST_ZEXT_I1(__vec64_i32) +CAST_ZEXT_I1(__vec64_i64) + +// truncations +CAST(__vec64_i32, int32_t, __vec64_i64, int64_t, __cast_trunc) +CAST(__vec64_i16, int16_t, __vec64_i64, int64_t, __cast_trunc) +CAST(__vec64_i8, int8_t, __vec64_i64, int64_t, __cast_trunc) +CAST(__vec64_i16, int16_t, __vec64_i32, int32_t, __cast_trunc) +CAST(__vec64_i8, int8_t, __vec64_i32, int32_t, __cast_trunc) +CAST(__vec64_i8, int8_t, __vec64_i16, int16_t, __cast_trunc) + +// signed int to float/double +CAST(__vec64_f, float, __vec64_i8, int8_t, __cast_sitofp) +CAST(__vec64_f, float, __vec64_i16, int16_t, __cast_sitofp) +CAST(__vec64_f, float, __vec64_i32, int32_t, __cast_sitofp) +CAST(__vec64_f, float, __vec64_i64, int64_t, __cast_sitofp) +CAST(__vec64_d, double, __vec64_i8, int8_t, __cast_sitofp) +CAST(__vec64_d, double, __vec64_i16, int16_t, __cast_sitofp) +CAST(__vec64_d, double, __vec64_i32, int32_t, __cast_sitofp) +CAST(__vec64_d, double, __vec64_i64, int64_t, __cast_sitofp) + +// unsigned int to float/double +CAST(__vec64_f, float, __vec64_i8, 
uint8_t, __cast_uitofp) +CAST(__vec64_f, float, __vec64_i16, uint16_t, __cast_uitofp) +CAST(__vec64_f, float, __vec64_i32, uint32_t, __cast_uitofp) +CAST(__vec64_f, float, __vec64_i64, uint64_t, __cast_uitofp) +CAST(__vec64_d, double, __vec64_i8, uint8_t, __cast_uitofp) +CAST(__vec64_d, double, __vec64_i16, uint16_t, __cast_uitofp) +CAST(__vec64_d, double, __vec64_i32, uint32_t, __cast_uitofp) +CAST(__vec64_d, double, __vec64_i64, uint64_t, __cast_uitofp) + +static FORCEINLINE __vec64_f __cast_uitofp(__vec64_f, __vec64_i1 v) { + __vec64_f ret; + for (int i = 0; i < 64; ++i) + ret.v[i] = (v.v & (1ull << i)) ? 1. : 0.; /* 64-bit one: i reaches 63 */ + return ret; +} + +// float/double to signed int +CAST(__vec64_i8, int8_t, __vec64_f, float, __cast_fptosi) +CAST(__vec64_i16, int16_t, __vec64_f, float, __cast_fptosi) +CAST(__vec64_i32, int32_t, __vec64_f, float, __cast_fptosi) +CAST(__vec64_i64, int64_t, __vec64_f, float, __cast_fptosi) +CAST(__vec64_i8, int8_t, __vec64_d, double, __cast_fptosi) +CAST(__vec64_i16, int16_t, __vec64_d, double, __cast_fptosi) +CAST(__vec64_i32, int32_t, __vec64_d, double, __cast_fptosi) +CAST(__vec64_i64, int64_t, __vec64_d, double, __cast_fptosi) + +// float/double to unsigned int +CAST(__vec64_i8, uint8_t, __vec64_f, float, __cast_fptoui) +CAST(__vec64_i16, uint16_t, __vec64_f, float, __cast_fptoui) +CAST(__vec64_i32, uint32_t, __vec64_f, float, __cast_fptoui) +CAST(__vec64_i64, uint64_t, __vec64_f, float, __cast_fptoui) +CAST(__vec64_i8, uint8_t, __vec64_d, double, __cast_fptoui) +CAST(__vec64_i16, uint16_t, __vec64_d, double, __cast_fptoui) +CAST(__vec64_i32, uint32_t, __vec64_d, double, __cast_fptoui) +CAST(__vec64_i64, uint64_t, __vec64_d, double, __cast_fptoui) + +// float/double conversions +CAST(__vec64_f, float, __vec64_d, double, __cast_fptrunc) +CAST(__vec64_d, double, __vec64_f, float, __cast_fpext) + +typedef union { + int32_t i32; + float f; + int64_t i64; + double d; +} BitcastUnion; + +#define CAST_BITS(TO, TO_ELT, FROM, FROM_ELT) \ +static FORCEINLINE 
TO __cast_bits(TO, FROM val) { \ + TO r; \ + for (int i = 0; i < 64; ++i) { \ + BitcastUnion u; \ + u.FROM_ELT = val.v[i]; \ + r.v[i] = u.TO_ELT; \ + } \ + return r; \ +} + +CAST_BITS(__vec64_f, f, __vec64_i32, i32) +CAST_BITS(__vec64_i32, i32, __vec64_f, f) +CAST_BITS(__vec64_d, d, __vec64_i64, i64) +CAST_BITS(__vec64_i64, i64, __vec64_d, d) + +#define CAST_BITS_SCALAR(TO, FROM) \ +static FORCEINLINE TO __cast_bits(TO, FROM v) { \ + union { \ + TO to; \ + FROM from; \ + } u; \ + u.from = v; \ + return u.to; \ +} + +CAST_BITS_SCALAR(uint32_t, float) +CAST_BITS_SCALAR(int32_t, float) +CAST_BITS_SCALAR(float, uint32_t) +CAST_BITS_SCALAR(float, int32_t) +CAST_BITS_SCALAR(uint64_t, double) +CAST_BITS_SCALAR(int64_t, double) +CAST_BITS_SCALAR(double, uint64_t) +CAST_BITS_SCALAR(double, int64_t) + +/////////////////////////////////////////////////////////////////////////// +// various math functions + +static FORCEINLINE void __fastmath() { +} + +static FORCEINLINE float __round_uniform_float(float v) { + return roundf(v); +} + +static FORCEINLINE float __floor_uniform_float(float v) { + return floorf(v); +} + +static FORCEINLINE float __ceil_uniform_float(float v) { + return ceilf(v); +} + +static FORCEINLINE double __round_uniform_double(double v) { + return round(v); +} + +static FORCEINLINE double __floor_uniform_double(double v) { + return floor(v); +} + +static FORCEINLINE double __ceil_uniform_double(double v) { + return ceil(v); +} + +UNARY_OP(__vec64_f, __round_varying_float, roundf) +UNARY_OP(__vec64_f, __floor_varying_float, floorf) +UNARY_OP(__vec64_f, __ceil_varying_float, ceilf) +UNARY_OP(__vec64_d, __round_varying_double, round) +UNARY_OP(__vec64_d, __floor_varying_double, floor) +UNARY_OP(__vec64_d, __ceil_varying_double, ceil) + +// min/max + +/* Scalar min/max helpers; each is named by a BINARY_OP_FUNC instantiation below. */ +static FORCEINLINE float __min_uniform_float(float a, float b) { return (a < b) ? a : b; } +static FORCEINLINE float __max_uniform_float(float a, float b) { return (a > b) ? a : b; } +static FORCEINLINE double __min_uniform_double(double a, double b) { return (a < b) ? 
a : b; } +static FORCEINLINE double __max_uniform_double(double a, double b) { return (a > b) ? a : b; } + +static FORCEINLINE int32_t __min_uniform_int32(int32_t a, int32_t b) { return (a < b) ? a : b; } +static FORCEINLINE int32_t __max_uniform_int32(int32_t a, int32_t b) { return (a > b) ? a : b; } +static FORCEINLINE int32_t __min_uniform_uint32(uint32_t a, uint32_t b) { return (a < b) ? a : b; } +static FORCEINLINE int32_t __max_uniform_uint32(uint32_t a, uint32_t b) { return (a > b) ? a : b; } + +static FORCEINLINE int64_t __min_uniform_int64(int64_t a, int64_t b) { return (a < b) ? a : b; } +static FORCEINLINE int64_t __max_uniform_int64(int64_t a, int64_t b) { return (a > b) ? a : b; } +static FORCEINLINE int64_t __min_uniform_uint64(uint64_t a, uint64_t b) { return (a < b) ? a : b; } +static FORCEINLINE int64_t __max_uniform_uint64(uint64_t a, uint64_t b) { return (a > b) ? a : b; } + + +BINARY_OP_FUNC(__vec64_f, __max_varying_float, __max_uniform_float) +BINARY_OP_FUNC(__vec64_f, __min_varying_float, __min_uniform_float) +BINARY_OP_FUNC(__vec64_d, __max_varying_double, __max_uniform_double) +BINARY_OP_FUNC(__vec64_d, __min_varying_double, __min_uniform_double) + +BINARY_OP_FUNC(__vec64_i32, __max_varying_int32, __max_uniform_int32) +BINARY_OP_FUNC(__vec64_i32, __min_varying_int32, __min_uniform_int32) +BINARY_OP_FUNC(__vec64_i32, __max_varying_uint32, __max_uniform_uint32) +BINARY_OP_FUNC(__vec64_i32, __min_varying_uint32, __min_uniform_uint32) + +BINARY_OP_FUNC(__vec64_i64, __max_varying_int64, __max_uniform_int64) +BINARY_OP_FUNC(__vec64_i64, __min_varying_int64, __min_uniform_int64) +BINARY_OP_FUNC(__vec64_i64, __max_varying_uint64, __max_uniform_uint64) +BINARY_OP_FUNC(__vec64_i64, __min_varying_uint64, __min_uniform_uint64) + +// sqrt/rsqrt/rcp + +static FORCEINLINE float __rsqrt_uniform_float(float v) { + return 1.f / sqrtf(v); +} + +static FORCEINLINE float __rcp_uniform_float(float v) { + return 1.f / v; +} + +static FORCEINLINE float __sqrt_uniform_float(float v) { + return sqrtf(v); +} + +static FORCEINLINE double __sqrt_uniform_double(double v) { + return sqrt(v); +} + +UNARY_OP(__vec64_f, __rcp_varying_float, __rcp_uniform_float) +UNARY_OP(__vec64_f, __rsqrt_varying_float, __rsqrt_uniform_float) +UNARY_OP(__vec64_f, __sqrt_varying_float, __sqrt_uniform_float) +UNARY_OP(__vec64_d, __sqrt_varying_double, __sqrt_uniform_double) + +/////////////////////////////////////////////////////////////////////////// +// bit ops + +static FORCEINLINE 
int32_t __popcnt_int32(uint32_t v) { + int count = 0; + for (; v != 0; v >>= 1) + count += (v & 1); + return count; +} + +static FORCEINLINE int32_t __popcnt_int64(uint64_t v) { + int count = 0; + for (; v != 0; v >>= 1) + count += (v & 1); + return count; +} + +static FORCEINLINE int32_t __count_trailing_zeros_i32(uint32_t v) { + if (v == 0) + return 32; + + int count = 0; + while ((v & 1) == 0) { + ++count; + v >>= 1; + } + return count; +} + +static FORCEINLINE int64_t __count_trailing_zeros_i64(uint64_t v) { + if (v == 0) + return 64; + + int count = 0; + while ((v & 1) == 0) { + ++count; + v >>= 1; + } + return count; +} + +static FORCEINLINE int32_t __count_leading_zeros_i32(uint32_t v) { + if (v == 0) + return 32; + + int count = 0; + while ((v & (1u<<31)) == 0) { /* unsigned one: no shift into the int sign bit */ + ++count; + v <<= 1; + } + return count; +} + +static FORCEINLINE int64_t __count_leading_zeros_i64(uint64_t v) { + if (v == 0) + return 64; + + int count = 0; + while ((v & (1ull<<63)) == 0) { + ++count; + v <<= 1; + } + return count; +} + +/////////////////////////////////////////////////////////////////////////// +// reductions + +REDUCE_ADD(float, __vec64_f, __reduce_add_float) +REDUCE_MINMAX(float, __vec64_f, __reduce_min_float, <) +REDUCE_MINMAX(float, __vec64_f, __reduce_max_float, >) + +REDUCE_ADD(double, __vec64_d, __reduce_add_double) +REDUCE_MINMAX(double, __vec64_d, __reduce_min_double, <) +REDUCE_MINMAX(double, __vec64_d, __reduce_max_double, >) + +REDUCE_ADD(uint32_t, __vec64_i32, __reduce_add_int32) +REDUCE_MINMAX(int32_t, __vec64_i32, __reduce_min_int32, <) +REDUCE_MINMAX(int32_t, __vec64_i32, __reduce_max_int32, >) + +REDUCE_ADD(uint32_t, __vec64_i32, __reduce_add_uint32) +REDUCE_MINMAX(uint32_t, __vec64_i32, __reduce_min_uint32, <) +REDUCE_MINMAX(uint32_t, __vec64_i32, __reduce_max_uint32, >) + +REDUCE_ADD(uint64_t, __vec64_i64, __reduce_add_int64) +REDUCE_MINMAX(int64_t, __vec64_i64, __reduce_min_int64, <) +REDUCE_MINMAX(int64_t, __vec64_i64, __reduce_max_int64, >) + 
+REDUCE_ADD(uint64_t, __vec64_i64, __reduce_add_uint64) +REDUCE_MINMAX(uint64_t, __vec64_i64, __reduce_min_uint64, <) +REDUCE_MINMAX(uint64_t, __vec64_i64, __reduce_max_uint64, >) + +/////////////////////////////////////////////////////////////////////////// +// masked load/store + +static FORCEINLINE __vec64_i8 __masked_load_8(void *p, + __vec64_i1 mask) { + __vec64_i8 ret; + int8_t *ptr = (int8_t *)p; + for (int i = 0; i < 64; ++i) + if ((mask.v & (1ull << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE __vec64_i16 __masked_load_16(void *p, + __vec64_i1 mask) { + __vec64_i16 ret; + int16_t *ptr = (int16_t *)p; + for (int i = 0; i < 64; ++i) + if ((mask.v & (1ull << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE __vec64_i32 __masked_load_32(void *p, + __vec64_i1 mask) { + __vec64_i32 ret; + int32_t *ptr = (int32_t *)p; + for (int i = 0; i < 64; ++i) + if ((mask.v & (1ull << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE __vec64_i64 __masked_load_64(void *p, + __vec64_i1 mask) { + __vec64_i64 ret; + int64_t *ptr = (int64_t *)p; + for (int i = 0; i < 64; ++i) + if ((mask.v & (1ull << i)) != 0) + ret.v[i] = ptr[i]; + return ret; +} + +static FORCEINLINE void __masked_store_8(void *p, __vec64_i8 val, + __vec64_i1 mask) { + int8_t *ptr = (int8_t *)p; + for (int i = 0; i < 64; ++i) + if ((mask.v & (1ull << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_16(void *p, __vec64_i16 val, + __vec64_i1 mask) { + int16_t *ptr = (int16_t *)p; + for (int i = 0; i < 64; ++i) + if ((mask.v & (1ull << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_32(void *p, __vec64_i32 val, + __vec64_i1 mask) { + int32_t *ptr = (int32_t *)p; + for (int i = 0; i < 64; ++i) + if ((mask.v & (1ull << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_64(void *p, __vec64_i64 val, + __vec64_i1 mask) { + int64_t *ptr = (int64_t *)p; + for (int i = 0; i < 64; 
++i) + if ((mask.v & (1ull << i)) != 0) + ptr[i] = val.v[i]; +} + +static FORCEINLINE void __masked_store_blend_8(void *p, __vec64_i8 val, + __vec64_i1 mask) { + __masked_store_8(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_16(void *p, __vec64_i16 val, + __vec64_i1 mask) { + __masked_store_16(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_32(void *p, __vec64_i32 val, + __vec64_i1 mask) { + __masked_store_32(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_64(void *p, __vec64_i64 val, + __vec64_i1 mask) { + __masked_store_64(p, val, mask); +} + +/////////////////////////////////////////////////////////////////////////// +// gather/scatter + +// offsets * offsetScale is in bytes (for all of these) + +#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \ +static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \ + uint32_t scale, OTYPE constOffset, \ + __vec64_i1 mask) { \ + VTYPE ret; \ + int8_t *base = (int8_t *)b; \ + for (int i = 0; i < 64; ++i) \ + if ((mask.v & (1ull << i)) != 0) { \ + STYPE *ptr = (STYPE *)(base + scale * varyingOffset.v[i] + \ + constOffset.v[i]); \ + ret.v[i] = *ptr; \ + } \ + return ret; \ +} + + +GATHER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __gather_base_offsets32_i8) +GATHER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __gather_base_offsets64_i8) +GATHER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __gather_base_offsets32_i16) +GATHER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __gather_base_offsets64_i16) +GATHER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __gather_base_offsets32_i32) +GATHER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __gather_base_offsets64_i32) +GATHER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __gather_base_offsets32_i64) +GATHER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __gather_base_offsets64_i64) + +#define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \ +static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec64_i1 
mask) { \ + VTYPE ret; \ + for (int i = 0; i < 64; ++i) \ + if ((mask.v & (1ull << i)) != 0) { \ + STYPE *ptr = (STYPE *)ptrs.v[i]; \ + ret.v[i] = *ptr; \ + } \ + return ret; \ +} + +GATHER_GENERAL(__vec64_i8, int8_t, __vec64_i32, __gather32_i8) +GATHER_GENERAL(__vec64_i8, int8_t, __vec64_i64, __gather64_i8) +GATHER_GENERAL(__vec64_i16, int16_t, __vec64_i32, __gather32_i16) +GATHER_GENERAL(__vec64_i16, int16_t, __vec64_i64, __gather64_i16) +GATHER_GENERAL(__vec64_i32, int32_t, __vec64_i32, __gather32_i32) +GATHER_GENERAL(__vec64_i32, int32_t, __vec64_i64, __gather64_i32) +GATHER_GENERAL(__vec64_i64, int64_t, __vec64_i32, __gather32_i64) +GATHER_GENERAL(__vec64_i64, int64_t, __vec64_i64, __gather64_i64) + +// scatter + +#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \ +static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \ + uint32_t scale, OTYPE constOffset, \ + VTYPE val, __vec64_i1 mask) { \ + int8_t *base = (int8_t *)b; \ + for (int i = 0; i < 64; ++i) \ + if ((mask.v & (1ull << i)) != 0) { \ + STYPE *ptr = (STYPE *)(base + scale * varyingOffset.v[i] + \ + constOffset.v[i]); \ + *ptr = val.v[i]; \ + } \ +} + + +SCATTER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __scatter_base_offsets32_i8) +SCATTER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __scatter_base_offsets64_i8) +SCATTER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __scatter_base_offsets32_i16) +SCATTER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __scatter_base_offsets64_i16) +SCATTER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __scatter_base_offsets32_i32) +SCATTER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __scatter_base_offsets64_i32) +SCATTER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __scatter_base_offsets32_i64) +SCATTER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __scatter_base_offsets64_i64) + +#define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \ +static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec64_i1 mask) { \ + VTYPE ret; \ + for 
(int i = 0; i < 64; ++i) \ + if ((mask.v & (1ull << i)) != 0) { \ + STYPE *ptr = (STYPE *)ptrs.v[i]; \ + *ptr = val.v[i]; \ + } \ +} + +SCATTER_GENERAL(__vec64_i8, int8_t, __vec64_i32, __scatter32_i8) +SCATTER_GENERAL(__vec64_i8, int8_t, __vec64_i64, __scatter64_i8) +SCATTER_GENERAL(__vec64_i16, int16_t, __vec64_i32, __scatter32_i16) +SCATTER_GENERAL(__vec64_i16, int16_t, __vec64_i64, __scatter64_i16) +SCATTER_GENERAL(__vec64_i32, int32_t, __vec64_i32, __scatter32_i32) +SCATTER_GENERAL(__vec64_i32, int32_t, __vec64_i64, __scatter64_i32) +SCATTER_GENERAL(__vec64_i64, int64_t, __vec64_i32, __scatter32_i64) +SCATTER_GENERAL(__vec64_i64, int64_t, __vec64_i64, __scatter64_i64) + +/////////////////////////////////////////////////////////////////////////// +// packed load/store + +static FORCEINLINE int32_t __packed_load_active(int32_t *ptr, __vec64_i32 *val, + __vec64_i1 mask) { + int count = 0; + for (int i = 0; i < 64; ++i) { + if ((mask.v & (1ull << i)) != 0) { + val->v[i] = *ptr++; + ++count; + } + } + return count; +} + + +static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec64_i32 val, + __vec64_i1 mask) { + int count = 0; + for (int i = 0; i < 64; ++i) { + if ((mask.v & (1ull << i)) != 0) { + *ptr++ = val.v[i]; + ++count; + } + } + return count; +} + +static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, + __vec64_i32 *val, + __vec64_i1 mask) { + int count = 0; + for (int i = 0; i < 64; ++i) { + if ((mask.v & (1ull << i)) != 0) { + val->v[i] = *ptr++; + ++count; + } + } + return count; +} + + +static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, + __vec64_i32 val, + __vec64_i1 mask) { + int count = 0; + for (int i = 0; i < 64; ++i) { + if ((mask.v & (1ull << i)) != 0) { + *ptr++ = val.v[i]; + ++count; + } + } + return count; +} + + +/////////////////////////////////////////////////////////////////////////// +// aos/soa + +static FORCEINLINE void __soa_to_aos3_float(__vec64_f v0, __vec64_f v1, __vec64_f v2, + float *ptr) { + for 
(int i = 0; i < 64; ++i) { + *ptr++ = __extract_element(v0, i); + *ptr++ = __extract_element(v1, i); + *ptr++ = __extract_element(v2, i); + } +} + +static FORCEINLINE void __aos_to_soa3_float(float *ptr, __vec64_f *out0, __vec64_f *out1, + __vec64_f *out2) { + for (int i = 0; i < 64; ++i) { + __insert_element(out0, i, *ptr++); + __insert_element(out1, i, *ptr++); + __insert_element(out2, i, *ptr++); + } +} + +static FORCEINLINE void __soa_to_aos4_float(__vec64_f v0, __vec64_f v1, __vec64_f v2, + __vec64_f v3, float *ptr) { + for (int i = 0; i < 64; ++i) { + *ptr++ = __extract_element(v0, i); + *ptr++ = __extract_element(v1, i); + *ptr++ = __extract_element(v2, i); + *ptr++ = __extract_element(v3, i); + } +} + +static FORCEINLINE void __aos_to_soa4_float(float *ptr, __vec64_f *out0, __vec64_f *out1, + __vec64_f *out2, __vec64_f *out3) { + for (int i = 0; i < 64; ++i) { + __insert_element(out0, i, *ptr++); + __insert_element(out1, i, *ptr++); + __insert_element(out2, i, *ptr++); + __insert_element(out3, i, *ptr++); + } +} + +/////////////////////////////////////////////////////////////////////////// +// prefetch + +static FORCEINLINE void __prefetch_read_uniform_1(unsigned char *) { +} + +static FORCEINLINE void __prefetch_read_uniform_2(unsigned char *) { +} + +static FORCEINLINE void __prefetch_read_uniform_3(unsigned char *) { +} + +static FORCEINLINE void __prefetch_read_uniform_nt(unsigned char *) { +} + +/////////////////////////////////////////////////////////////////////////// +// atomics + +static FORCEINLINE uint32_t __atomic_add(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedAdd((LONG volatile *)p, v) - v; +#else + return __sync_fetch_and_add(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_sub(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedAdd((LONG volatile *)p, -v) + v; +#else + return __sync_fetch_and_sub(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_and(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER 
+ return InterlockedAnd((LONG volatile *)p, v); +#else + return __sync_fetch_and_and(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_or(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedOr((LONG volatile *)p, v); +#else + return __sync_fetch_and_or(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_xor(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedXor((LONG volatile *)p, v); +#else + return __sync_fetch_and_xor(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_min(uint32_t *p, uint32_t v) { + int32_t old, min; + do { + old = *((volatile int32_t *)p); + min = (old < (int32_t)v) ? old : (int32_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_max(uint32_t *p, uint32_t v) { + int32_t old, max; + do { + old = *((volatile int32_t *)p); + max = (old > (int32_t)v) ? old : (int32_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_umin(uint32_t *p, uint32_t v) { + uint32_t old, min; + do { + old = *((volatile uint32_t *)p); + min = (old < v) ? old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_umax(uint32_t *p, uint32_t v) { + uint32_t old, max; + do { + old = *((volatile uint32_t *)p); + max = (old > v) ? 
old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange((LONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint32_t __atomic_xchg(uint32_t *p, uint32_t v) { +#ifdef _MSC_VER + return InterlockedExchange((LONG volatile *)p, v); +#else + return __sync_lock_test_and_set(p, v); +#endif +} + +static FORCEINLINE uint32_t __atomic_cmpxchg(uint32_t *p, uint32_t cmpval, + uint32_t newval) { +#ifdef _MSC_VER + return InterlockedCompareExchange((LONG volatile *)p, newval, cmpval); +#else + return __sync_val_compare_and_swap(p, cmpval, newval); +#endif +} + +static FORCEINLINE uint64_t __atomic_add(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedAdd64((LONGLONG volatile *)p, v) - v; +#else + return __sync_fetch_and_add(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_sub(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedAdd64((LONGLONG volatile *)p, -v) + v; +#else + return __sync_fetch_and_sub(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_and(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedAnd64((LONGLONG volatile *)p, v); /* already the previous value, like the 32-bit version */ +#else + return __sync_fetch_and_and(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_or(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedOr64((LONGLONG volatile *)p, v); /* already the previous value, like the 32-bit version */ +#else + return __sync_fetch_and_or(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_xor(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedXor64((LONGLONG volatile *)p, v); /* already the previous value, like the 32-bit version */ +#else + return __sync_fetch_and_xor(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_min(uint64_t *p, uint64_t v) { + int64_t old, min; + do { + old = *((volatile int64_t *)p); + min = (old < (int64_t)v) ? 
old : (int64_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_max(uint64_t *p, uint64_t v) { + int64_t old, max; + do { + old = *((volatile int64_t *)p); + max = (old > (int64_t)v) ? old : (int64_t)v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_umin(uint64_t *p, uint64_t v) { + uint64_t old, min; + do { + old = *((volatile uint64_t *)p); + min = (old < v) ? old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, min, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, min) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_umax(uint64_t *p, uint64_t v) { + uint64_t old, max; + do { + old = *((volatile uint64_t *)p); + max = (old > v) ? 
old : v; +#ifdef _MSC_VER + } while (InterlockedCompareExchange64((LONGLONG volatile *)p, max, old) != old); +#else + } while (__sync_bool_compare_and_swap(p, old, max) == false); +#endif + return old; +} + +static FORCEINLINE uint64_t __atomic_xchg(uint64_t *p, uint64_t v) { +#ifdef _MSC_VER + return InterlockedExchange64((LONGLONG volatile *)p, v); +#else + return __sync_lock_test_and_set(p, v); +#endif +} + +static FORCEINLINE uint64_t __atomic_cmpxchg(uint64_t *p, uint64_t cmpval, + uint64_t newval) { +#ifdef _MSC_VER + return InterlockedCompareExchange64((LONGLONG volatile *)p, newval, cmpval); +#else + return __sync_val_compare_and_swap(p, cmpval, newval); +#endif +} From 21c43737fe59b42260a73bee72d5bf6343e1e067 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 14:27:30 -0700 Subject: [PATCH 163/173] Fix bug in examples/intrinsics/generic-32.h --- examples/intrinsics/generic-32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics/generic-32.h b/examples/intrinsics/generic-32.h index d7d044ef..244453a1 100644 --- a/examples/intrinsics/generic-32.h +++ b/examples/intrinsics/generic-32.h @@ -336,7 +336,7 @@ static FORCEINLINE VTYPE __smear_##NAME(VTYPE retType, STYPE v) { \ static FORCEINLINE VTYPE __broadcast_##NAME(VTYPE v, int index) { \ VTYPE ret; \ for (int i = 0; i < 32; ++i) \ - ret.v[i] = v.v[index & 0xff]; \ + ret.v[i] = v.v[index & 31]; \ return ret; \ } \ From 51ade48e3d9f26cd96118dd39d45ab0b27a7838d Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 25 May 2012 14:47:06 -0700 Subject: [PATCH 164/173] Fix some of the reduce-* tests for 32 and 64-wide targets --- tests/reduce-add-double-1.ispc | 8 +++----- tests/reduce-add-double-2.ispc | 8 +++----- tests/reduce-add-float-1.ispc | 8 +++----- tests/reduce-add-float-2.ispc | 8 +++----- tests/reduce-add-int-1.ispc | 8 +++----- tests/reduce-add-int.ispc | 8 +++----- tests/reduce-add-int64-1.ispc | 8 +++----- tests/reduce-add-int64.ispc | 8 +++----- 
tests/reduce-add-uint-1.ispc | 8 +++----- tests/reduce-add-uint64-1.ispc | 8 +++----- 10 files changed, 30 insertions(+), 50 deletions(-) diff --git a/tests/reduce-add-double-1.ispc b/tests/reduce-add-double-1.ispc index 9ff50b2c..4d40509f 100644 --- a/tests/reduce-add-double-1.ispc +++ b/tests/reduce-add-double-1.ispc @@ -13,11 +13,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 4; - else if (programCount == 8) x = 16; - else if (programCount == 16) x = 64; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; i += 2) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-double-2.ispc b/tests/reduce-add-double-2.ispc index 1be0d10b..aed4099c 100644 --- a/tests/reduce-add-double-2.ispc +++ b/tests/reduce-add-double-2.ispc @@ -12,11 +12,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 10; - else if (programCount == 8) x = 36; - else if (programCount == 16) x = 136; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; ++i) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-float-1.ispc b/tests/reduce-add-float-1.ispc index dd373849..627e67d6 100644 --- a/tests/reduce-add-float-1.ispc +++ b/tests/reduce-add-float-1.ispc @@ -13,11 +13,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 4; - else if (programCount == 8) x = 16; - else if (programCount == 16) x = 64; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; i += 2) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-float-2.ispc 
b/tests/reduce-add-float-2.ispc index 53aa85aa..473b4bdd 100644 --- a/tests/reduce-add-float-2.ispc +++ b/tests/reduce-add-float-2.ispc @@ -12,11 +12,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 10; - else if (programCount == 8) x = 36; - else if (programCount == 16) x = 136; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; ++i) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-int-1.ispc b/tests/reduce-add-int-1.ispc index 9ac887c6..5351a81c 100644 --- a/tests/reduce-add-int-1.ispc +++ b/tests/reduce-add-int-1.ispc @@ -13,11 +13,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 4; - else if (programCount == 8) x = 16; - else if (programCount == 16) x = 64; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; i += 2) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-int.ispc b/tests/reduce-add-int.ispc index 01ff745c..d5478374 100644 --- a/tests/reduce-add-int.ispc +++ b/tests/reduce-add-int.ispc @@ -13,11 +13,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 10; - else if (programCount == 8) x = 36; - else if (programCount == 16) x = 136; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; ++i) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-int64-1.ispc b/tests/reduce-add-int64-1.ispc index cdc88bc3..e7df3b23 100644 --- a/tests/reduce-add-int64-1.ispc +++ b/tests/reduce-add-int64-1.ispc @@ -13,11 +13,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform 
float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 4; - else if (programCount == 8) x = 16; - else if (programCount == 16) x = 64; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; i += 2) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-int64.ispc b/tests/reduce-add-int64.ispc index 894dddea..5c85cfef 100644 --- a/tests/reduce-add-int64.ispc +++ b/tests/reduce-add-int64.ispc @@ -12,11 +12,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 10; - else if (programCount == 8) x = 36; - else if (programCount == 16) x = 136; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; ++i) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-uint-1.ispc b/tests/reduce-add-uint-1.ispc index 291200a6..955fa11a 100644 --- a/tests/reduce-add-uint-1.ispc +++ b/tests/reduce-add-uint-1.ispc @@ -13,10 +13,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 4; - else if (programCount == 8) x = 16; - else if (programCount == 16) x = 64; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; i += 2) + x += i; RET[programIndex] = x; } diff --git a/tests/reduce-add-uint64-1.ispc b/tests/reduce-add-uint64-1.ispc index 5469a898..98ffd2c2 100644 --- a/tests/reduce-add-uint64-1.ispc +++ b/tests/reduce-add-uint64-1.ispc @@ -13,10 +13,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - uniform int x = -1234; - if (programCount == 1) x = 1; - else if (programCount == 4) x = 4; - else if (programCount == 8) x = 16; - else if 
(programCount == 16) x = 64; + uniform int x = 0; + for (uniform int i = 1; i <= programCount; i += 2) + x += i; RET[programIndex] = x; } From 1f6075506c249d22d43a27ae468a10e32777e88c Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 28 May 2012 19:45:16 -0700 Subject: [PATCH 165/173] Fix linux build (Jean-Luc Duprat) --- stmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stmt.cpp b/stmt.cpp index 11cc94ea..6049d4e2 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2843,7 +2843,7 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) { pos); // Compute the per lane mask to test the mask bits against: (1 << iter) - ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt64, 1ll, + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt64, int64_t(1), iterSym->pos); Expr *shiftLaneExpr = new BinaryExpr(BinaryExpr::Shl, oneExpr, symExpr, pos); From ece65cab18f45e6615153a4f6aa1e27925b331b2 Mon Sep 17 00:00:00 2001 From: Jean-Luc Duprat Date: Tue, 29 May 2012 07:51:48 -0700 Subject: [PATCH 166/173] Fix some tests for up to 64-wide gangs --- tests/test-103.ispc | 2 +- tests/test-125.ispc | 6 +++++- tests/test-60.ispc | 4 ++-- tests/test-64.ispc | 20 ++++++++++++++++---- tests/test-65.ispc | 20 ++++++++++++++++---- tests/test-66.ispc | 20 ++++++++++++++++---- 6 files changed, 56 insertions(+), 16 deletions(-) diff --git a/tests/test-103.ispc b/tests/test-103.ispc index 1c53213a..c536d032 100644 --- a/tests/test-103.ispc +++ b/tests/test-103.ispc @@ -4,7 +4,7 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; - RET[programIndex] = a < 17.; + RET[programIndex] = a < 65.; } diff --git a/tests/test-125.ispc b/tests/test-125.ispc index d92d671c..e4a12811 100644 --- a/tests/test-125.ispc +++ b/tests/test-125.ispc @@ -5,8 +5,12 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform 
float b) { float a = aFOO[programIndex]; if (a < 3) { - if (all(a < 3)) + if (all(a < 256)) RET[programIndex] = 1; + else { + print("FALSE %\n", a); + print("programCount %\n", programCount); + } } else RET[programIndex] = 0; } diff --git a/tests/test-60.ispc b/tests/test-60.ispc index fbdf8bd3..87f4fac2 100644 --- a/tests/test-60.ispc +++ b/tests/test-60.ispc @@ -4,12 +4,12 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - while (a < 20) + while (a < 64) ++a; RET[programIndex] = a; } export void result(uniform float RET[]) { - RET[programIndex] = 20; + RET[programIndex] = 64; } diff --git a/tests/test-64.ispc b/tests/test-64.ispc index 5a674a95..b9af26cc 100644 --- a/tests/test-64.ispc +++ b/tests/test-64.ispc @@ -19,8 +19,20 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { export void result(uniform float RET[]) { - RET[0] = RET[4] = RET[8] = RET[12] = 2; - RET[1] = RET[5] = RET[9] = RET[13] = 3; - RET[2] = RET[6] = RET[10] = RET[14] = 5; - RET[3] = RET[7] = RET[11] = RET[15] = 6; + RET[0] = RET[4] = RET[8] = RET[12] =\ + RET[16] = RET[20] = RET[24] = RET[28] =\ + RET[32] = RET[36] = RET[40] = RET[44] =\ + RET[48] = RET[52] = RET[56] = RET[60] = 2; + RET[1] = RET[5] = RET[9] = RET[13] =\ + RET[17] = RET[21] = RET[25] = RET[29] =\ + RET[33] = RET[37] = RET[41] = RET[45] =\ + RET[49] = RET[53] = RET[57] = RET[61] = 3; + RET[2] = RET[6] = RET[10] = RET[14] =\ + RET[18] = RET[22] = RET[26] = RET[30] =\ + RET[34] = RET[38] = RET[42] = RET[46] =\ + RET[50] = RET[54] = RET[58] = RET[62] = 5; + RET[3] = RET[7] = RET[11] = RET[15] =\ + RET[19] = RET[23] = RET[27] = RET[31] =\ + RET[35] = RET[39] = RET[43] = RET[47] =\ + RET[51] = RET[55] = RET[59] = RET[63] = 6; } diff --git a/tests/test-65.ispc b/tests/test-65.ispc index 71b8b39c..7510b0d5 100644 --- a/tests/test-65.ispc +++ b/tests/test-65.ispc @@ -17,8 +17,20 @@ export 
void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { export void result(uniform float RET[]) { - RET[0] = RET[4] = RET[8] = RET[12] = 1; - RET[1] = RET[5] = RET[9] = RET[13] = 3; - RET[2] = RET[6] = RET[10] = RET[14] = 3; - RET[3] = RET[7] = RET[11] = RET[15] = 29; + RET[0] = RET[4] = RET[8] = RET[12] =\ + RET[16] = RET[20] = RET[24] = RET[28] =\ + RET[32] = RET[36] = RET[40] = RET[44] =\ + RET[48] = RET[52] = RET[56] = RET[60] = 1; + RET[1] = RET[5] = RET[9] = RET[13] =\ + RET[17] = RET[21] = RET[25] = RET[29] =\ + RET[33] = RET[37] = RET[41] = RET[45] =\ + RET[49] = RET[53] = RET[57] = RET[61] = 3; + RET[2] = RET[6] = RET[10] = RET[14] =\ + RET[18] = RET[22] = RET[26] = RET[30] =\ + RET[34] = RET[38] = RET[42] = RET[46] =\ + RET[50] = RET[54] = RET[58] = RET[62] = 3; + RET[3] = RET[7] = RET[11] = RET[15] =\ + RET[19] = RET[23] = RET[27] = RET[31] =\ + RET[35] = RET[39] = RET[43] = RET[47] =\ + RET[51] = RET[55] = RET[59] = RET[63] = 29; } diff --git a/tests/test-66.ispc b/tests/test-66.ispc index 8f69e8a5..a7fe0adc 100644 --- a/tests/test-66.ispc +++ b/tests/test-66.ispc @@ -18,8 +18,20 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { export void result(uniform float RET[]) { - RET[0] = RET[4] = RET[8] = RET[12] = 32; - RET[1] = RET[5] = RET[9] = RET[13] = 32; - RET[2] = RET[6] = RET[10] = RET[14] = 38; - RET[3] = RET[7] = RET[11] = RET[15] = 39; + RET[0] = RET[4] = RET[8] = RET[12] =\ + RET[16] = RET[20] = RET[24] = RET[28] =\ + RET[32] = RET[36] = RET[40] = RET[44] =\ + RET[48] = RET[52] = RET[56] = RET[60] = 32; + RET[1] = RET[5] = RET[9] = RET[13] =\ + RET[17] = RET[21] = RET[25] = RET[29] =\ + RET[33] = RET[37] = RET[41] = RET[45] =\ + RET[49] = RET[53] = RET[57] = RET[61] = 32; + RET[2] = RET[6] = RET[10] = RET[14] =\ + RET[18] = RET[22] = RET[26] = RET[30] =\ + RET[34] = RET[38] = RET[42] = RET[46] =\ + RET[50] = RET[54] = RET[58] = RET[62] = 38; + RET[3] = RET[7] = RET[11] = RET[15] =\ + RET[19] = 
RET[23] = RET[27] = RET[31] =\ + RET[35] = RET[39] = RET[43] = RET[47] =\ + RET[51] = RET[55] = RET[59] = RET[63] = 39; } From 5084712a15c18c47b0287c94ffaeafd5ef88e258 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 29 May 2012 08:31:10 -0700 Subject: [PATCH 167/173] Fix bugs in examples/intrinsics/generic-64.h There were a number of situations where we were left-shifting 1 by a lane index that were failing due to shifting beyond 32-bits. Fixed by shifting the 64-bit constant value 1ull. --- examples/intrinsics/generic-64.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/intrinsics/generic-64.h b/examples/intrinsics/generic-64.h index 08ae20d0..d088edfe 100644 --- a/examples/intrinsics/generic-64.h +++ b/examples/intrinsics/generic-64.h @@ -388,7 +388,7 @@ static FORCEINLINE __vec64_i1 NAME(TYPE a, TYPE b) { \ __vec64_i1 ret; \ ret.v = 0; \ for (int i = 0; i < 64; ++i) \ - ret.v |= ((CAST)(a.v[i]) OP (CAST)(b.v[i])) << i; \ + ret.v |= uint64_t((CAST)(a.v[i]) OP (CAST)(b.v[i])) << i; \ return ret; \ } @@ -434,7 +434,7 @@ static FORCEINLINE TYPE NAME(VTYPE v) { \ static FORCEINLINE TYPE __select(__vec64_i1 mask, TYPE a, TYPE b) { \ TYPE ret; \ for (int i = 0; i < 64; ++i) \ - ret.v[i] = (mask.v & (1<v &= ~(1 << index); + vec->v &= ~(1ull << index); else - vec->v |= (1 << index); + vec->v |= (1ull << index); } static FORCEINLINE __vec64_i1 __load(__vec64_i1 *p, int align) { @@ -999,7 +999,7 @@ CAST(__vec64_i16, uint16_t, __vec64_i8, uint8_t, __cast_zext) static FORCEINLINE TYPE __cast_zext(TYPE, __vec64_i1 v) { \ TYPE ret; \ for (int i = 0; i < 64; ++i) \ - ret.v[i] = (v.v & (1 << i)) ? 1 : 0; \ + ret.v[i] = (v.v & (1ull << i)) ? 1 : 0; \ return ret; \ } @@ -1039,7 +1039,7 @@ CAST(__vec64_d, double, __vec64_i64, uint64_t, __cast_uitofp) static FORCEINLINE __vec64_f __cast_uitofp(__vec64_f, __vec64_i1 v) { __vec64_f ret; for (int i = 0; i < 64; ++i) - ret.v[i] = (v.v & (1 << i)) ? 1. 
: 0.; + ret.v[i] = (v.v & (1ull << i)) ? 1. : 0.; return ret; } From d86653668eda8624dbe31d5bc168070ee48f5ed0 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 29 May 2012 10:16:43 -0700 Subject: [PATCH 168/173] Fix a number of tests to work correctly with 32/64-wide targets. Still to be reviewed/fixed: tests/test-*, tests/[cfrs]* --- tests/acos.ispc | 8 ++++---- tests/aossoa-1.ispc | 4 +++- tests/aossoa-2.ispc | 4 +++- tests/aossoa-5.ispc | 4 +++- tests/aossoa-6.ispc | 4 +++- tests/array-gather-ifs.ispc | 4 ++-- tests/array-gather-multi-unif.ispc | 3 ++- tests/array-gather-simple.ispc | 10 ++++++---- tests/array-gather-unif-runflags.ispc | 4 ++-- tests/array-gather-unif.ispc | 4 ++-- tests/array-gather-vary.ispc | 6 +++--- tests/array-mixed-unif-vary-indexing-2.ispc | 3 +++ tests/array-mixed-unif-vary-indexing-3.ispc | 7 ++++--- tests/array-mixed-unif-vary-indexing.ispc | 1 + tests/array-multidim-gather-scatter.ispc | 2 +- tests/array-scatter-unif-2.ispc | 5 ++--- tests/array-scatter-unif-3.ispc | 5 ++--- tests/array-scatter-unif.ispc | 4 ++-- tests/array-scatter-vary.ispc | 5 ++--- tests/array-struct-gather.ispc | 4 ++-- tests/asin.ispc | 6 +++--- tests/atomics-12.ispc | 4 ++-- tests/atomics-13.ispc | 6 +++--- tests/atomics-14.ispc | 6 +++--- tests/atomics-4.ispc | 4 ++-- tests/exclusive-scan-add-1.ispc | 17 +++++++++++++---- tests/exclusive-scan-add-10.ispc | 18 +++++++++++++----- tests/exclusive-scan-add-2.ispc | 17 +++++++++++++---- tests/exclusive-scan-add-3.ispc | 3 +-- tests/exclusive-scan-add-5.ispc | 18 +++++++++++++----- tests/exclusive-scan-add-6.ispc | 17 +++++++++++++---- tests/exclusive-scan-add-7.ispc | 17 +++++++++++++---- tests/exclusive-scan-and-2.ispc | 4 ++-- tests/exclusive-scan-or-1.ispc | 4 ++-- tests/gs-double-improve-multidim-1.ispc | 6 +++--- .../gs-double-improve-multidim-struct-1.ispc | 8 ++++---- ...s-double-improve-progindex-plus-const.ispc | 6 ++++-- ...gs-double-improve-progindex-plus-unif.ispc | 4 ++-- 
tests/gs-double-improve-progindex.ispc | 4 +++- tests/gs-improve-multidim-1.ispc | 6 +++--- tests/gs-improve-multidim-struct-1.ispc | 10 +++++----- tests/gs-improve-progindex-plus-const.ispc | 4 +++- tests/gs-improve-progindex-plus-unif.ispc | 4 +++- tests/gs-improve-progindex.ispc | 4 +++- tests/ldexp-double.ispc | 5 +++-- tests/ldexp-float.ispc | 5 +++-- tests/local-atomics-10.ispc | 2 +- tests/local-atomics-12.ispc | 4 ++-- tests/local-atomics-13.ispc | 8 ++++---- tests/local-atomics-14.ispc | 4 ++-- tests/local-atomics-4.ispc | 6 ++++-- tests/local-atomics-7.ispc | 7 +++++-- tests/local-atomics-swap.ispc | 2 +- tests/masked-scatter-struct.ispc | 4 ++-- tests/masked-scatter-vector.ispc | 4 ++-- tests/masked-struct-scatter-varying.ispc | 4 ++-- tests/max-int-1.ispc | 4 ++-- tests/min-float.ispc | 2 +- tests/min-int.ispc | 2 +- tests/min-uint-2.ispc | 2 +- tests/nested-structs-2.ispc | 4 ++-- tests/pass-varying-lvalue-to-ref.ispc | 4 ++-- tests/phi-opts-1.ispc | 5 +++-- tests/popcnt-1.ispc | 19 +++++++++++++------ tests/popcnt-2.ispc | 12 +++++++----- tests/popcnt-3.ispc | 4 ++-- tests/typedef-2.ispc | 2 +- tests/varying-struct-3.ispc | 2 +- tests/varying-struct-6.ispc | 2 +- 69 files changed, 249 insertions(+), 158 deletions(-) diff --git a/tests/acos.ispc b/tests/acos.ispc index 45173782..225fda97 100644 --- a/tests/acos.ispc +++ b/tests/acos.ispc @@ -6,14 +6,14 @@ bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < export void f_v(uniform float RET[]) { uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 }; - uniform float r[8]; + uniform float r[programCount]; foreach (i = 0 ... 
8) - r[i] = cos(acos(vals[i])); + r[i] = cos(acos(vals[i % 8])); int errors = 0; for (uniform int i = 0; i < 8; ++i) { - if (ok(r[i], vals[i]) == false) { - print("error @ %: got %, expected %\n", i, r[i], vals[i]); + if (ok(r[i], vals[i%8]) == false) { + print("error @ %: got %, expected %\n", i, r[i], vals[i%8]); ++errors; } } diff --git a/tests/aossoa-1.ispc b/tests/aossoa-1.ispc index 6323d80f..59964d6d 100644 --- a/tests/aossoa-1.ispc +++ b/tests/aossoa-1.ispc @@ -3,7 +3,9 @@ export uniform int width() { return programCount; } export void f_v(uniform float RET[]) { #define width 3 -#define maxProgramCount 16 +#define maxProgramCount 64 + assert(programCount <= maxProgramCount); + //CO const uniform int width = 3; //CO const uniform int maxProgramCount = 16; uniform float a[width*maxProgramCount], r[width*maxProgramCount]; diff --git a/tests/aossoa-2.ispc b/tests/aossoa-2.ispc index b23a25e4..9ff82226 100644 --- a/tests/aossoa-2.ispc +++ b/tests/aossoa-2.ispc @@ -3,7 +3,9 @@ export uniform int width() { return programCount; } export void f_v(uniform float RET[]) { #define width 4 -#define maxProgramCount 16 +#define maxProgramCount 64 + assert(programCount <= maxProgramCount); + //CO const uniform int width = 4; //CO const uniform int maxProgramCount = 16; uniform float a[width*maxProgramCount], r[width*maxProgramCount]; diff --git a/tests/aossoa-5.ispc b/tests/aossoa-5.ispc index 883786c0..eb4fed3a 100644 --- a/tests/aossoa-5.ispc +++ b/tests/aossoa-5.ispc @@ -3,7 +3,9 @@ export uniform int width() { return programCount; } export void f_v(uniform float RET[]) { #define width 3 -#define maxProgramCount 16 +#define maxProgramCount 64 + assert(programCount <= maxProgramCount); + //CO const uniform int width = 3; //CO const uniform int maxProgramCount = 16; uniform int a[width*maxProgramCount], r[width*maxProgramCount]; diff --git a/tests/aossoa-6.ispc b/tests/aossoa-6.ispc index 34d35348..b64cd10b 100644 --- a/tests/aossoa-6.ispc +++ b/tests/aossoa-6.ispc @@ -3,7 
+3,9 @@ export uniform int width() { return programCount; } export void f_v(uniform float RET[]) { #define width 4 -#define maxProgramCount 16 +#define maxProgramCount 64 + assert(programCount <= maxProgramCount); + //CO const uniform int width = 4; //CO const uniform int maxProgramCount = 16; uniform int a[width*maxProgramCount], r[width*maxProgramCount]; diff --git a/tests/array-gather-ifs.ispc b/tests/array-gather-ifs.ispc index d635e10f..626d7c3b 100644 --- a/tests/array-gather-ifs.ispc +++ b/tests/array-gather-ifs.ispc @@ -5,9 +5,9 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - uniform float x[45]; + uniform float x[programCount+15]; uniform int i; - for (i = 0; i < 45; ++i) + for (i = 0; i < programCount+15; ++i) x[i] = i; float ret; diff --git a/tests/array-gather-multi-unif.ispc b/tests/array-gather-multi-unif.ispc index d876f314..016ecbfd 100644 --- a/tests/array-gather-multi-unif.ispc +++ b/tests/array-gather-multi-unif.ispc @@ -10,7 +10,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { for (uniform int i = 0; i < 29+b; ++i) for (uniform int j = 0; j < 29+b; ++j) x[i][j] = 0; - x[a][a] = a; + if (a < 34) + x[a][a] = a; RET[programIndex] = x[4][4] + x[1][1] + x[b][b] + x[0][0]; } diff --git a/tests/array-gather-simple.ispc b/tests/array-gather-simple.ispc index 8835b2f0..5abc7f97 100644 --- a/tests/array-gather-simple.ispc +++ b/tests/array-gather-simple.ispc @@ -12,8 +12,10 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } export void result(uniform float RET[]) { - RET[0] = 1; RET[4] = 5; RET[8] = 9; RET[12] = 13; - RET[1] = RET[5] = RET[9] = RET[13] = 0; - RET[2] = 6; RET[6] = 14; RET[10] = 22; RET[14] = 30; - RET[3] = RET[7] = RET[11] = RET[15] = 3; + for (uniform int i = 0; i < programCount; i += 4) { + RET[i] = i+1; + RET[i+1] = 0; + RET[i+2] = 2 * (i+3); + RET[i+3] 
= 3; + } } diff --git a/tests/array-gather-unif-runflags.ispc b/tests/array-gather-unif-runflags.ispc index f2936f05..c13f5c29 100644 --- a/tests/array-gather-unif-runflags.ispc +++ b/tests/array-gather-unif-runflags.ispc @@ -4,9 +4,9 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - uniform float x[45]; + uniform float x[programCount+5]; uniform int i; - for (i = 0; i < 45; ++i) + for (i = 0; i < programCount+5; ++i) x[i] = i+b; a -= 1; if (a == 3) a = 0; diff --git a/tests/array-gather-unif.ispc b/tests/array-gather-unif.ispc index 3e040ad3..7ff35c9d 100644 --- a/tests/array-gather-unif.ispc +++ b/tests/array-gather-unif.ispc @@ -4,9 +4,9 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - uniform float x[45]; + uniform float x[programCount+5]; uniform int i; - for (i = 0; i < 45; ++i) + for (i = 0; i < programCount+5; ++i) x[i] = i+b; RET[programIndex] = x[a]; } diff --git a/tests/array-gather-vary.ispc b/tests/array-gather-vary.ispc index bbbdd85d..6e8c2f5e 100644 --- a/tests/array-gather-vary.ispc +++ b/tests/array-gather-vary.ispc @@ -4,14 +4,14 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - float x[55]; + float x[programCount+10]; uniform int i; - for (i = 0; i < 45; ++i) + for (i = 0; i < programCount+10; ++i) x[i] = a+b; RET[programIndex] = x[a]; } export void result(uniform float RET[]) { - RET[programIndex] = 6 + programIndex;; + RET[programIndex] = 6 + programIndex; } diff --git a/tests/array-mixed-unif-vary-indexing-2.ispc b/tests/array-mixed-unif-vary-indexing-2.ispc index edd53c84..8143ca29 100644 --- a/tests/array-mixed-unif-vary-indexing-2.ispc +++ b/tests/array-mixed-unif-vary-indexing-2.ispc @@ -15,6 
+15,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { x[a][b-1] = 0; else x[a][b-1] = 1; + + a = min(a, 46); + RET[programIndex] = x[3][a]; } diff --git a/tests/array-mixed-unif-vary-indexing-3.ispc b/tests/array-mixed-unif-vary-indexing-3.ispc index 686f121e..ab3a7a7c 100644 --- a/tests/array-mixed-unif-vary-indexing-3.ispc +++ b/tests/array-mixed-unif-vary-indexing-3.ispc @@ -4,9 +4,10 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - uniform float x[47][47]; - for (uniform int i = 0; i < 47; ++i) - for (uniform int j = 0; j < 47; ++j) + assert(programCount <= 64); + uniform float x[70][70]; + for (uniform int i = 0; i < 70; ++i) + for (uniform int j = 0; j < 70; ++j) x[i][j] = 2+b-5; // all are 2 except (4,2) = 0, (4,...) = 1, (4,programCount-1)=2 diff --git a/tests/array-mixed-unif-vary-indexing.ispc b/tests/array-mixed-unif-vary-indexing.ispc index ebe932ad..96fc0870 100644 --- a/tests/array-mixed-unif-vary-indexing.ispc +++ b/tests/array-mixed-unif-vary-indexing.ispc @@ -10,6 +10,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { for (uniform int j = 0; j < 47; ++j) x[i][j] = 2+b-5; + a = min(a,46); x[a][b-1] = 0; RET[programIndex] = x[2][a]; } diff --git a/tests/array-multidim-gather-scatter.ispc b/tests/array-multidim-gather-scatter.ispc index 8a2f3947..1528b070 100644 --- a/tests/array-multidim-gather-scatter.ispc +++ b/tests/array-multidim-gather-scatter.ispc @@ -11,7 +11,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform int index[4] = { 0, 1, 2, 4 }; float v = index[programIndex & 0x3]; - x[a][v] = 0; + x[min(a,39)][v] = 0; RET[programIndex] = x[v+1][v]; } diff --git a/tests/array-scatter-unif-2.ispc b/tests/array-scatter-unif-2.ispc index ef0c659b..89181be6 100644 --- a/tests/array-scatter-unif-2.ispc +++ 
b/tests/array-scatter-unif-2.ispc @@ -4,9 +4,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - uniform float x[100]; - // HACK to avoid @llvm.memset... - for (uniform int i = 0; i < b*20; ++i) + uniform float x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) x[i] = 0; x[2*(a-1)] = b; diff --git a/tests/array-scatter-unif-3.ispc b/tests/array-scatter-unif-3.ispc index 8aad3110..703af6fa 100644 --- a/tests/array-scatter-unif-3.ispc +++ b/tests/array-scatter-unif-3.ispc @@ -4,9 +4,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - uniform float x[100]; - // HACK to avoid @llvm.memset... - for (uniform int i = 0; i < b*20; ++i) + uniform float x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) x[i] = 0; x[2*(a-1)] = b; diff --git a/tests/array-scatter-unif.ispc b/tests/array-scatter-unif.ispc index a553d703..6b5e75d0 100644 --- a/tests/array-scatter-unif.ispc +++ b/tests/array-scatter-unif.ispc @@ -5,8 +5,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - uniform float x[40]; - for (uniform int i = 0; i < 40; ++i) + uniform float x[programCount+5]; + for (uniform int i = 0; i < programCount+5; ++i) x[i] = 0.; x[a] = 2; RET[programIndex] = x[4] + x[0] + x[5]; diff --git a/tests/array-scatter-vary.ispc b/tests/array-scatter-vary.ispc index 07527519..d9d9bc37 100644 --- a/tests/array-scatter-vary.ispc +++ b/tests/array-scatter-vary.ispc @@ -4,9 +4,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - float x[30]; - // HACK to avoid @llvm.memset... 
- for (uniform int i = 0; i < b*6; ++i) + float x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) x[i] = 0; x[a] = a; RET[programIndex] = x[4] + x[0] + x[5]; diff --git a/tests/array-struct-gather.ispc b/tests/array-struct-gather.ispc index 7a18acba..d3799800 100644 --- a/tests/array-struct-gather.ispc +++ b/tests/array-struct-gather.ispc @@ -4,14 +4,14 @@ export uniform int width() { return programCount; } struct Foo { - uniform float x[17]; + uniform float x[programCount+1]; }; export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; uniform Foo foo; uniform int i; - for (i = 0; i < 17; ++i) + for (i = 0; i < programCount+1; ++i) foo.x[i] = i; if ((int)a & 1) diff --git a/tests/asin.ispc b/tests/asin.ispc index a6839b09..4ad23b3a 100644 --- a/tests/asin.ispc +++ b/tests/asin.ispc @@ -8,12 +8,12 @@ export void f_v(uniform float RET[]) { uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 }; uniform float r[8]; foreach (i = 0 ... 
8) - r[i] = sin(asin(vals[i])); + r[i] = sin(asin(vals[i%8])); int errors = 0; for (uniform int i = 0; i < 8; ++i) { - if (ok(r[i], vals[i]) == false) { - print("error @ %: got %, expected %\n", i, r[i], vals[i]); + if (ok(r[i], vals[i%8]) == false) { + print("error @ %: got %, expected %\n", i, r[i], vals[i%8]); ++errors; } } diff --git a/tests/atomics-12.ispc b/tests/atomics-12.ispc index 0596a85f..c27ad99c 100644 --- a/tests/atomics-12.ispc +++ b/tests/atomics-12.ispc @@ -6,14 +6,14 @@ uniform unsigned int32 s = 0; export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; float b = 0; - if (programIndex & 1) + if (programIndex < 30 && programIndex & 1) b = atomic_or_global(&s, (1 << programIndex)); RET[programIndex] = s; } export void result(uniform float RET[]) { uniform int sum = 0; - for (uniform int i = 0; i < programCount; ++i) + for (uniform int i = 0; i < min(30, programCount); ++i) if (i & 1) sum += (1 << i); RET[programIndex] = sum; diff --git a/tests/atomics-13.ispc b/tests/atomics-13.ispc index fe9a5d1e..86faaddb 100644 --- a/tests/atomics-13.ispc +++ b/tests/atomics-13.ispc @@ -5,12 +5,12 @@ uniform unsigned int32 s = 0; export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; - float b = 0; - if (programIndex & 1) + int32 b = 0; + if (programIndex < 32 && programIndex & 1) b = atomic_or_global(&s, (1 << programIndex)); RET[programIndex] = popcnt(reduce_max((int32)b)); } export void result(uniform float RET[]) { - RET[programIndex] = programCount == 1 ? 0 : ((programCount/2) - 1); + RET[programIndex] = programCount == 1 ? 
0 : ((min(32, programCount)/2) - 1); } diff --git a/tests/atomics-14.ispc b/tests/atomics-14.ispc index c4551039..7beb3e75 100644 --- a/tests/atomics-14.ispc +++ b/tests/atomics-14.ispc @@ -7,14 +7,14 @@ export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; float b = 0; if (programIndex & 1) - b = atomic_or_global(&s, (1 << programIndex)); + b = atomic_or_global(&s, (1ull << programIndex)); RET[programIndex] = (s>>20); } export void result(uniform float RET[]) { - uniform int sum = 0; + uniform int64 sum = 0; for (uniform int i = 0; i < programCount; ++i) if (i & 1) - sum += (1 << i); + sum += (1ull << i); RET[programIndex] = ((unsigned int64)(0xffffffffff000000 | sum)) >> 20; } diff --git a/tests/atomics-4.ispc b/tests/atomics-4.ispc index 83e9fbf0..30b343d1 100644 --- a/tests/atomics-4.ispc +++ b/tests/atomics-4.ispc @@ -5,10 +5,10 @@ uniform int32 s = 0; export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; - float b = atomic_or_global(&s, (1< 0) { + if ((programIndex & 1) == 0 && programIndex > 0 && programIndex < 32) { int val = 0xffffffff; for (int i = 0; i < programIndex-1; i += 2) val &= ~(1<>20); } export void result(uniform float RET[]) { uniform int sum = 0; - for (uniform int i = 0; i < programCount; ++i) + for (uniform int i = 0; i < min(32, programCount); ++i) if (i & 1) sum += (1 << i); RET[programIndex] = ((unsigned int64)(0xffffffffff000000 | sum)) >> 20; diff --git a/tests/local-atomics-4.ispc b/tests/local-atomics-4.ispc index 651cf4c6..f7f6a04a 100644 --- a/tests/local-atomics-4.ispc +++ b/tests/local-atomics-4.ispc @@ -5,10 +5,12 @@ uniform int32 s = 0; export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; - float b = atomic_or_local(&s, (1< int3; export void f_fu(uniform float ret[], uniform float aa[], uniform float b) { float a = aa[programIndex]; - uniform int3 array[32]; - for (uniform int i = 0; i < 6*b + 2; ++i) { + uniform int3 
array[programCount]; + for (uniform int i = 0; i < programCount + 5 - b; ++i) { for (uniform int j = 0; j < 3; ++j) array[i][j] = i+100*j; } diff --git a/tests/masked-struct-scatter-varying.ispc b/tests/masked-struct-scatter-varying.ispc index 928197a3..8211aa67 100644 --- a/tests/masked-struct-scatter-varying.ispc +++ b/tests/masked-struct-scatter-varying.ispc @@ -5,8 +5,8 @@ struct Foo { float x; float y; }; export void f_fu(uniform float ret[], uniform float aa[], uniform float b) { float a = aa[programIndex]; - Foo foo[32]; - for (uniform int i = 0; i < 32; ++i) + Foo foo[programCount+1]; + for (uniform int i = 0; i < programCount+1; ++i) foo[i].x = i; varying Foo fv = foo[a]; fv.x += 1000; diff --git a/tests/max-int-1.ispc b/tests/max-int-1.ispc index 7ad02713..f1492b8b 100644 --- a/tests/max-int-1.ispc +++ b/tests/max-int-1.ispc @@ -6,8 +6,8 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = max((int)20, i); + RET[programIndex] = max((int)200, i); } -export void result(uniform float RET[]) { RET[programIndex] = 20.; } +export void result(uniform float RET[]) { RET[programIndex] = 200.; } diff --git a/tests/min-float.ispc b/tests/min-float.ispc index 3577daab..caedd962 100644 --- a/tests/min-float.ispc +++ b/tests/min-float.ispc @@ -5,7 +5,7 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; - RET[programIndex] = min(a, 20.f); + RET[programIndex] = min(a, 200.f); } export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } diff --git a/tests/min-int.ispc b/tests/min-int.ispc index 50df3e19..483b9b41 100644 --- a/tests/min-int.ispc +++ b/tests/min-int.ispc @@ -6,7 +6,7 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { float a = aFOO[programIndex]; int 
i = (int)a; - RET[programIndex] = min((int)20, i); + RET[programIndex] = min((int)200, i); } export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } diff --git a/tests/min-uint-2.ispc b/tests/min-uint-2.ispc index e8f0e8c9..9338aeb0 100644 --- a/tests/min-uint-2.ispc +++ b/tests/min-uint-2.ispc @@ -3,7 +3,7 @@ export uniform int width() { return programCount; } export void f_f(uniform float r[], uniform float a[]) { unsigned int i = (unsigned int)a[programIndex]; - r[programIndex] = min((unsigned int)20, i); + r[programIndex] = min((unsigned int)200, i); } export void result(uniform float r[]) { diff --git a/tests/nested-structs-2.ispc b/tests/nested-structs-2.ispc index cb58e588..fd30c7ef 100644 --- a/tests/nested-structs-2.ispc +++ b/tests/nested-structs-2.ispc @@ -4,7 +4,7 @@ export uniform int width() { return programCount; } struct Foo { - float f[18]; + float f[129]; }; struct Bar { @@ -15,7 +15,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; uniform Bar bar; for (uniform int i = 0; i < 6; ++i) - for (uniform int j = 0; j < 18; ++j) + for (uniform int j = 0; j < 129; ++j) bar.foo[i].f[j] = 2.+b-5; bar.foo[5].f[a] = a; diff --git a/tests/pass-varying-lvalue-to-ref.ispc b/tests/pass-varying-lvalue-to-ref.ispc index 4089b9d9..68beba1c 100644 --- a/tests/pass-varying-lvalue-to-ref.ispc +++ b/tests/pass-varying-lvalue-to-ref.ispc @@ -4,8 +4,8 @@ export uniform int width() { return programCount; } void inc(uniform float * varying v) { ++(*v); } export void f_fu(uniform float ret[], uniform float aa[], uniform float b) { - uniform float foo[32]; - for (uniform int i = 0; i < 32; ++i) + uniform float foo[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) foo[i] = 10+i; int a = (int)aa[programIndex]; inc(&foo[a]); diff --git a/tests/phi-opts-1.ispc b/tests/phi-opts-1.ispc index d4265681..fb0a5282 100644 --- a/tests/phi-opts-1.ispc +++ b/tests/phi-opts-1.ispc @@ 
-3,8 +3,9 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { float sum = 0; - for (int i = 0; i < 16; i += programCount) - sum += aFOO[i+programIndex]; + for (int i = programIndex; i < 16; i += programCount) { + sum += aFOO[i]; + } RET[programIndex] = reduce_add(sum); } diff --git a/tests/popcnt-1.ispc b/tests/popcnt-1.ispc index 9a64c113..98139ea7 100644 --- a/tests/popcnt-1.ispc +++ b/tests/popcnt-1.ispc @@ -8,11 +8,18 @@ export void f_f(uniform float RET[], uniform float aFOO[]) { RET[programIndex] = popcnt((int)a); } -export void result(uniform float RET[]) { - uniform int pc[16] = { 1, 1, 2, 1, - 2, 2, 3, 1, - 2, 2, 3, 2, - 3, 3, 4, 1 }; - RET[programIndex] = pc[programIndex]; +static int manualpc(int v) { + int count = 0; + while (v != 0) { + if (v & 1) + ++count; + v >>= 1; + } + return count; +} + +export void result(uniform float RET[]) { + assert(programCount <= 64); + RET[programIndex] = manualpc(programIndex+1); } diff --git a/tests/popcnt-2.ispc b/tests/popcnt-2.ispc index f792dde0..9a48fb7e 100644 --- a/tests/popcnt-2.ispc +++ b/tests/popcnt-2.ispc @@ -18,9 +18,11 @@ export void f_f(uniform float RET[], uniform float aFOO[]) { RET[programIndex] = popcnt(int4(0xf0f0f0f0, 0xff, 0x10, 0)); } -export void result(uniform float RET[]) { - RET[0] = RET[4] = RET[8] = RET[12] = 16; - RET[1] = RET[5] = RET[9] = RET[13] = 8; - RET[2] = RET[6] = RET[10] = RET[14] = 1; - RET[3] = RET[7] = RET[11] = RET[15] = 0; +export void result(uniform float RET[]) { + for (uniform int i = 0; i < programCount; i += 4) { + RET[i] = 16; + RET[i+1] = 8; + RET[i+2] = 1; + RET[i+3] = 0; + } } diff --git a/tests/popcnt-3.ispc b/tests/popcnt-3.ispc index 110bf5e4..4ac8fa18 100644 --- a/tests/popcnt-3.ispc +++ b/tests/popcnt-3.ispc @@ -3,9 +3,9 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[4], uniform float aFOO[]) { +export void f_f(uniform float RET[], uniform float aFOO[]) { 
float a = aFOO[programIndex]; RET[programIndex] = popcnt(a < 3); } -export void result(uniform float RET[]) { RET[programIndex] = programCount == 1 ? 1 : 2; } +export void result(uniform float RET[]) { RET[programIndex] = (programCount == 1) ? 1 : 2; } diff --git a/tests/typedef-2.ispc b/tests/typedef-2.ispc index 9217ff2c..e8117dd4 100644 --- a/tests/typedef-2.ispc +++ b/tests/typedef-2.ispc @@ -19,7 +19,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { for (uniform int i = 0; i < 16; ++i) for (uniform int j = 0; j < 16; ++j) bar.foo[i].x[j] = b; - RET[programIndex] = bar.foo[a-1].x[a-1]; + RET[programIndex] = bar.foo[min(15, a-1)].x[min(15, a-1)]; } export void result(uniform float RET[]) { RET[programIndex] = 5; } diff --git a/tests/varying-struct-3.ispc b/tests/varying-struct-3.ispc index 1691e855..af64878c 100644 --- a/tests/varying-struct-3.ispc +++ b/tests/varying-struct-3.ispc @@ -15,7 +15,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { RET[programIndex] = g.x; } -export void result(uniform float RET[4]) { +export void result(uniform float RET[]) { RET[programIndex] = 15; RET[0] = RET[1] = 10; } diff --git a/tests/varying-struct-6.ispc b/tests/varying-struct-6.ispc index 450e76e6..08bba06c 100644 --- a/tests/varying-struct-6.ispc +++ b/tests/varying-struct-6.ispc @@ -18,4 +18,4 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { RET[programIndex] = bar.uf.x + bar.vf.y; } -export void result(uniform float RET[4]) { RET[programIndex] = 6+programIndex; } +export void result(uniform float RET[]) { RET[programIndex] = 6+programIndex; } From 5cb53f52c3885c23c559603b37ed8985c5f063a7 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 30 May 2012 10:31:12 -0700 Subject: [PATCH 169/173] Fix various tests/[frs]* files to be correct with 32 and 64-wide targets. 
Still todo: tests/c*, tests/test-* --- tests/frexp-double-1.ispc | 4 ++-- tests/frexp-double.ispc | 2 +- tests/frexp-float-1.ispc | 4 ++-- tests/frexp-float.ispc | 2 +- tests/recursion-forward-func-decl.ispc | 8 ++++---- tests/recursion.ispc | 8 ++++---- tests/scatter-struct-with-array-member.ispc | 1 + tests/scatter-struct.ispc | 4 ++-- tests/scatter-vector.ispc | 4 ++-- tests/soa-1.ispc | 1 + tests/soa-10.ispc | 1 + tests/soa-13.ispc | 1 + tests/soa-15.ispc | 12 +++--------- tests/soa-16.ispc | 14 +++----------- tests/soa-17.ispc | 15 +++------------ tests/soa-18.ispc | 1 + tests/soa-19.ispc | 1 + tests/soa-2.ispc | 1 + tests/soa-20.ispc | 3 ++- tests/soa-21.ispc | 5 +++-- tests/soa-22.ispc | 6 +++--- tests/soa-23.ispc | 8 ++++---- tests/soa-24.ispc | 6 +++--- tests/soa-25.ispc | 6 +++--- tests/soa-26.ispc | 1 - tests/soa-28.ispc | 5 +++-- tests/soa-29.ispc | 5 +++-- tests/soa-3.ispc | 1 + tests/soa-4.ispc | 5 +++-- tests/soa-5.ispc | 5 +++-- tests/soa-6.ispc | 5 +++-- tests/soa-7.ispc | 5 +++-- tests/soa-8.ispc | 5 +++-- tests/soa-9.ispc | 1 + tests/struct-gather-2.ispc | 4 ++-- tests/struct-gather-3.ispc | 4 ++-- tests/struct-gather.ispc | 4 ++-- tests/struct-ref-lvalue.ispc | 4 ++-- tests/struct-test-114.ispc | 5 +++-- tests/struct-vary-index-expr.ispc | 11 ++++++----- tests/switch-10.ispc | 2 +- tests/switch-11.ispc | 2 +- tests/switch-12.ispc | 2 +- tests/switch-8.ispc | 2 +- tests/switch-9.ispc | 2 +- 45 files changed, 97 insertions(+), 101 deletions(-) diff --git a/tests/frexp-double-1.ispc b/tests/frexp-double-1.ispc index db6f742e..6c38b05e 100644 --- a/tests/frexp-double-1.ispc +++ b/tests/frexp-double-1.ispc @@ -3,7 +3,7 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - double a = (1< Point pts[10]; -//CO uniform Point pts[40]; -//CO foreach (i = 0 ... 
40) { - for (uniform int i = 0; i < 40; ++i) { + soa<4> Point pts[30]; + for (uniform int i = 0; i < 120; ++i) { pts[i].x = b*i; pts[i].y[0] = 2*b*i; pts[i].y[1] = 2*b*i+1; @@ -26,14 +24,10 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { pts[i].z = 3*b*i; } -//CO p((uniform float * uniform)&pts[0]); - -//CO print("delta %\n", ((uniform float * varying)(&pts[2+programIndex]) - -//CO (uniform float * uniform)&pts[0])); - float a = aFOO[programIndex]; a *= -1; Point vp = { a, { 2*a, 3*a, 4*a }, {5*a} }; + assert(programCount+2 < 120); pts[2+programIndex] = vp; //CO p((uniform float * uniform)&pts[0]); diff --git a/tests/soa-16.ispc b/tests/soa-16.ispc index 0f8c9c1f..f23c39cb 100644 --- a/tests/soa-16.ispc +++ b/tests/soa-16.ispc @@ -15,10 +15,8 @@ static void p(uniform float *uniform ptr) { } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - soa<4> Point pts[10]; -//CO uniform Point pts[40]; -//CO foreach (i = 0 ... 40) { - for (uniform int i = 0; i < 40; ++i) { + soa<4> Point pts[30]; + for (uniform int i = 0; i < 120; ++i) { pts[i].x = b*i; pts[i].y[0] = 2*b*i; pts[i].y[1] = 2*b*i+1; @@ -26,18 +24,12 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { pts[i].z = 3*b*i; } -//CO p((uniform float * uniform)&pts[0]); - -//CO print("delta %\n", ((uniform float * varying)(&pts[2+programIndex]) - -//CO (uniform float * uniform)&pts[0])); - float a = aFOO[programIndex]; a *= -1; Point vp = { a, { 2*a, 3*a, 4*a }, {5*a} }; + assert(programCount + 2 < 120); pts[2+programIndex] = vp; -//CO p((uniform float * uniform)&pts[0]); - RET[programIndex] = pts[programIndex].y[2]; } diff --git a/tests/soa-17.ispc b/tests/soa-17.ispc index 2423cf66..f25b85bd 100644 --- a/tests/soa-17.ispc +++ b/tests/soa-17.ispc @@ -16,10 +16,8 @@ static void p(uniform float *uniform ptr) { } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - soa<4> Point pts[10]; -//CO uniform 
Point pts[40]; -//CO foreach (i = 0 ... 40) { - for (uniform int i = 0; i < 40; ++i) { + soa<4> Point pts[40]; + for (uniform int i = 0; i < 160; ++i) { pts[i].x = b*i; pts[i].y[0] = 2*b*i; pts[i].y[1] = 2*b*i+1; @@ -27,19 +25,12 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { pts[i].z = 3*b*i; } -//CO p((uniform float * uniform)&pts[0]); - -//CO print("one size %\n", sizeof(soa<4> Point)); -//CO print("delta %\n", ((uniform int8 * varying)(&pts[2+programIndex]) - -//CO (uniform int8 * uniform)&pts[0])); - float a = aFOO[programIndex]; a *= -1; Point vp = { a, { 2*a, 3*a, 4*a }, {5*a} }; + assert(2+programIndex < 160); pts[2+programIndex] = vp; -//CO p((uniform float * uniform)&pts[0]); - RET[programIndex] = pts[programIndex].y[2]; } diff --git a/tests/soa-18.ispc b/tests/soa-18.ispc index 1e8b70f6..39e0a80f 100644 --- a/tests/soa-18.ispc +++ b/tests/soa-18.ispc @@ -17,6 +17,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { ++ptr; ptr->y = -programIndex; + assert(1+programCount < 80); RET[programIndex] = pts[1+programIndex].y; } diff --git a/tests/soa-19.ispc b/tests/soa-19.ispc index 3b7ad46f..f9a8103b 100644 --- a/tests/soa-19.ispc +++ b/tests/soa-19.ispc @@ -16,6 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { soa<8> Point * ptr = &pts[6+programIndex]; ptr->y = -programIndex;; + assert(6+programIndex < 80); RET[programIndex] = pts[6+programIndex].y; } diff --git a/tests/soa-2.ispc b/tests/soa-2.ispc index d8ec37e3..e92d7c9b 100644 --- a/tests/soa-2.ispc +++ b/tests/soa-2.ispc @@ -13,6 +13,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { pts[i].z = 3*b*i; } + assert(programCount < 80); RET[programIndex] = pts[programIndex].z; } diff --git a/tests/soa-20.ispc b/tests/soa-20.ispc index f8a1fe6f..8e87ece6 100644 --- a/tests/soa-20.ispc +++ b/tests/soa-20.ispc @@ -12,7 +12,8 @@ export void f_fu(uniform float RET[], uniform 
float aFOO[], uniform float b) { pts[i].y = 2*b*i; pts[i].z = 3*b*i; } - + + assert(6+programIndex < 80); soa<8> Point * ptr = &pts[6+programIndex]; RET[programIndex] = ptr - pts; } diff --git a/tests/soa-21.ispc b/tests/soa-21.ispc index f19ae448..46309788 100644 --- a/tests/soa-21.ispc +++ b/tests/soa-21.ispc @@ -12,8 +12,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Foo * uniform pts = uniform new soa<8> Foo[4]; - foreach (i = 0 ... 32) { + soa<8> Foo * uniform pts = uniform new soa<8> Foo[11]; + foreach (i = 0 ... 88) { pts[i].x = b*i; pts[i].z = -b*i; for (uniform int j = 0; j < 10; ++j) { @@ -23,6 +23,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } } + assert(7+programCount < 88); soa<8> Foo * ptr = &pts[7+programIndex]; RET[programIndex] = ptr->pts[3].z; } diff --git a/tests/soa-22.ispc b/tests/soa-22.ispc index 41795c96..60448694 100644 --- a/tests/soa-22.ispc +++ b/tests/soa-22.ispc @@ -12,9 +12,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Foo * uniform pts = uniform new soa<8> Foo[4]; -//CO uniform Foo pts[32]; - foreach (i = 0 ... 32) { + soa<8> Foo * uniform pts = uniform new soa<8> Foo[10]; + foreach (i = 0 ... 
80) { pts[i].x = b*i; pts[i].z = -b*i; for (uniform int j = 0; j < 3; ++j) { @@ -27,6 +26,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } } + assert(programIndex < 80); RET[programIndex] = pts[programIndex].pts[programIndex % 3][programIndex % 4].z; } diff --git a/tests/soa-23.ispc b/tests/soa-23.ispc index 24b7b679..928eba25 100644 --- a/tests/soa-23.ispc +++ b/tests/soa-23.ispc @@ -11,15 +11,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Foo * uniform pts = uniform new soa<8> Foo[4]; -//CO uniform Foo pts[32]; - foreach (i = 0 ... 32) { + soa<8> Foo * uniform pts = uniform new soa<8> Foo[10]; + foreach (i = 0 ... 80) { pts[i].vec.x = b*i; pts[i].vec.y = -b*i; pts[i].vec.z = 2*b*i; pts[i].z = i; } - + + assert(programIndex + 2 < 80); RET[programIndex] = pts[programIndex+2].vec.y; } diff --git a/tests/soa-24.ispc b/tests/soa-24.ispc index 23835c9c..758f094e 100644 --- a/tests/soa-24.ispc +++ b/tests/soa-24.ispc @@ -11,9 +11,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Foo * uniform pts = uniform new soa<8> Foo[4]; -//CO uniform Foo pts[32]; - foreach (i = 0 ... 32) { + soa<8> Foo * uniform pts = uniform new soa<8> Foo[10]; + foreach (i = 0 ... 
80) { pts[i].vec.x = b*i; pts[i].vec.y = -b*i; pts[i].vec.z = 2*b*i; @@ -21,6 +20,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } pts[programIndex+2].vec.z *= -1; + assert(programIndex < 80); float<3> vl = pts[programIndex].vec; RET[programIndex] = vl.z; } diff --git a/tests/soa-25.ispc b/tests/soa-25.ispc index d3be5253..b21e0d57 100644 --- a/tests/soa-25.ispc +++ b/tests/soa-25.ispc @@ -11,9 +11,8 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Foo * uniform pts = uniform new soa<8> Foo[4]; -//CO uniform Foo pts[32]; - foreach (i = 0 ... 32) { + soa<8> Foo * uniform pts = uniform new soa<8> Foo[10]; + foreach (i = 0 ... 80) { pts[i].vec.x = b*i; pts[i].vec.y = -b*i; pts[i].vec.z = 2*b*i; @@ -21,6 +20,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { } pts[2].vec.x *= -1; + assert(programCount < 80); float<3> vl = pts[programIndex].vec; RET[programIndex] = vl.x; } diff --git a/tests/soa-26.ispc b/tests/soa-26.ispc index b765825a..cf1fc6d2 100644 --- a/tests/soa-26.ispc +++ b/tests/soa-26.ispc @@ -12,7 +12,6 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; soa<8> Foo * uniform pts = uniform new soa<8> Foo[4]; -//CO uniform Foo pts[32]; for (uniform int i = 0; i < 32; ++i) { pts[i].vec.x = b*i; pts[i].vec.y = -b*i; diff --git a/tests/soa-28.ispc b/tests/soa-28.ispc index 92f3c4a3..a4df02dc 100644 --- a/tests/soa-28.ispc +++ b/tests/soa-28.ispc @@ -6,9 +6,9 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Point pts[10]; + soa<8> Point pts[20]; - foreach (i = b-5 ... 80) { + foreach (i = b-5 ... 
160) { pts[i].x = b*i; pts[i].y = 2*b*i; pts[i].z = 3*b*i; @@ -16,6 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform Point up = pts[4]; + assert(2*programCount < 160); RET[programIndex] = pts[2*programIndex].x; } diff --git a/tests/soa-29.ispc b/tests/soa-29.ispc index e9a5a069..9c2d6f28 100644 --- a/tests/soa-29.ispc +++ b/tests/soa-29.ispc @@ -6,9 +6,9 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Point pts[10]; + soa<8> Point pts[20]; - for (int i = programIndex; i < 16*b; i += programCount) { + for (int i = programIndex; i < 32*b; i += programCount) { pts[i].x = b*i; pts[i].y = 2*b*i; pts[i].z = 3*b*i; @@ -16,6 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform Point up = pts[4]; + assert(2*programIndex < 160); RET[programIndex] = pts[2*programIndex].x; } diff --git a/tests/soa-3.ispc b/tests/soa-3.ispc index 0eed0bf4..2cec07a5 100644 --- a/tests/soa-3.ispc +++ b/tests/soa-3.ispc @@ -16,6 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { pts[i].z = 3*b*i; } + assert(programCount < 80); RET[programIndex] = pts[programIndex].y[2]; } diff --git a/tests/soa-4.ispc b/tests/soa-4.ispc index 3af768fb..88a35393 100644 --- a/tests/soa-4.ispc +++ b/tests/soa-4.ispc @@ -6,14 +6,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Point pts[8]; - foreach (i = 0 ... 64) { + soa<8> Point pts[10]; + foreach (i = 0 ... 
80) { pts[i].x = 0; pts[i].y = 0; pts[i].z = 0; } Point pv = { a, b, -a }; + assert(8+programCount < 80); pts[8+programIndex] = pv; RET[programIndex] = pts[8+programIndex].z; diff --git a/tests/soa-5.ispc b/tests/soa-5.ispc index ae775c65..070375c6 100644 --- a/tests/soa-5.ispc +++ b/tests/soa-5.ispc @@ -6,14 +6,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Point pts[8]; - foreach (i = 0 ... 64) { + soa<8> Point pts[9]; + foreach (i = 0 ... 72) { pts[i].x = 0; pts[i].y = 0; pts[i].z = 0; } Point pv = { a, b, -a }; + assert(6+programCount < 72); pts[6+programIndex] = pv; RET[programIndex] = pts[6+programIndex].x; diff --git a/tests/soa-6.ispc b/tests/soa-6.ispc index 4dd8439e..942b2d4e 100644 --- a/tests/soa-6.ispc +++ b/tests/soa-6.ispc @@ -6,14 +6,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Point pts[8]; - foreach (i = 0 ... 64) { + soa<8> Point pts[11]; + foreach (i = 0 ... 88) { pts[i].x = -42; pts[i].y = 0; pts[i].z = 0; } Point pv = { a, b, -a }; + assert(8+programCount < 88); pts[8+programIndex] = pv; RET[programIndex] = pts[6+programIndex].x; diff --git a/tests/soa-7.ispc b/tests/soa-7.ispc index 042cf3dc..a7163aa4 100644 --- a/tests/soa-7.ispc +++ b/tests/soa-7.ispc @@ -6,14 +6,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Point pts[8]; - foreach (i = 0 ... 64) { + soa<8> Point pts[11]; + foreach (i = 0 ... 
88) { pts[i].x = -42; pts[i].y = 0; pts[i].z = 0; } Point pv = { a, b, -a }; + assert(8+programCount < 88); pts[8+programIndex].x = pv.x; pts[8+programIndex].y = pv.y; pts[8+programIndex].z = pv.z; diff --git a/tests/soa-8.ispc b/tests/soa-8.ispc index 0235fc2c..1e4b88d6 100644 --- a/tests/soa-8.ispc +++ b/tests/soa-8.ispc @@ -6,14 +6,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - soa<8> Point pts[8]; - foreach (i = 0 ... 64) { + soa<8> Point pts[12]; + foreach (i = 0 ... 96) { pts[i].x = -42; pts[i].y = 0; pts[i].z = 0; } Point pv = { a, b, -a }; + assert(8+programCount < 96); pts[7+programIndex] = pv; RET[programIndex] = pts[8+programIndex].x; diff --git a/tests/soa-9.ispc b/tests/soa-9.ispc index 0249434c..872065b9 100644 --- a/tests/soa-9.ispc +++ b/tests/soa-9.ispc @@ -16,6 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform Point up = { b, 3, 170 }; pts[1] = up; + assert(programCount < 80); RET[programIndex] = pts[programIndex].z; } diff --git a/tests/struct-gather-2.ispc b/tests/struct-gather-2.ispc index cfd427b7..6411ae90 100644 --- a/tests/struct-gather-2.ispc +++ b/tests/struct-gather-2.ispc @@ -13,9 +13,9 @@ float func(Foo foo[], int offset) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - Foo foo[17]; + Foo foo[programCount+5]; uniform int i; - for (i = 0; i < 17; ++i) + for (i = 0; i < programCount+5; ++i) foo[i].f = i*a; RET[programIndex] = func(foo, (int)a); } diff --git a/tests/struct-gather-3.ispc b/tests/struct-gather-3.ispc index cfd427b7..6de2c123 100644 --- a/tests/struct-gather-3.ispc +++ b/tests/struct-gather-3.ispc @@ -13,9 +13,9 @@ float func(Foo foo[], int offset) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - Foo foo[17]; + Foo 
foo[programCount+1]; uniform int i; - for (i = 0; i < 17; ++i) + for (i = 0; i < programCount+1; ++i) foo[i].f = i*a; RET[programIndex] = func(foo, (int)a); } diff --git a/tests/struct-gather.ispc b/tests/struct-gather.ispc index efa42e1d..a92a5862 100644 --- a/tests/struct-gather.ispc +++ b/tests/struct-gather.ispc @@ -9,9 +9,9 @@ struct Foo { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - Foo foo[17]; + Foo foo[programCount+1]; uniform int i; - for (i = 0; i < 17; ++i) + for (i = 0; i < programCount+1; ++i) foo[i].f = i*a; RET[programIndex] = foo[(int)a].f; } diff --git a/tests/struct-ref-lvalue.ispc b/tests/struct-ref-lvalue.ispc index 535a1a0f..5cfbdf31 100644 --- a/tests/struct-ref-lvalue.ispc +++ b/tests/struct-ref-lvalue.ispc @@ -11,8 +11,8 @@ void f(Foo foo[], float a) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - Foo foo[17]; - for (uniform int i = 0; i < 17; ++i) + Foo foo[programCount+5]; + for (uniform int i = 0; i < programCount+5; ++i) foo[i].f = a; f(foo, a); RET[programIndex] = foo[a].f; diff --git a/tests/struct-test-114.ispc b/tests/struct-test-114.ispc index 66c4b07c..b5d8b18b 100644 --- a/tests/struct-test-114.ispc +++ b/tests/struct-test-114.ispc @@ -10,12 +10,13 @@ struct Foo { export void f_fi(uniform float RET[], uniform float aFOO[], uniform int bFOO[]) { float a = aFOO[programIndex]; int b = bFOO[programIndex]; - varying Foo myFoo[17]; + varying Foo myFoo[128]; uniform int i; - for (i = 0; i < 17; ++i) { + for (i = 0; i < 128; ++i) { myFoo[i].x = i; myFoo[i].f = 2*i; } + assert(b/2 < 128); RET[programIndex] = myFoo[b/2].f; } diff --git a/tests/struct-vary-index-expr.ispc b/tests/struct-vary-index-expr.ispc index dbf8de8f..169a9570 100644 --- a/tests/struct-vary-index-expr.ispc +++ b/tests/struct-vary-index-expr.ispc @@ -7,13 +7,14 @@ struct Foo { float f; }; export void f_fu(uniform float RET[], uniform float 
aFOO[], uniform float b) { float a = aFOO[programIndex]; - Foo foo[17]; - for (uniform int i = 0; i < 17; ++i) + Foo foo[programCount+1]; + uniform int ind[programCount+1]; + for (uniform int i = 0; i < programCount+1; ++i) { foo[i].f = a; + ind[i] = i+1; + } ++foo[a].f; - assert(programCount <= 16); - uniform int i[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 }; - RET[programIndex] = foo[i[programIndex]].f; + RET[programIndex] = foo[ind[programIndex]].f; } export void result(uniform float RET[]) { RET[programIndex] = 2+programIndex; } diff --git a/tests/switch-10.ispc b/tests/switch-10.ispc index abd94d7c..2957aee6 100644 --- a/tests/switch-10.ispc +++ b/tests/switch-10.ispc @@ -21,7 +21,7 @@ int switchit(int a, uniform int b) { } return -1234; } - case 32: + case 9999: *((int *)NULL) = 0; default: return 0; diff --git a/tests/switch-11.ispc b/tests/switch-11.ispc index daacdf76..c520f4f5 100644 --- a/tests/switch-11.ispc +++ b/tests/switch-11.ispc @@ -27,7 +27,7 @@ int switchit(int a, uniform int b) { } return 42; } - case 32: + case 9999: *((int *)NULL) = 0; default: return 0; diff --git a/tests/switch-12.ispc b/tests/switch-12.ispc index 9a803012..67e4d076 100644 --- a/tests/switch-12.ispc +++ b/tests/switch-12.ispc @@ -31,7 +31,7 @@ int switchit(int a, uniform int b) { } return 42; } - case 32: + case 9999: *((int *)NULL) = 0; default: return 0; diff --git a/tests/switch-8.ispc b/tests/switch-8.ispc index ca1848e8..24297e2f 100644 --- a/tests/switch-8.ispc +++ b/tests/switch-8.ispc @@ -12,7 +12,7 @@ int switchit(int a, uniform int b) { if (a & 1) break; return 2; - case 32: + case 9999: *((int *)NULL) = 0; default: case 1: diff --git a/tests/switch-9.ispc b/tests/switch-9.ispc index 9bfd0d03..3d3f8318 100644 --- a/tests/switch-9.ispc +++ b/tests/switch-9.ispc @@ -12,7 +12,7 @@ int switchit(int a, uniform int b) { if (a & 1) break; return 2; - case 32: + case 9999: *((int *)NULL) = 0; default: return 0; From 8fd9b84a800e682c12cdab4bf0cf09b3a3f552d9 Mon Sep 17 
00:00:00 2001 From: Matt Pharr Date: Wed, 30 May 2012 10:35:41 -0700 Subject: [PATCH 170/173] Update seed_rng() in stdlib to take a varying seed. Previously, we were trying to take a uniform seed and then shuffle that around to initialize the state for each of the program instances. This was becoming increasingly untenable and brittle. Now a varying seed is expected and used. --- docs/ispc.rst | 2 +- examples/aobench/ao.ispc | 2 +- examples/aobench_instrumented/ao.ispc | 2 +- stdlib.ispc | 59 +++------------------------ tests/rand-distrib-1.ispc | 2 +- tests/rand-distrib.ispc | 2 +- 6 files changed, 11 insertions(+), 58 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index a5ada9d9..5d26c93a 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3147,7 +3147,7 @@ library. State for the RNG is maintained in an instance of the :: struct RNGState; - void seed_rng(varying RNGState * uniform state, uniform int seed) + void seed_rng(varying RNGState * uniform state, int seed) void seed_rng(uniform RNGState * uniform state, uniform int seed) After the RNG is seeded, the ``random()`` function can be used to get a diff --git a/examples/aobench/ao.ispc b/examples/aobench/ao.ispc index 692dc367..e768c8c7 100644 --- a/examples/aobench/ao.ispc +++ b/examples/aobench/ao.ispc @@ -210,7 +210,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, { { 1.0f, 0.0f, -2.2f }, 0.5f } }; RNGState rngstate; - seed_rng(&rngstate, y0); + seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15))); float invSamples = 1.f / nsubsamples; foreach_tiled(y = y0 ... y1, x = 0 ... 
w, diff --git a/examples/aobench_instrumented/ao.ispc b/examples/aobench_instrumented/ao.ispc index 06d9b505..4fd9bedb 100644 --- a/examples/aobench_instrumented/ao.ispc +++ b/examples/aobench_instrumented/ao.ispc @@ -211,7 +211,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, { { 1.0f, 0.0f, -2.2f }, 0.5f } }; RNGState rngstate; - seed_rng(&rngstate, y0); + seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15))); // Compute the mapping between the 'programCount'-wide program // instances running in parallel and samples in the image. diff --git a/stdlib.ispc b/stdlib.ispc index 4cfcdea4..ea4c6b98 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4026,60 +4026,13 @@ static inline uniform float frandom(uniform RNGState * uniform state) return floatbits(0x3F800000 | irand)-1.0f; } -static inline uniform unsigned int __seed4(varying RNGState * uniform state, - uniform int start, - uniform unsigned int seed) { - uniform unsigned int c1 = 0xf0f0f0f0; - uniform unsigned int c2 = 0x0f0f0f0f; - - state->z1 = insert(state->z1, start + 0, seed); - state->z1 = insert(state->z1, start + 1, seed ^ c1); - state->z1 = insert(state->z1, start + 2, (seed << 3) ^ c1); - state->z1 = insert(state->z1, start + 3, (seed << 2) ^ c2); - - seed += 131; - state->z2 = insert(state->z2, start + 0, seed); - state->z2 = insert(state->z2, start + 1, seed ^ c1); - state->z2 = insert(state->z2, start + 2, (seed << 3) ^ c1); - state->z2 = insert(state->z2, start + 3, (seed << 2) ^ c2); - - seed ^= extract(state->z2, 2); - state->z3 = insert(state->z3, start + 0, seed); - state->z3 = insert(state->z3, start + 1, seed ^ c1); - state->z3 = insert(state->z3, start + 2, (seed << 3) ^ c1); - state->z3 = insert(state->z3, start + 3, (seed << 2) ^ c2); - - seed <<= 4; - seed += 3; - seed ^= extract(state->z1, 3); - state->z4 = insert(state->z4, start + 0, seed); - state->z4 = insert(state->z4, start + 1, seed ^ c1); - state->z4 = insert(state->z4, start + 2, (seed << 3) ^ 
c1); - state->z4 = insert(state->z4, start + 3, (seed << 2) ^ c2); - - return seed; -} - static inline void seed_rng(varying RNGState * uniform state, - uniform unsigned int seed) { - if (programCount == 1) { - state->z1 = seed; - state->z2 = seed ^ 0xbeeff00d; - state->z3 = ((seed & 0xffff) << 16) | (seed >> 16); - state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) | - ((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24); - } - else { - seed = __seed4(state, 0, seed); - if (programCount == 8) - __seed4(state, 4, seed ^ 0xbeeff00d); - if (programCount == 16) { - __seed4(state, 4, seed ^ 0xbeeff00d); - __seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16)); - __seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) | - ((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24)); - } - } + unsigned int seed) { + state->z1 = seed; + state->z2 = seed ^ 0xbeeff00d; + state->z3 = ((seed & 0xffff) << 16) | (seed >> 16); + state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) | + ((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24); } static inline void seed_rng(uniform RNGState * uniform state, diff --git a/tests/rand-distrib-1.ispc b/tests/rand-distrib-1.ispc index a53cef12..3a23a917 100644 --- a/tests/rand-distrib-1.ispc +++ b/tests/rand-distrib-1.ispc @@ -3,7 +3,7 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { RNGState state; - seed_rng(&state, 1); + seed_rng(&state, programIndex); int count[32]; for (uniform int i = 0; i < 32; ++i) count[i] = (b == 5.) ? 
0 : 1; diff --git a/tests/rand-distrib.ispc b/tests/rand-distrib.ispc index edf24f2b..393ec063 100644 --- a/tests/rand-distrib.ispc +++ b/tests/rand-distrib.ispc @@ -3,7 +3,7 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { RNGState state; - seed_rng(&state, 1); + seed_rng(&state, programIndex); float sum = 0; uniform int iters = 40000; for (unsigned int i = 0; i < iters; ++i) From fe8b109ca5d4fc8c4bb4fd2bccb188bd14d08431 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 30 May 2012 13:06:07 -0700 Subject: [PATCH 171/173] Fix more tests for 32 and 64-wide execution. --- tests/coalesce-1.ispc | 3 ++- tests/count-leading-trailing-zeros-4.ispc | 4 ++-- tests/count-leading-trailing-zeros-5.ispc | 4 ++-- tests/transcendentals-0-2.ispc | 2 +- tests/transcendentals-1-3.ispc | 2 +- tests/unif-struct-test-114.ispc | 5 +++-- 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/coalesce-1.ispc b/tests/coalesce-1.ispc index 205f8d05..acfe8cdf 100644 --- a/tests/coalesce-1.ispc +++ b/tests/coalesce-1.ispc @@ -5,7 +5,8 @@ export void f_f(uniform float RET[], uniform float aFOO[]) { uniform float * uniform buf = uniform new uniform float[32*32]; for (uniform int i = 0; i < 32*32; ++i) buf[i] = i; - + + assert(programIndex <= 64); RET[programIndex] = buf[64-programIndex]; } diff --git a/tests/count-leading-trailing-zeros-4.ispc b/tests/count-leading-trailing-zeros-4.ispc index 5cef2b7a..475c18ca 100644 --- a/tests/count-leading-trailing-zeros-4.ispc +++ b/tests/count-leading-trailing-zeros-4.ispc @@ -3,10 +3,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - int32 i = (1 << programIndex); + int32 i = (1 << (programIndex % 28)); RET[programIndex] = count_leading_zeros(i); } export void result(uniform float RET[]) { - RET[programIndex] = 31-programIndex; + RET[programIndex] = 31-(programIndex%28); } diff --git 
a/tests/count-leading-trailing-zeros-5.ispc b/tests/count-leading-trailing-zeros-5.ispc index d68dc5e4..2fe8161e 100644 --- a/tests/count-leading-trailing-zeros-5.ispc +++ b/tests/count-leading-trailing-zeros-5.ispc @@ -3,10 +3,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - unsigned int64 i = ((unsigned int64)1 << (40+programIndex)); + unsigned int64 i = ((unsigned int64)1 << min(63, 40+programIndex)); RET[programIndex] = count_trailing_zeros(i); } export void result(uniform float RET[]) { - RET[programIndex] = 40+programIndex; + RET[programIndex] = min(63, 40+programIndex); } diff --git a/tests/transcendentals-0-2.ispc b/tests/transcendentals-0-2.ispc index 43ebca6a..dad39715 100644 --- a/tests/transcendentals-0-2.ispc +++ b/tests/transcendentals-0-2.ispc @@ -14,7 +14,7 @@ export uniform int width() { return programCount; } bool ok(float x, float ref) { - bool r = (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-5; + bool r = (abs(x - ref) < 1e-4) || abs((x-ref)/ref) < 1e-4; if (any(r == false)) print("mismatch got %, expected %\n", x, ref); return r; diff --git a/tests/transcendentals-1-3.ispc b/tests/transcendentals-1-3.ispc index f9cdc5fe..a4167e89 100644 --- a/tests/transcendentals-1-3.ispc +++ b/tests/transcendentals-1-3.ispc @@ -14,7 +14,7 @@ export uniform int width() { return programCount; } bool ok(float x, float ref) { - bool r = (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-5; + bool r = (abs(x - ref) < 1e-5) || abs((x-ref)/ref) < 1e-5; if (any(r == false)) print("mismatch got %, expected %\n", x, ref); return r; diff --git a/tests/unif-struct-test-114.ispc b/tests/unif-struct-test-114.ispc index d5c2e08a..dbee219c 100644 --- a/tests/unif-struct-test-114.ispc +++ b/tests/unif-struct-test-114.ispc @@ -8,12 +8,13 @@ struct Foo { }; export void f_fi(uniform float RET[], uniform float a[], uniform int bFOO[]) { int b = bFOO[programIndex]; - uniform struct Foo myFoo[17]; + uniform 
struct Foo myFoo[256]; uniform int i; - for (i = 0; i < 17; ++i) { + for (i = 0; i < 256; ++i) { myFoo[i].x = i; myFoo[i].f = 17+2*i; } + assert(b/2 < 256); RET[programIndex] = myFoo[b/2].f; } From ef049e92ef4d24ba90df50977d5ea39a6e3406c0 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 30 May 2012 16:28:21 -0700 Subject: [PATCH 172/173] Handle undefined struct types when generating headers. --- module.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/module.cpp b/module.cpp index d16916be..f69fa0f7 100644 --- a/module.cpp +++ b/module.cpp @@ -1136,6 +1136,9 @@ lGetExportedTypes(const Type *type, lGetExportedTypes(structType->GetElementType(i), exportedStructTypes, exportedEnumTypes, exportedVectorTypes); } + else if (CastType<UndefinedStructType>(type) != NULL) + // do nothing + ; else if (CastType<EnumType>(type) != NULL) lAddTypeIfNew(type, exportedEnumTypes); else if (CastType<VectorType>(type) != NULL) From 6df7d31a5b8fdeaa9ae5effb885594a5fa300fd4 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 30 May 2012 16:34:59 -0700 Subject: [PATCH 173/173] Fix incorrect assertion. Issue #272. --- expr.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/expr.cpp b/expr.cpp index 3eaaa96f..7c270f46 100644 --- a/expr.cpp +++ b/expr.cpp @@ -6415,9 +6415,10 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value, return retValue; } - // Otherwise we must have a uniform AtomicType, so smear its value - // across the vector lanes. - Assert(CastType<AtomicType>(type) != NULL); + // Otherwise we must have a uniform atomic or pointer type, so smear + // its value across the vector lanes. + Assert(CastType<AtomicType>(type) != NULL || + CastType<PointerType>(type) != NULL); return ctx->SmearUniform(value); }