Add __foreach_active statement to loop over active program instances.
For now this has the __ prefix, since it is an experimental feature that is currently only used in the standard library implementation. It's probably worth making something along these lines an official feature, but I'm not sure that its current form is quite the right thing.
This commit is contained in:
3
lex.ll
3
lex.ll
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2010-2011, Intel Corporation
|
Copyright (c) 2010-2012, Intel Corporation
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -358,6 +358,7 @@ extern { RT; return TOKEN_EXTERN; }
|
|||||||
false { RT; return TOKEN_FALSE; }
|
false { RT; return TOKEN_FALSE; }
|
||||||
float { RT; return TOKEN_FLOAT; }
|
float { RT; return TOKEN_FLOAT; }
|
||||||
for { RT; return TOKEN_FOR; }
|
for { RT; return TOKEN_FOR; }
|
||||||
|
__foreach_active { RT; return TOKEN_FOREACH_ACTIVE; }
|
||||||
foreach { RT; return TOKEN_FOREACH; }
|
foreach { RT; return TOKEN_FOREACH; }
|
||||||
foreach_tiled { RT; return TOKEN_FOREACH_TILED; }
|
foreach_tiled { RT; return TOKEN_FOREACH_TILED; }
|
||||||
goto { RT; return TOKEN_GOTO; }
|
goto { RT; return TOKEN_GOTO; }
|
||||||
|
|||||||
26
parse.yy
26
parse.yy
@@ -188,7 +188,8 @@ struct ForeachDimension {
|
|||||||
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE
|
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE
|
||||||
|
|
||||||
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
|
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
|
||||||
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED TOKEN_DOTDOTDOT
|
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED
|
||||||
|
%token TOKEN_FOREACH_ACTIVE TOKEN_DOTDOTDOT
|
||||||
%token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN
|
%token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN
|
||||||
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
|
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
|
||||||
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
|
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
|
||||||
@@ -220,7 +221,7 @@ struct ForeachDimension {
|
|||||||
%type <structDeclarationList> struct_declaration_list
|
%type <structDeclarationList> struct_declaration_list
|
||||||
|
|
||||||
%type <symbolList> enumerator_list
|
%type <symbolList> enumerator_list
|
||||||
%type <symbol> enumerator foreach_identifier
|
%type <symbol> enumerator foreach_identifier foreach_active_identifier
|
||||||
%type <enumType> enum_specifier
|
%type <enumType> enum_specifier
|
||||||
|
|
||||||
%type <type> specifier_qualifier_list struct_or_union_specifier
|
%type <type> specifier_qualifier_list struct_or_union_specifier
|
||||||
@@ -1550,6 +1551,17 @@ foreach_identifier
|
|||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
foreach_active_scope
|
||||||
|
: TOKEN_FOREACH_ACTIVE { m->symbolTable->PushScope(); }
|
||||||
|
;
|
||||||
|
|
||||||
|
foreach_active_identifier
|
||||||
|
: TOKEN_IDENTIFIER
|
||||||
|
{
|
||||||
|
$$ = new Symbol(yytext, @1, AtomicType::UniformInt32);
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
foreach_dimension_specifier
|
foreach_dimension_specifier
|
||||||
: foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
|
: foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
|
||||||
{
|
{
|
||||||
@@ -1658,6 +1670,16 @@ iteration_statement
|
|||||||
$$ = new ForeachStmt(syms, begins, ends, $6, true, @1);
|
$$ = new ForeachStmt(syms, begins, ends, $6, true, @1);
|
||||||
m->symbolTable->PopScope();
|
m->symbolTable->PopScope();
|
||||||
}
|
}
|
||||||
|
| foreach_active_scope '(' foreach_active_identifier ')'
|
||||||
|
{
|
||||||
|
if ($3 != NULL)
|
||||||
|
m->symbolTable->AddVariable($3);
|
||||||
|
}
|
||||||
|
statement
|
||||||
|
{
|
||||||
|
$$ = CreateForeachActiveStmt($3, $6, Union(@1, @4));
|
||||||
|
m->symbolTable->PopScope();
|
||||||
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
goto_identifier
|
goto_identifier
|
||||||
|
|||||||
185
stdlib.ispc
185
stdlib.ispc
@@ -356,10 +356,7 @@ static inline void memcpy(void * varying dst, void * varying src,
|
|||||||
da[programIndex] = dst;
|
da[programIndex] = dst;
|
||||||
sa[programIndex] = src;
|
sa[programIndex] = src;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
void * uniform d = da[i], * uniform s = sa[i];
|
void * uniform d = da[i], * uniform s = sa[i];
|
||||||
__memcpy32((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
__memcpy32((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
||||||
}
|
}
|
||||||
@@ -373,10 +370,7 @@ static inline void memcpy64(void * varying dst, void * varying src,
|
|||||||
da[programIndex] = dst;
|
da[programIndex] = dst;
|
||||||
sa[programIndex] = src;
|
sa[programIndex] = src;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
void * uniform d = da[i], * uniform s = sa[i];
|
void * uniform d = da[i], * uniform s = sa[i];
|
||||||
__memcpy64((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
__memcpy64((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
||||||
}
|
}
|
||||||
@@ -400,10 +394,7 @@ static inline void memmove(void * varying dst, void * varying src,
|
|||||||
da[programIndex] = dst;
|
da[programIndex] = dst;
|
||||||
sa[programIndex] = src;
|
sa[programIndex] = src;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
void * uniform d = da[i], * uniform s = sa[i];
|
void * uniform d = da[i], * uniform s = sa[i];
|
||||||
__memmove32((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
__memmove32((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
||||||
}
|
}
|
||||||
@@ -417,10 +408,7 @@ static inline void memmove64(void * varying dst, void * varying src,
|
|||||||
da[programIndex] = dst;
|
da[programIndex] = dst;
|
||||||
sa[programIndex] = src;
|
sa[programIndex] = src;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
void * uniform d = da[i], * uniform s = sa[i];
|
void * uniform d = da[i], * uniform s = sa[i];
|
||||||
__memmove64((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
__memmove64((int8 * uniform)d, (int8 * uniform)s, extract(count, i));
|
||||||
}
|
}
|
||||||
@@ -440,10 +428,7 @@ static inline void memset(void * varying ptr, int8 val, int32 count) {
|
|||||||
void * uniform pa[programCount];
|
void * uniform pa[programCount];
|
||||||
pa[programIndex] = ptr;
|
pa[programIndex] = ptr;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
__memset32((int8 * uniform)pa[i], extract(val, i), extract(count, i));
|
__memset32((int8 * uniform)pa[i], extract(val, i), extract(count, i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -452,10 +437,7 @@ static inline void memset64(void * varying ptr, int8 val, int64 count) {
|
|||||||
void * uniform pa[programCount];
|
void * uniform pa[programCount];
|
||||||
pa[programIndex] = ptr;
|
pa[programIndex] = ptr;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
__memset64((int8 * uniform)pa[i], extract(val, i), extract(count, i));
|
__memset64((int8 * uniform)pa[i], extract(val, i), extract(count, i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -644,10 +626,7 @@ static inline void prefetch_l1(const void * varying ptr) {
|
|||||||
const void * uniform ptrArray[programCount];
|
const void * uniform ptrArray[programCount];
|
||||||
ptrArray[programIndex] = ptr;
|
ptrArray[programIndex] = ptr;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
const void * uniform p = ptrArray[i];
|
const void * uniform p = ptrArray[i];
|
||||||
prefetch_l1(p);
|
prefetch_l1(p);
|
||||||
}
|
}
|
||||||
@@ -657,10 +636,7 @@ static inline void prefetch_l2(const void * varying ptr) {
|
|||||||
const void * uniform ptrArray[programCount];
|
const void * uniform ptrArray[programCount];
|
||||||
ptrArray[programIndex] = ptr;
|
ptrArray[programIndex] = ptr;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
const void * uniform p = ptrArray[i];
|
const void * uniform p = ptrArray[i];
|
||||||
prefetch_l2(p);
|
prefetch_l2(p);
|
||||||
}
|
}
|
||||||
@@ -670,10 +646,7 @@ static inline void prefetch_l3(const void * varying ptr) {
|
|||||||
const void * uniform ptrArray[programCount];
|
const void * uniform ptrArray[programCount];
|
||||||
ptrArray[programIndex] = ptr;
|
ptrArray[programIndex] = ptr;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
const void * uniform p = ptrArray[i];
|
const void * uniform p = ptrArray[i];
|
||||||
prefetch_l3(p);
|
prefetch_l3(p);
|
||||||
}
|
}
|
||||||
@@ -683,10 +656,7 @@ static inline void prefetch_nt(const void * varying ptr) {
|
|||||||
const void * uniform ptrArray[programCount];
|
const void * uniform ptrArray[programCount];
|
||||||
ptrArray[programIndex] = ptr;
|
ptrArray[programIndex] = ptr;
|
||||||
|
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
const void * uniform p = ptrArray[i];
|
const void * uniform p = ptrArray[i];
|
||||||
prefetch_nt(p);
|
prefetch_nt(p);
|
||||||
}
|
}
|
||||||
@@ -1332,10 +1302,7 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
|||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
memory_barrier(); \
|
||||||
TA ret; \
|
TA ret; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
uniform TA * uniform p = ptrArray[i]; \
|
uniform TA * uniform p = ptrArray[i]; \
|
||||||
uniform TA v = extract(value, i); \
|
uniform TA v = extract(value, i); \
|
||||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||||
@@ -1392,10 +1359,7 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
|||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
memory_barrier(); \
|
||||||
TA ret; \
|
TA ret; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
uniform TA * uniform p = ptrArray[i]; \
|
uniform TA * uniform p = ptrArray[i]; \
|
||||||
uniform TA v = extract(value, i); \
|
uniform TA v = extract(value, i); \
|
||||||
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
||||||
@@ -1429,10 +1393,7 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
|||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
memory_barrier(); \
|
||||||
TA ret; \
|
TA ret; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
uniform TA * uniform p = ptrArray[i]; \
|
uniform TA * uniform p = ptrArray[i]; \
|
||||||
uniform TA v = extract(value, i); \
|
uniform TA v = extract(value, i); \
|
||||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||||
@@ -1513,10 +1474,7 @@ static inline TA atomic_compare_exchange_global( \
|
|||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
memory_barrier(); \
|
||||||
TA ret; \
|
TA ret; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
uniform TA r = \
|
uniform TA r = \
|
||||||
__atomic_compare_exchange_uniform_##TB##_global(ptrArray[i], \
|
__atomic_compare_exchange_uniform_##TB##_global(ptrArray[i], \
|
||||||
extract(oldval, i), \
|
extract(oldval, i), \
|
||||||
@@ -1548,10 +1506,7 @@ static inline uniform TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, \
|
|||||||
} \
|
} \
|
||||||
static inline TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, TYPE value) { \
|
static inline TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, TYPE value) { \
|
||||||
TYPE ret; \
|
TYPE ret; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
ret = insert(ret, i, *ptr); \
|
ret = insert(ret, i, *ptr); \
|
||||||
*ptr = OPFUNC(*ptr, extract(value, i)); \
|
*ptr = OPFUNC(*ptr, extract(value, i)); \
|
||||||
} \
|
} \
|
||||||
@@ -1561,10 +1516,7 @@ static inline TYPE atomic_##NAME##_local(uniform TYPE * p, TYPE value) { \
|
|||||||
TYPE ret; \
|
TYPE ret; \
|
||||||
uniform TYPE * uniform ptrs[programCount]; \
|
uniform TYPE * uniform ptrs[programCount]; \
|
||||||
ptrs[programIndex] = p; \
|
ptrs[programIndex] = p; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
ret = insert(ret, i, *ptrs[i]); \
|
ret = insert(ret, i, *ptrs[i]); \
|
||||||
*ptrs[i] = OPFUNC(*ptrs[i], extract(value, i)); \
|
*ptrs[i] = OPFUNC(*ptrs[i], extract(value, i)); \
|
||||||
} \
|
} \
|
||||||
@@ -1681,10 +1633,7 @@ static inline uniform TYPE atomic_compare_exchange_local(uniform TYPE * uniform
|
|||||||
static inline TYPE atomic_compare_exchange_local(uniform TYPE * uniform ptr, \
|
static inline TYPE atomic_compare_exchange_local(uniform TYPE * uniform ptr, \
|
||||||
TYPE cmp, TYPE update) { \
|
TYPE cmp, TYPE update) { \
|
||||||
TYPE ret; \
|
TYPE ret; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
uniform TYPE old = *ptr; \
|
uniform TYPE old = *ptr; \
|
||||||
if (old == extract(cmp, i)) \
|
if (old == extract(cmp, i)) \
|
||||||
*ptr = extract(update, i); \
|
*ptr = extract(update, i); \
|
||||||
@@ -1697,10 +1646,7 @@ static inline TYPE atomic_compare_exchange_local(uniform TYPE * varying p, \
|
|||||||
uniform TYPE * uniform ptrs[programCount]; \
|
uniform TYPE * uniform ptrs[programCount]; \
|
||||||
ptrs[programIndex] = p; \
|
ptrs[programIndex] = p; \
|
||||||
TYPE ret; \
|
TYPE ret; \
|
||||||
uniform int mask = lanemask(); \
|
__foreach_active (i) { \
|
||||||
for (uniform int i = 0; i < programCount; ++i) { \
|
|
||||||
if ((mask & (1 << i)) == 0) \
|
|
||||||
continue; \
|
|
||||||
uniform TYPE old = *ptrs[i]; \
|
uniform TYPE old = *ptrs[i]; \
|
||||||
if (old == extract(cmp, i)) \
|
if (old == extract(cmp, i)) \
|
||||||
*ptrs[i] = extract(update, i); \
|
*ptrs[i] = extract(update, i); \
|
||||||
@@ -1787,10 +1733,7 @@ static inline float sin(float x_full) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_sinf(extract(x_full, i));
|
uniform float r = __stdlib_sinf(extract(x_full, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -1920,10 +1863,7 @@ static inline float asin(float x) {
|
|||||||
if (__math_lib == __math_lib_svml ||
|
if (__math_lib == __math_lib_svml ||
|
||||||
__math_lib == __math_lib_system) {
|
__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_asinf(extract(x, i));
|
uniform float r = __stdlib_asinf(extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -2026,10 +1966,7 @@ static inline float cos(float x_full) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_cosf(extract(x_full, i));
|
uniform float r = __stdlib_cosf(extract(x_full, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -2163,10 +2100,7 @@ static inline void sincos(float x_full, varying float * uniform sin_result,
|
|||||||
__svml_sincos(x_full, sin_result, cos_result);
|
__svml_sincos(x_full, sin_result, cos_result);
|
||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float s, c;
|
uniform float s, c;
|
||||||
__stdlib_sincosf(extract(x_full, i), &s, &c);
|
__stdlib_sincosf(extract(x_full, i), &s, &c);
|
||||||
*sin_result = insert(*sin_result, i, s);
|
*sin_result = insert(*sin_result, i, s);
|
||||||
@@ -2297,10 +2231,7 @@ static inline float tan(float x_full) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_tanf(extract(x_full, i));
|
uniform float r = __stdlib_tanf(extract(x_full, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -2449,10 +2380,7 @@ static inline float atan(float x_full) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_atanf(extract(x_full, i));
|
uniform float r = __stdlib_atanf(extract(x_full, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -2545,10 +2473,7 @@ static inline float atan2(float y, float x) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_atan2f(extract(y, i), extract(x, i));
|
uniform float r = __stdlib_atan2f(extract(y, i), extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -2606,10 +2531,7 @@ static inline float exp(float x_full) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_expf(extract(x_full, i));
|
uniform float r = __stdlib_expf(extract(x_full, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -2806,10 +2728,7 @@ static inline float log(float x_full) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_logf(extract(x_full, i));
|
uniform float r = __stdlib_logf(extract(x_full, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -2976,10 +2895,7 @@ static inline float pow(float a, float b) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_system) {
|
else if (__math_lib == __math_lib_system) {
|
||||||
float ret;
|
float ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform float r = __stdlib_powf(extract(a, i), extract(b, i));
|
uniform float r = __stdlib_powf(extract(a, i), extract(b, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3058,10 +2974,7 @@ static inline double sin(double x) {
|
|||||||
return sin((float)x);
|
return sin((float)x);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_sin(extract(x, i));
|
uniform double r = __stdlib_sin(extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3081,10 +2994,7 @@ static inline double cos(double x) {
|
|||||||
return cos((float)x);
|
return cos((float)x);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_cos(extract(x, i));
|
uniform double r = __stdlib_cos(extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3108,11 +3018,8 @@ static inline void sincos(double x, varying double * uniform sin_result,
|
|||||||
*cos_result = cr;
|
*cos_result = cr;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
uniform double sr, cr;
|
uniform double sr, cr;
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
__stdlib_sincos(extract(x, i), &sr, &cr);
|
__stdlib_sincos(extract(x, i), &sr, &cr);
|
||||||
*sin_result = insert(*sin_result, i, sr);
|
*sin_result = insert(*sin_result, i, sr);
|
||||||
*cos_result = insert(*cos_result, i, cr);
|
*cos_result = insert(*cos_result, i, cr);
|
||||||
@@ -3137,10 +3044,7 @@ static inline double tan(double x) {
|
|||||||
return tan((float)x);
|
return tan((float)x);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_tan(extract(x, i));
|
uniform double r = __stdlib_tan(extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3160,10 +3064,7 @@ static inline double atan(double x) {
|
|||||||
return atan((float)x);
|
return atan((float)x);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_atan(extract(x, i));
|
uniform double r = __stdlib_atan(extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3183,10 +3084,7 @@ static inline double atan2(double y, double x) {
|
|||||||
return atan2((float)y, (float)x);
|
return atan2((float)y, (float)x);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_atan2(extract(y, i), extract(x, i));
|
uniform double r = __stdlib_atan2(extract(y, i), extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3206,10 +3104,7 @@ static inline double exp(double x) {
|
|||||||
return exp((float)x);
|
return exp((float)x);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_exp(extract(x, i));
|
uniform double r = __stdlib_exp(extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3229,10 +3124,7 @@ static inline double log(double x) {
|
|||||||
return log((float)x);
|
return log((float)x);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_log(extract(x, i));
|
uniform double r = __stdlib_log(extract(x, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
@@ -3252,10 +3144,7 @@ static inline double pow(double a, double b) {
|
|||||||
return pow((float)a, (float)b);
|
return pow((float)a, (float)b);
|
||||||
else {
|
else {
|
||||||
double ret;
|
double ret;
|
||||||
uniform int mask = lanemask();
|
__foreach_active (i) {
|
||||||
for (uniform int i = 0; i < programCount; ++i) {
|
|
||||||
if ((mask & (1 << i)) == 0)
|
|
||||||
continue;
|
|
||||||
uniform double r = __stdlib_pow(extract(a, i), extract(b, i));
|
uniform double r = __stdlib_pow(extract(a, i), extract(b, i));
|
||||||
ret = insert(ret, i, r);
|
ret = insert(ret, i, r);
|
||||||
}
|
}
|
||||||
|
|||||||
78
stmt.cpp
78
stmt.cpp
@@ -2721,3 +2721,81 @@ int
|
|||||||
DeleteStmt::EstimateCost() const {
|
DeleteStmt::EstimateCost() const {
|
||||||
return COST_DELETE;
|
return COST_DELETE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/** This generates AST nodes for an __foreach_active statement. This
|
||||||
|
construct can be synthesized out of the existing ForStmt (and other AST
|
||||||
|
nodes), so here we just build up the AST that we need rather than
|
||||||
|
having a new Stmt implementation for __foreach_active.
|
||||||
|
|
||||||
|
@param iterSym Symbol for the iteration variable (e.g. "i" in
|
||||||
|
__foreach_active (i) { ... }
|
||||||
|
@param stmts Statements to execute each time through the loop, for
|
||||||
|
each active program instance.
|
||||||
|
@param pos Position of the __foreach_active statement in the source
|
||||||
|
file.
|
||||||
|
*/
|
||||||
|
Stmt *
|
||||||
|
CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
|
||||||
|
if (iterSym == NULL) {
|
||||||
|
Assert(m->errorCount > 0);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// loop initializer: set iter = 0
|
||||||
|
std::vector<VariableDeclaration> var;
|
||||||
|
ConstExpr *zeroExpr = new ConstExpr(AtomicType::UniformInt32, 0,
|
||||||
|
iterSym->pos);
|
||||||
|
var.push_back(VariableDeclaration(iterSym, zeroExpr));
|
||||||
|
Stmt *initStmt = new DeclStmt(var, iterSym->pos);
|
||||||
|
|
||||||
|
// loop test: (iter < programCount)
|
||||||
|
ConstExpr *progCountExpr =
|
||||||
|
new ConstExpr(AtomicType::UniformInt32, g->target.vectorWidth,
|
||||||
|
pos);
|
||||||
|
SymbolExpr *symExpr = new SymbolExpr(iterSym, iterSym->pos);
|
||||||
|
Expr *testExpr = new BinaryExpr(BinaryExpr::Lt, symExpr, progCountExpr,
|
||||||
|
pos);
|
||||||
|
|
||||||
|
// loop step: ++iterSym
|
||||||
|
UnaryExpr *incExpr = new UnaryExpr(UnaryExpr::PreInc, symExpr, pos);
|
||||||
|
Stmt *stepStmt = new ExprStmt(incExpr, pos);
|
||||||
|
|
||||||
|
// loop body
|
||||||
|
// First, call __movmsk(__mask) to get the mask as a set of bits.
|
||||||
|
// This should be hoisted out of the loop
|
||||||
|
Symbol *maskSym = m->symbolTable->LookupVariable("__mask");
|
||||||
|
Assert(maskSym != NULL);
|
||||||
|
Expr *maskVecExpr = new SymbolExpr(maskSym, pos);
|
||||||
|
std::vector<Symbol *> mmFuns;
|
||||||
|
m->symbolTable->LookupFunction("__movmsk", &mmFuns);
|
||||||
|
Assert(mmFuns.size() == 2);
|
||||||
|
FunctionSymbolExpr *movmskFunc = new FunctionSymbolExpr("__movmsk", mmFuns,
|
||||||
|
pos);
|
||||||
|
ExprList *movmskArgs = new ExprList(maskVecExpr, pos);
|
||||||
|
FunctionCallExpr *movmskExpr = new FunctionCallExpr(movmskFunc, movmskArgs,
|
||||||
|
pos);
|
||||||
|
|
||||||
|
// Compute the per lane mask to test the mask bits against: (1 << iter)
|
||||||
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, 1,
|
||||||
|
iterSym->pos);
|
||||||
|
Expr *shiftLaneExpr = new BinaryExpr(BinaryExpr::Shl, oneExpr, symExpr,
|
||||||
|
pos);
|
||||||
|
|
||||||
|
// Compute the AND: movmsk & (1 << iter)
|
||||||
|
Expr *maskAndLaneExpr = new BinaryExpr(BinaryExpr::BitAnd, movmskExpr,
|
||||||
|
shiftLaneExpr, pos);
|
||||||
|
// Test to see if it's non-zero: (mask & (1 << iter)) != 0
|
||||||
|
Expr *ifTestExpr = new BinaryExpr(BinaryExpr::NotEqual, maskAndLaneExpr,
|
||||||
|
zeroExpr, pos);
|
||||||
|
|
||||||
|
// Now, enclose the provided statements in an if test such that they
|
||||||
|
// only run if the mask is non-zero for the lane we're currently
|
||||||
|
// handling in the loop.
|
||||||
|
IfStmt *laneCheckIf = new IfStmt(ifTestExpr, stmts, NULL, false, pos);
|
||||||
|
|
||||||
|
// And return a for loop that wires it all together.
|
||||||
|
return new ForStmt(initStmt, testExpr, stepStmt, laneCheckIf, false, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user