Added shuffle() variant to the standard library that takes two
varying values and a permutation index that spans the concatenation of the two of them (along the lines of SHUFPS...)
This commit is contained in:
25
builtins.cpp
25
builtins.cpp
@@ -176,30 +176,6 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Declare the function symbol 'bool __is_compile_time_constant_mask(mask type)'.
|
|
||||||
This function will never be defined; it's just a placeholder
|
|
||||||
that will be handled during the optimization process. See the
|
|
||||||
discussion of the implementation of CompileTimeConstantResolvePass for
|
|
||||||
more details.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
lDeclareCompileTimeConstant(llvm::Module *module) {
|
|
||||||
SourcePos noPos;
|
|
||||||
noPos.name = "__stdlib";
|
|
||||||
|
|
||||||
std::vector<const llvm::Type *> argTypes;
|
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
|
||||||
llvm::FunctionType::get(LLVMTypes::BoolType, argTypes, false);
|
|
||||||
llvm::Function *func =
|
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
|
||||||
"__is_compile_time_constant_mask", module);
|
|
||||||
func->setOnlyReadsMemory(true);
|
|
||||||
func->setDoesNotThrow(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Declare the 'pseudo-gather' functions. When the ispc front-end needs
|
/** Declare the 'pseudo-gather' functions. When the ispc front-end needs
|
||||||
to perform a gather, it generates a call to one of these functions,
|
to perform a gather, it generates a call to one of these functions,
|
||||||
which have signatures:
|
which have signatures:
|
||||||
@@ -583,7 +559,6 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
|
|
||||||
// Declare various placeholder functions that the optimizer will later
|
// Declare various placeholder functions that the optimizer will later
|
||||||
// find and replace with something more useful.
|
// find and replace with something more useful.
|
||||||
lDeclareCompileTimeConstant(module);
|
|
||||||
lDeclarePseudoGathers(module);
|
lDeclarePseudoGathers(module);
|
||||||
lDeclarePseudoScatters(module);
|
lDeclarePseudoScatters(module);
|
||||||
lDeclarePseudoMaskedStore(module);
|
lDeclarePseudoMaskedStore(module);
|
||||||
|
|||||||
@@ -6,6 +6,12 @@ initialize their members; they now must be initialized with initializer
|
|||||||
lists in braces (or initialized after the declaration with a loop over
|
lists in braces (or initialized after the declaration with a loop over
|
||||||
array elements, etc.)
|
array elements, etc.)
|
||||||
|
|
||||||
|
Added another shuffle() function to the standard library:
|
||||||
|
"<type> shuffle(<type> v0, <type> v1, int permute)", where the permutation
|
||||||
|
vector indexes over the concatenation of the two vectors (e.g. the value
|
||||||
|
0 corresponds to the first element of v0, the value 2*programCount-1
|
||||||
|
corresponds to the last element of v1, etc.)
|
||||||
|
|
||||||
=== v1.0.2 === (1 July 2011)
|
=== v1.0.2 === (1 July 2011)
|
||||||
|
|
||||||
Floating-point hexadecimal constants are now parsed correctly on Windows
|
Floating-point hexadecimal constants are now parsed correctly on Windows
|
||||||
|
|||||||
@@ -1704,10 +1704,11 @@ provided offset value can be positive or negative, and may be greater than
|
|||||||
int64 rotate(int64 value, uniform int offset)
|
int64 rotate(int64 value, uniform int offset)
|
||||||
|
|
||||||
|
|
||||||
Finally, ``shuffle()`` allows fully general shuffling of values among the
|
Finally, the ``shuffle()`` functions allow two variants of fully general
|
||||||
program instances. Each program instance's value of permutation gives the
|
shuffling of values among the program instances. For the first version,
|
||||||
program instance from which to get the value of ``value``. The provided
|
each program instance's value of permutation gives the program instance
|
||||||
values for ``permutation`` must all be between 0 and ``programCount-1``.
|
from which to get the value of ``value``. The provided values for
|
||||||
|
``permutation`` must all be between 0 and ``programCount-1``.
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
@@ -1716,6 +1717,20 @@ values for ``permutation`` must all be between 0 and ``programCount-1``.
|
|||||||
double shuffle(double value, int permutation)
|
double shuffle(double value, int permutation)
|
||||||
int64 shuffle(int64 value, int permutation)
|
int64 shuffle(int64 value, int permutation)
|
||||||
|
|
||||||
|
|
||||||
|
The second variant of ``shuffle()`` permutes over the extended vector that
|
||||||
|
is the concatenation of the two provided values. In other words, a value
|
||||||
|
of 0 in an element of ``permutation`` corresponds to the first element of
|
||||||
|
``value0``, the value ``2*programCount-1`` corresponds to the last element
|
||||||
|
of ``value1``, etc.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
float shuffle(float value0, float value1, int permutation)
|
||||||
|
int32 shuffle(int32 value0, int32 value1, int permutation)
|
||||||
|
double shuffle(double value0, double value1, int permutation)
|
||||||
|
int64 shuffle(int64 value0, int64 value1, int permutation)
|
||||||
|
|
||||||
The various variants of ``popcnt()`` return the population count--the
|
The various variants of ``popcnt()`` return the population count--the
|
||||||
number of bits set in the given value.
|
number of bits set in the given value.
|
||||||
|
|
||||||
|
|||||||
31
opt.cpp
31
opt.cpp
@@ -2116,12 +2116,11 @@ CreateLowerGatherScatterPass() {
|
|||||||
// IsCompileTimeConstantPass
|
// IsCompileTimeConstantPass
|
||||||
|
|
||||||
/** LLVM IR implementations of target-specific functions may include calls
|
/** LLVM IR implementations of target-specific functions may include calls
|
||||||
to the functions "bool __is_compile_time_constant_mask(mask type)" and
|
to the functions "bool __is_compile_time_constant_*(...)"; these allow
|
||||||
"bool __is_compile_time_constant_int32(i32)"; these allow them to have
|
them to have specialized code paths for where the corresponding value is
|
||||||
specialized code paths for where the corresponding value is known at
|
known at compile time. For masks, for example, this allows them to not
|
||||||
compile time. For masks, for example, this allows them to not incur
|
incur the cost of a MOVMSK call at runtime to compute its value in
|
||||||
the cost of a MOVMSK call at runtime to compute its value in cases
|
cases where the mask value isn't known until runtime.
|
||||||
where the mask value isn't known until runtime.
|
|
||||||
|
|
||||||
This pass resolves these calls into either 'true' or 'false' values so
|
This pass resolves these calls into either 'true' or 'false' values so
|
||||||
that later optimization passes can operate with these as constants.
|
that later optimization passes can operate with these as constants.
|
||||||
@@ -2149,8 +2148,11 @@ llvm::RegisterPass<IsCompileTimeConstantPass>
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
llvm::Function *maskFunc = m->module->getFunction("__is_compile_time_constant_mask");
|
llvm::Function *funcs[] = {
|
||||||
llvm::Function *int32Func = m->module->getFunction("__is_compile_time_constant_int32");
|
m->module->getFunction("__is_compile_time_constant_mask"),
|
||||||
|
m->module->getFunction("__is_compile_time_constant_uniform_int32"),
|
||||||
|
m->module->getFunction("__is_compile_time_constant_varying_int32")
|
||||||
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
restart:
|
restart:
|
||||||
@@ -2158,8 +2160,17 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// Iterate through the instructions looking for calls to the
|
// Iterate through the instructions looking for calls to the
|
||||||
// __is_compile_time_constant_*() functions
|
// __is_compile_time_constant_*() functions
|
||||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
||||||
if (!callInst || (callInst->getCalledFunction() != maskFunc &&
|
if (callInst == NULL)
|
||||||
callInst->getCalledFunction() != int32Func))
|
continue;
|
||||||
|
|
||||||
|
int j;
|
||||||
|
int nFuncs = sizeof(funcs) / sizeof(funcs[0]);
|
||||||
|
for (j = 0; j < nFuncs; ++j) {
|
||||||
|
if (callInst->getCalledFunction() == funcs[j])
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (j == nFuncs)
|
||||||
|
// not a __is_compile_time_constant_* function
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// This optimization pass can be disabled with the (poorly named)
|
// This optimization pass can be disabled with the (poorly named)
|
||||||
|
|||||||
16
stdlib.ispc
16
stdlib.ispc
@@ -129,6 +129,22 @@ static inline int64 shuffle(int64 v, int i) {
|
|||||||
return __shuffle_int64(v, i);
|
return __shuffle_int64(v, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline float shuffle(float v0, float v1, int i) {
|
||||||
|
return __shuffle2_float(v0, v1, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int32 shuffle(int32 v0, int32 v1, int i) {
|
||||||
|
return __shuffle2_int32(v0, v1, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline double shuffle(double v0, double v1, int i) {
|
||||||
|
return __shuffle2_double(v0, v1, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int64 shuffle(int64 v0, int64 v1, int i) {
|
||||||
|
return __shuffle2_int64(v0, v1, i);
|
||||||
|
}
|
||||||
|
|
||||||
// x[i]
|
// x[i]
|
||||||
static inline uniform float extract(float x, uniform int i) {
|
static inline uniform float extract(float x, uniform int i) {
|
||||||
return __extract(x, i);
|
return __extract(x, i);
|
||||||
|
|||||||
45
stdlib.m4
45
stdlib.m4
@@ -34,7 +34,7 @@
|
|||||||
;; builtins for various targets can use macros from this file to simplify
|
;; builtins for various targets can use macros from this file to simplify
|
||||||
;; generating code for their implementations of those builtins.
|
;; generating code for their implementations of those builtins.
|
||||||
|
|
||||||
declare i1 @__is_compile_time_constant_int32(i32)
|
declare i1 @__is_compile_time_constant_uniform_int32(i32)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -319,7 +319,7 @@ forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2
|
|||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
define internal <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||||
%isc = call i1 @__is_compile_time_constant_int32(i32 %1)
|
%isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
|
||||||
br i1 %isc, label %is_const, label %not_const
|
br i1 %isc, label %is_const, label %not_const
|
||||||
|
|
||||||
is_const:
|
is_const:
|
||||||
@@ -363,12 +363,53 @@ forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1)
|
|||||||
ret <$1 x $2> %ret_`'eval($1-1)
|
ret <$1 x $2> %ret_`'eval($1-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define internal <$1 x $2> @__shuffle2_$3(<$1 x $2>, <$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
||||||
|
%v2 = shufflevector <$1 x $2> %0, <$1 x $2> %1, <eval(2*$1) x i32> <
|
||||||
|
forloop(i, 0, eval(2*$1-2), `i32 i, ') i32 eval(2*$1-1)
|
||||||
|
>
|
||||||
|
forloop(i, 0, eval($1-1), `
|
||||||
|
%index_`'i = extractelement <$1 x i32> %2, i32 i')
|
||||||
|
|
||||||
|
%isc = call i1 @__is_compile_time_constant_varying_int32(<$1 x i32> %2)
|
||||||
|
br i1 %isc, label %is_const, label %not_const
|
||||||
|
|
||||||
|
is_const:
|
||||||
|
; extract from the requested lanes and insert into the result; LLVM turns
|
||||||
|
; this into good code in the end
|
||||||
|
forloop(i, 0, eval($1-1), `
|
||||||
|
%v_`'i = extractelement <eval(2*$1) x $2> %v2, i32 %index_`'i')
|
||||||
|
|
||||||
|
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
|
||||||
|
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
|
||||||
|
')
|
||||||
|
ret <$1 x $2> %ret_`'eval($1-1)
|
||||||
|
|
||||||
|
not_const:
|
||||||
|
; otherwise store the two vectors onto the stack and then use the given
|
||||||
|
; permutation vector to get indices into that array...
|
||||||
|
%ptr = alloca <eval(2*$1) x $2>
|
||||||
|
store <eval(2*$1) x $2> %v2, <eval(2*$1) x $2> * %ptr
|
||||||
|
%baseptr = bitcast <eval(2*$1) x $2> * %ptr to $2 *
|
||||||
|
|
||||||
|
%ptr_0 = getelementptr $2 * %baseptr, i32 %index_0
|
||||||
|
%val_0 = load $2 * %ptr_0
|
||||||
|
%result_0 = insertelement <$1 x $2> undef, $2 %val_0, i32 0
|
||||||
|
|
||||||
|
forloop(i, 1, eval($1-1), `
|
||||||
|
%ptr_`'i = getelementptr $2 * %baseptr, i32 %index_`'i
|
||||||
|
%val_`'i = load $2 * %ptr_`'i
|
||||||
|
%result_`'i = insertelement <$1 x $2> %result_`'eval(i-1), $2 %val_`'i, i32 i
|
||||||
|
')
|
||||||
|
|
||||||
|
ret <$1 x $2> %result_`'eval($1-1)
|
||||||
|
}
|
||||||
')
|
')
|
||||||
|
|
||||||
|
|
||||||
define(`stdlib_core', `
|
define(`stdlib_core', `
|
||||||
|
|
||||||
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
|
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
|
||||||
|
declare i1 @__is_compile_time_constant_varying_int32(<$1 x i32>)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; vector ops
|
;; vector ops
|
||||||
|
|||||||
13
tests/shuffle2-1.ispc
Normal file
13
tests/shuffle2-1.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int32 aa = aFOO[programIndex];
|
||||||
|
int32 bb = aa + programCount;
|
||||||
|
int32 shuf = shuffle(aa, bb, programCount + 1);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 2 + programCount;
|
||||||
|
}
|
||||||
13
tests/shuffle2-2.ispc
Normal file
13
tests/shuffle2-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int32 aa = aFOO[programIndex];
|
||||||
|
int32 bb = aa + programCount;
|
||||||
|
int32 shuf = shuffle(aa, bb, programIndex + 2);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 3 + programIndex;
|
||||||
|
}
|
||||||
13
tests/shuffle2-3.ispc
Normal file
13
tests/shuffle2-3.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
float aa = aFOO[programIndex];
|
||||||
|
float bb = aa + programCount;
|
||||||
|
float shuf = shuffle(aa, bb, programIndex + 2 + (int)b - 5);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 3 + programIndex;
|
||||||
|
}
|
||||||
13
tests/shuffle2-4.ispc
Normal file
13
tests/shuffle2-4.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
double aa = aFOO[programIndex];
|
||||||
|
double bb = aa + programCount;
|
||||||
|
double shuf = shuffle(aa, bb, 2*programIndex+(int)b-5);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 1 + 2*programIndex;
|
||||||
|
}
|
||||||
13
tests/shuffle2-5.ispc
Normal file
13
tests/shuffle2-5.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
double aa = aFOO[programIndex];
|
||||||
|
double bb = aa + programCount;
|
||||||
|
double shuf = shuffle(aa, bb, 2*programIndex);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 1 + 2*programIndex;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user