Added shuffle() variant to the standard library that takes two
varying values and a permutation index that spans the concatenation of the two of them (along the lines of SHUFPS...)
This commit is contained in:
25
builtins.cpp
25
builtins.cpp
@@ -176,30 +176,6 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
}
|
||||
}
|
||||
|
||||
/** Declare the function symbol 'bool __is_compile_time_constant_mask(mask type)'.
|
||||
This function will never be defined; it's just a placeholder
|
||||
that will be handled during the optimization process. See the
|
||||
discussion of the implementation of CompileTimeConstantResolvePass for
|
||||
more details.
|
||||
*/
|
||||
static void
|
||||
lDeclareCompileTimeConstant(llvm::Module *module) {
|
||||
SourcePos noPos;
|
||||
noPos.name = "__stdlib";
|
||||
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::BoolType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__is_compile_time_constant_mask", module);
|
||||
func->setOnlyReadsMemory(true);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
|
||||
/** Declare the 'pseudo-gather' functions. When the ispc front-end needs
|
||||
to perform a gather, it generates a call to one of these functions,
|
||||
which have signatures:
|
||||
@@ -583,7 +559,6 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
|
||||
// Declare various placeholder functions that the optimizer will later
|
||||
// find and replace with something more useful.
|
||||
lDeclareCompileTimeConstant(module);
|
||||
lDeclarePseudoGathers(module);
|
||||
lDeclarePseudoScatters(module);
|
||||
lDeclarePseudoMaskedStore(module);
|
||||
|
||||
@@ -6,6 +6,12 @@ initialize their members; they now must be initialized with initializer
|
||||
lists in braces (or initialized after the declaration with a loop over
|
||||
array elements, etc.)
|
||||
|
||||
Added another shuffle() function to the standard library:
|
||||
"<type> shuffle(<type> v0, <type> v1, int permute)", where the permutation
|
||||
vector indexes over the concatenation of the two vectors (e.g. the value
|
||||
0 corresponds to the first element of v0, the value 2*programCount-1
|
||||
corresponds to the last element of v1, etc.)
|
||||
|
||||
=== v1.0.2 === (1 July 2011)
|
||||
|
||||
Floating-point hexadecimal constants are now parsed correctly on Windows
|
||||
|
||||
@@ -1704,10 +1704,11 @@ provided offset value can be positive or negative, and may be greater than
|
||||
int64 rotate(int64 value, uniform int offset)
|
||||
|
||||
|
||||
Finally, ``shuffle()`` allows fully general shuffling of values among the
|
||||
program instances. Each program instance's value of permutation gives the
|
||||
program instance from which to get the value of ``value``. The provided
|
||||
values for ``permutation`` must all be between 0 and ``programCount-1``.
|
||||
Finally, the ``shuffle()`` functions allow two variants of fully general
|
||||
shuffling of values among the program instances. For the first version,
|
||||
each program instance's value of ``permutation`` gives the program instance
|
||||
from which to get the value of ``value``. The provided values for
|
||||
``permutation`` must all be between 0 and ``programCount-1``.
|
||||
|
||||
::
|
||||
|
||||
@@ -1716,6 +1717,20 @@ values for ``permutation`` must all be between 0 and ``programCount-1``.
|
||||
double shuffle(double value, int permutation)
|
||||
int64 shuffle(int64 value, int permutation)
|
||||
|
||||
|
||||
The second variant of ``shuffle()`` permutes over the extended vector that
|
||||
is the concatenation of the two provided values. In other words, a value
|
||||
of 0 in an element of ``permutation`` corresponds to the first element of
|
||||
``value0``, the value ``2*programCount-1`` corresponds to the last element
|
||||
of ``value1``, etc.
|
||||
|
||||
::
|
||||
|
||||
float shuffle(float value0, float value1, int permutation)
|
||||
int32 shuffle(int32 value0, int32 value1, int permutation)
|
||||
double shuffle(double value0, double value1, int permutation)
|
||||
int64 shuffle(int64 value0, int64 value1, int permutation)
|
||||
|
||||
The various variants of ``popcnt()`` return the population count--the
|
||||
number of bits set in the given value.
|
||||
|
||||
|
||||
31
opt.cpp
31
opt.cpp
@@ -2116,12 +2116,11 @@ CreateLowerGatherScatterPass() {
|
||||
// IsCompileTimeConstantPass
|
||||
|
||||
/** LLVM IR implementations of target-specific functions may include calls
|
||||
to the functions "bool __is_compile_time_constant_mask(mask type)" and
|
||||
"bool __is_compile_time_constant_int32(i32)"; these allow them to have
|
||||
specialized code paths for where the corresponding value is known at
|
||||
compile time. For masks, for example, this allows them to not incur
|
||||
the cost of a MOVMSK call at runtime to compute its value in cases
|
||||
where the mask value isn't known until runtime.
|
||||
to the functions "bool __is_compile_time_constant_*(...)"; these allow
|
||||
them to have specialized code paths for where the corresponding value is
|
||||
known at compile time. For masks, for example, this allows them to not
|
||||
incur the cost of a MOVMSK call at runtime to compute its value in
|
||||
cases where the mask value isn't known until runtime.
|
||||
|
||||
This pass resolves these calls into either 'true' or 'false' values so
|
||||
that later optimization passes can operate with these as constants.
|
||||
@@ -2149,8 +2148,11 @@ llvm::RegisterPass<IsCompileTimeConstantPass>
|
||||
|
||||
bool
|
||||
IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
llvm::Function *maskFunc = m->module->getFunction("__is_compile_time_constant_mask");
|
||||
llvm::Function *int32Func = m->module->getFunction("__is_compile_time_constant_int32");
|
||||
llvm::Function *funcs[] = {
|
||||
m->module->getFunction("__is_compile_time_constant_mask"),
|
||||
m->module->getFunction("__is_compile_time_constant_uniform_int32"),
|
||||
m->module->getFunction("__is_compile_time_constant_varying_int32")
|
||||
};
|
||||
|
||||
bool modifiedAny = false;
|
||||
restart:
|
||||
@@ -2158,8 +2160,17 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
// Iterate through the instructions looking for calls to the
|
||||
// __is_compile_time_constant_*() functions
|
||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
||||
if (!callInst || (callInst->getCalledFunction() != maskFunc &&
|
||||
callInst->getCalledFunction() != int32Func))
|
||||
if (callInst == NULL)
|
||||
continue;
|
||||
|
||||
int j;
|
||||
int nFuncs = sizeof(funcs) / sizeof(funcs[0]);
|
||||
for (j = 0; j < nFuncs; ++j) {
|
||||
if (callInst->getCalledFunction() == funcs[j])
|
||||
break;
|
||||
}
|
||||
if (j == nFuncs)
|
||||
// not a __is_compile_time_constant_* function
|
||||
continue;
|
||||
|
||||
// This optimization pass can be disabled with the (poorly named)
|
||||
|
||||
16
stdlib.ispc
16
stdlib.ispc
@@ -129,6 +129,22 @@ static inline int64 shuffle(int64 v, int i) {
|
||||
return __shuffle_int64(v, i);
|
||||
}
|
||||
|
||||
static inline float shuffle(float v0, float v1, int i) {
|
||||
return __shuffle2_float(v0, v1, i);
|
||||
}
|
||||
|
||||
static inline int32 shuffle(int32 v0, int32 v1, int i) {
|
||||
return __shuffle2_int32(v0, v1, i);
|
||||
}
|
||||
|
||||
static inline double shuffle(double v0, double v1, int i) {
|
||||
return __shuffle2_double(v0, v1, i);
|
||||
}
|
||||
|
||||
static inline int64 shuffle(int64 v0, int64 v1, int i) {
|
||||
return __shuffle2_int64(v0, v1, i);
|
||||
}
|
||||
|
||||
// x[i]
|
||||
static inline uniform float extract(float x, uniform int i) {
|
||||
return __extract(x, i);
|
||||
|
||||
45
stdlib.m4
45
stdlib.m4
@@ -34,7 +34,7 @@
|
||||
;; builtins for various targets can use macros from this file to simplify
|
||||
;; generating code for their implementations of those builtins.
|
||||
|
||||
declare i1 @__is_compile_time_constant_int32(i32)
|
||||
declare i1 @__is_compile_time_constant_uniform_int32(i32)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -319,7 +319,7 @@ forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2
|
||||
}
|
||||
|
||||
define internal <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||
%isc = call i1 @__is_compile_time_constant_int32(i32 %1)
|
||||
%isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
|
||||
br i1 %isc, label %is_const, label %not_const
|
||||
|
||||
is_const:
|
||||
@@ -363,12 +363,53 @@ forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1)
|
||||
ret <$1 x $2> %ret_`'eval($1-1)
|
||||
}
|
||||
|
||||
define internal <$1 x $2> @__shuffle2_$3(<$1 x $2>, <$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
||||
%v2 = shufflevector <$1 x $2> %0, <$1 x $2> %1, <eval(2*$1) x i32> <
|
||||
forloop(i, 0, eval(2*$1-2), `i32 i, ') i32 eval(2*$1-1)
|
||||
>
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%index_`'i = extractelement <$1 x i32> %2, i32 i')
|
||||
|
||||
%isc = call i1 @__is_compile_time_constant_varying_int32(<$1 x i32> %2)
|
||||
br i1 %isc, label %is_const, label %not_const
|
||||
|
||||
is_const:
|
||||
; extract from the requested lanes and insert into the result; LLVM turns
|
||||
; this into good code in the end
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%v_`'i = extractelement <eval(2*$1) x $2> %v2, i32 %index_`'i')
|
||||
|
||||
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
|
||||
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
|
||||
')
|
||||
ret <$1 x $2> %ret_`'eval($1-1)
|
||||
|
||||
not_const:
|
||||
; otherwise store the two vectors onto the stack and then use the given
|
||||
; permutation vector to get indices into that array...
|
||||
%ptr = alloca <eval(2*$1) x $2>
|
||||
store <eval(2*$1) x $2> %v2, <eval(2*$1) x $2> * %ptr
|
||||
%baseptr = bitcast <eval(2*$1) x $2> * %ptr to $2 *
|
||||
|
||||
%ptr_0 = getelementptr $2 * %baseptr, i32 %index_0
|
||||
%val_0 = load $2 * %ptr_0
|
||||
%result_0 = insertelement <$1 x $2> undef, $2 %val_0, i32 0
|
||||
|
||||
forloop(i, 1, eval($1-1), `
|
||||
%ptr_`'i = getelementptr $2 * %baseptr, i32 %index_`'i
|
||||
%val_`'i = load $2 * %ptr_`'i
|
||||
%result_`'i = insertelement <$1 x $2> %result_`'eval(i-1), $2 %val_`'i, i32 i
|
||||
')
|
||||
|
||||
ret <$1 x $2> %result_`'eval($1-1)
|
||||
}
|
||||
')
|
||||
|
||||
|
||||
define(`stdlib_core', `
|
||||
|
||||
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
|
||||
declare i1 @__is_compile_time_constant_varying_int32(<$1 x i32>)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; vector ops
|
||||
|
||||
13
tests/shuffle2-1.ispc
Normal file
13
tests/shuffle2-1.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int32 aa = aFOO[programIndex];
|
||||
int32 bb = aa + programCount;
|
||||
int32 shuf = shuffle(aa, bb, programCount + 1);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2 + programCount;
|
||||
}
|
||||
13
tests/shuffle2-2.ispc
Normal file
13
tests/shuffle2-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int32 aa = aFOO[programIndex];
|
||||
int32 bb = aa + programCount;
|
||||
int32 shuf = shuffle(aa, bb, programIndex + 2);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 3 + programIndex;
|
||||
}
|
||||
13
tests/shuffle2-3.ispc
Normal file
13
tests/shuffle2-3.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float aa = aFOO[programIndex];
|
||||
float bb = aa + programCount;
|
||||
float shuf = shuffle(aa, bb, programIndex + 2 + (int)b - 5);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 3 + programIndex;
|
||||
}
|
||||
13
tests/shuffle2-4.ispc
Normal file
13
tests/shuffle2-4.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
double aa = aFOO[programIndex];
|
||||
double bb = aa + programCount;
|
||||
double shuf = shuffle(aa, bb, 2*programIndex+(int)b-5);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + 2*programIndex;
|
||||
}
|
||||
13
tests/shuffle2-5.ispc
Normal file
13
tests/shuffle2-5.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
double aa = aFOO[programIndex];
|
||||
double bb = aa + programCount;
|
||||
double shuf = shuffle(aa, bb, 2*programIndex);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + 2*programIndex;
|
||||
}
|
||||
Reference in New Issue
Block a user