Add support for broadcast(), rotate(), and shuffle() stdlib routines
This commit is contained in:
4
Makefile
4
Makefile
@@ -94,9 +94,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
$(STDLIB_SRC): stdlib.m4
|
||||
|
||||
objs/stdlib-%.cpp: stdlib-%.ll
|
||||
objs/stdlib-%.cpp: stdlib-%.ll stdlib.m4
|
||||
@echo Creating C++ source from stdlib file $<
|
||||
@m4 stdlib.m4 $< | ./bitcode2cpp.py $< > $@
|
||||
|
||||
|
||||
@@ -74,7 +74,8 @@ Contents:
|
||||
|
||||
+ `Math Functions`_
|
||||
+ `Output Functions`_
|
||||
+ `Cross-Lane Operations`_
|
||||
+ `Cross-Program Instance Operations`_
|
||||
+ `Packed Load and Store Operations`_
|
||||
+ `Low-Level Bits`_
|
||||
|
||||
* `Interoperability with the Application`_
|
||||
@@ -1659,14 +1660,14 @@ values for the inactive program instances aren't printed. (In other cases,
|
||||
they may have garbage values or be otherwise undefined.)
|
||||
|
||||
|
||||
Cross-Lane Operations
|
||||
---------------------
|
||||
Cross-Program Instance Operations
|
||||
---------------------------------
|
||||
|
||||
Usually, ``ispc`` code expresses independent computation on separate data
|
||||
elements. There are, however, a number of cases where it's useful for the
|
||||
program instances to be able to cooperate in computing results. The
|
||||
cross-lane operations described in this section provide primitives for
|
||||
communication between the running program instances.
|
||||
Usually, ``ispc`` code expresses independent programs performing
|
||||
computation on separate data elements. There are, however, a number of
|
||||
cases where it's useful for the program instances to be able to cooperate
|
||||
in computing results.  The cross-program instance operations described in this section
|
||||
provide primitives for communication between the running program instances.
|
||||
|
||||
A few routines evaluate conditions across the running program
|
||||
instances. For example, ``any()`` returns ``true`` if the given value
|
||||
@@ -1678,6 +1679,47 @@ and ``all()`` returns ``true`` if it true for all of them.
|
||||
uniform bool any(bool v)
|
||||
uniform bool all(bool v)
|
||||
|
||||
To broadcast a value from one program instance to all of the others, a
|
||||
``broadcast()`` function is available. It broadcasts the value of the
|
||||
``value`` parameter for the program instance given by ``index`` to all of
|
||||
the running program instances.
|
||||
|
||||
::
|
||||
|
||||
float broadcast(float value, uniform int index)
|
||||
int32 broadcast(int32 value, uniform int index)
|
||||
double broadcast(double value, uniform int index)
|
||||
int64 broadcast(int64 value, uniform int index)
|
||||
|
||||
The ``rotate()`` function allows each program instance to find the value of
|
||||
the given value that their neighbor ``offset`` steps away has. For
|
||||
example, on an 8-wide target, if ``value`` has the value (1, 2, 3, 4, 5,
|
||||
6, 7, 8) in each of the running program instances, then ``rotate(value,
|
||||
-1)`` causes the first program instance to get the value 8, the second
|
||||
program instance to get the value 1, the third 2, and so forth. The
|
||||
provided offset value can be positive or negative, and may be greater than
|
||||
``programCount`` (it is masked to ensure valid offsets).
|
||||
|
||||
::
|
||||
|
||||
float rotate(float value, uniform int offset)
|
||||
int32 rotate(int32 value, uniform int offset)
|
||||
double rotate(double value, uniform int offset)
|
||||
int64 rotate(int64 value, uniform int offset)
|
||||
|
||||
|
||||
Finally, ``shuffle()`` allows fully general shuffling of values among the
|
||||
program instances.  Each program instance's value of ``permutation`` gives the
|
||||
program instance from which to get the value of ``value``. The provided
|
||||
values for ``permutation`` must all be between 0 and ``programCount-1``.
|
||||
|
||||
::
|
||||
|
||||
float shuffle(float value, int permutation)
|
||||
int32 shuffle(int32 value, int permutation)
|
||||
double shuffle(double value, int permutation)
|
||||
int64 shuffle(int64 value, int permutation)
|
||||
|
||||
The various variants of ``popcnt()`` return the population count--the
|
||||
number of bits set in the given value.
|
||||
|
||||
@@ -1719,8 +1761,12 @@ given value across all of the currently-executing vector lanes.
|
||||
uniform unsigned int reduce_max(unsigned int a, unsigned int b)
|
||||
|
||||
|
||||
Finally, there are routines for writing out and reading in values from
|
||||
linear memory locations for the active program instances.
|
||||
|
||||
Packed Load and Store Operations
|
||||
--------------------------------
|
||||
|
||||
The standard library also offers routines for writing out and reading in
|
||||
values from linear memory locations for the active program instances.
|
||||
``packed_load_active()`` loads consecutive values from the given array,
|
||||
starting at ``a[offset]``, loading one value for each currently-executing
|
||||
program instance and storing it into that program instance's ``val``
|
||||
@@ -2280,21 +2326,11 @@ elements to work with and then proceeds with the computation.
|
||||
Communicating Between SPMD Program Instances
|
||||
--------------------------------------------
|
||||
|
||||
The ``programIndex`` built-in variable (see `Mapping Data To Program
|
||||
Instances`_) can be used to communicate between the set of executing
|
||||
program instances. Consider the following code, which shows all of the
|
||||
program instances writing into unique locations in an array.
|
||||
|
||||
::
|
||||
|
||||
float x = ...;
|
||||
uniform float allX[programCount];
|
||||
allX[programIndex] = x;
|
||||
|
||||
In this code, a program instance that reads ``allX[0]`` finds the value of
|
||||
``x`` that was computed by the first of the running program instances, and
|
||||
so forth. Program instances can communicate with their neighbor instances
|
||||
with indexing like ``allX[(programIndex+1)%programCount]``.
|
||||
The ``broadcast()``, ``rotate()``, and ``shuffle()`` standard library
|
||||
routines provide a variety of mechanisms for the running program instances
|
||||
to communicate values to each other during execution. See the section
|
||||
`Cross-Program Instance Operations`_ for more information about their
|
||||
operation.
|
||||
|
||||
|
||||
Gather and Scatter
|
||||
|
||||
27
opt.cpp
27
opt.cpp
@@ -2116,11 +2116,12 @@ CreateLowerGatherScatterPass() {
|
||||
// IsCompileTimeConstantPass
|
||||
|
||||
/** LLVM IR implementations of target-specific functions may include calls
|
||||
to a function "bool __is_compile_time_constant_mask(mask type)"; this
|
||||
allows them to have specialized code paths for where the mask is known
|
||||
at compile time but not incurring the cost of a MOVMSK call at runtime
|
||||
to compute its value in cases where the mask value isn't known until
|
||||
runtime.
|
||||
to the functions "bool __is_compile_time_constant_mask(mask type)" and
|
||||
"bool __is_compile_time_constant_int32(i32)"; these allow them to have
|
||||
specialized code paths for where the corresponding value is known at
|
||||
compile time. For masks, for example, this allows them to not incur
|
||||
the cost of a MOVMSK call at runtime to compute its value in cases
|
||||
where the mask value isn't known until runtime.
|
||||
|
||||
This pass resolves these calls into either 'true' or 'false' values so
|
||||
that later optimization passes can operate with these as constants.
|
||||
@@ -2148,17 +2149,17 @@ llvm::RegisterPass<IsCompileTimeConstantPass>
|
||||
|
||||
bool
|
||||
IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
llvm::Function *func = m->module->getFunction("__is_compile_time_constant_mask");
|
||||
if (!func)
|
||||
return false;
|
||||
llvm::Function *maskFunc = m->module->getFunction("__is_compile_time_constant_mask");
|
||||
llvm::Function *int32Func = m->module->getFunction("__is_compile_time_constant_int32");
|
||||
|
||||
bool modifiedAny = false;
|
||||
restart:
|
||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
||||
// Iterate through the instructions looking for calls to
|
||||
// __is_compile_time_constant_mask().
|
||||
// Iterate through the instructions looking for calls to the
|
||||
// __is_compile_time_constant_*() functions
|
||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
||||
if (!callInst || callInst->getCalledFunction() != func)
|
||||
if (!callInst || (callInst->getCalledFunction() != maskFunc &&
|
||||
callInst->getCalledFunction() != int32Func))
|
||||
continue;
|
||||
|
||||
// This optimization pass can be disabled with the (poorly named)
|
||||
@@ -2171,8 +2172,8 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
|
||||
// Is it a constant? Bingo, turn the call's value into a constant
|
||||
// true value.
|
||||
llvm::Value *mask = callInst->getArgOperand(0);
|
||||
if (llvm::isa<llvm::Constant>(mask)) {
|
||||
llvm::Value *operand = callInst->getArgOperand(0);
|
||||
if (llvm::isa<llvm::Constant>(operand)) {
|
||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, LLVMTrue);
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
|
||||
48
stdlib.ispc
48
stdlib.ispc
@@ -81,6 +81,54 @@ static inline uniform unsigned int64 intbits(uniform double d) {
|
||||
return __intbits_uniform_double(d);
|
||||
}
|
||||
|
||||
static inline float broadcast(float v, uniform int i) {
|
||||
return __broadcast_float(v, i);
|
||||
}
|
||||
|
||||
static inline int32 broadcast(int32 v, uniform int i) {
|
||||
return __broadcast_int32(v, i);
|
||||
}
|
||||
|
||||
static inline double broadcast(double v, uniform int i) {
|
||||
return __broadcast_double(v, i);
|
||||
}
|
||||
|
||||
static inline int64 broadcast(int64 v, uniform int i) {
|
||||
return __broadcast_int64(v, i);
|
||||
}
|
||||
|
||||
static inline float rotate(float v, uniform int i) {
|
||||
return __rotate_float(v, i);
|
||||
}
|
||||
|
||||
static inline int32 rotate(int32 v, uniform int i) {
|
||||
return __rotate_int32(v, i);
|
||||
}
|
||||
|
||||
static inline double rotate(double v, uniform int i) {
|
||||
return __rotate_double(v, i);
|
||||
}
|
||||
|
||||
static inline int64 rotate(int64 v, uniform int i) {
|
||||
return __rotate_int64(v, i);
|
||||
}
|
||||
|
||||
static inline float shuffle(float v, int i) {
|
||||
return __shuffle_float(v, i);
|
||||
}
|
||||
|
||||
static inline int32 shuffle(int32 v, int i) {
|
||||
return __shuffle_int32(v, i);
|
||||
}
|
||||
|
||||
static inline double shuffle(double v, int i) {
|
||||
return __shuffle_double(v, i);
|
||||
}
|
||||
|
||||
static inline int64 shuffle(int64 v, int i) {
|
||||
return __shuffle_int64(v, i);
|
||||
}
|
||||
|
||||
// x[i]
|
||||
static inline uniform float extract(float x, uniform int i) {
|
||||
return __extract(x, i);
|
||||
|
||||
95
stdlib.m4
95
stdlib.m4
@@ -34,6 +34,8 @@
|
||||
;; builtins for various targets can use macros from this file to simplify
|
||||
;; generating code for their implementations of those builtins.
|
||||
|
||||
declare i1 @__is_compile_time_constant_int32(i32)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
@@ -284,6 +286,22 @@ ret <8 x float> %ret
|
||||
'
|
||||
)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; forloop macro
|
||||
|
||||
divert(`-1')
|
||||
# forloop(var, from, to, stmt) - improved version:
|
||||
# works even if VAR is not a strict macro name
|
||||
#   performs sanity check that FROM is not larger than TO
|
||||
# allows complex numerical expressions in TO and FROM
|
||||
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
|
||||
`pushdef(`$1', eval(`$2'))_$0(`$1',
|
||||
eval(`$3'), `$4')popdef(`$1')')')
|
||||
define(`_forloop',
|
||||
`$3`'ifelse(indir(`$1'), `$2', `',
|
||||
`define(`$1', incr(indir(`$1')))$0($@)')')
|
||||
divert`'dnl
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; stdlib_core
|
||||
;;
|
||||
@@ -291,8 +309,67 @@ ret <8 x float> %ret
|
||||
;; target's vector width, which it takes as its first parameter.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
define(`shuffles', `
|
||||
define internal <$1 x $2> @__broadcast_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||
%v = extractelement <$1 x $2> %0, i32 %1
|
||||
%r_0 = insertelement <$1 x $2> undef, $2 %v, i32 0
|
||||
forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2 %v, i32 i
|
||||
')
|
||||
ret <$1 x $2> %r_`'eval($1-1)
|
||||
}
|
||||
|
||||
define internal <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||
%isc = call i1 @__is_compile_time_constant_int32(i32 %1)
|
||||
br i1 %isc, label %is_const, label %not_const
|
||||
|
||||
is_const:
|
||||
; though verbose, this turns into tight code if %1 is a constant
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%delta_`'i = add i32 %1, i
|
||||
%delta_clamped_`'i = and i32 %delta_`'i, eval($1-1)
|
||||
%v_`'i = extractelement <$1 x $2> %0, i32 %delta_clamped_`'i')
|
||||
|
||||
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
|
||||
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
|
||||
')
|
||||
ret <$1 x $2> %ret_`'eval($1-1)
|
||||
|
||||
not_const:
|
||||
; store two instances of the vector into memory
|
||||
%ptr = alloca <$1 x $2>, i32 2
|
||||
%ptr0 = getelementptr <$1 x $2> * %ptr, i32 0
|
||||
store <$1 x $2> %0, <$1 x $2> * %ptr0
|
||||
%ptr1 = getelementptr <$1 x $2> * %ptr, i32 1
|
||||
store <$1 x $2> %0, <$1 x $2> * %ptr1
|
||||
|
||||
; compute offset in [0,vectorwidth-1], then index into the doubled-up vector
|
||||
%offset = and i32 %1, eval($1-1)
|
||||
%ptr_as_elt_array = bitcast <$1 x $2> * %ptr to [eval(2*$1) x $2] *
|
||||
%load_ptr = getelementptr [eval(2*$1) x $2] * %ptr_as_elt_array, i32 0, i32 %offset
|
||||
%load_ptr_vec = bitcast $2 * %load_ptr to <$1 x $2> *
|
||||
%result = load <$1 x $2> * %load_ptr_vec, align $4
|
||||
ret <$1 x $2> %result
|
||||
}
|
||||
|
||||
define internal <$1 x $2> @__shuffle_$3(<$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%index_`'i = extractelement <$1 x i32> %1, i32 i')
|
||||
forloop(i, 0, eval($1-1), `
|
||||
%v_`'i = extractelement <$1 x $2> %0, i32 %index_`'i')
|
||||
|
||||
%ret_0 = insertelement <$1 x $2> undef, $2 %v_0, i32 0
|
||||
forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1), $2 %v_`'i, i32 i
|
||||
')
|
||||
ret <$1 x $2> %ret_`'eval($1-1)
|
||||
}
|
||||
|
||||
')
|
||||
|
||||
|
||||
define(`stdlib_core', `
|
||||
|
||||
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; vector ops
|
||||
|
||||
@@ -307,6 +384,10 @@ define internal <$1 x float> @__insert(<$1 x float>, i32,
|
||||
ret <$1 x float> %insert
|
||||
}
|
||||
|
||||
shuffles($1, float, float, 4)
|
||||
shuffles($1, i32, int32, 4)
|
||||
shuffles($1, double, double, 8)
|
||||
shuffles($1, i64, int64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; various bitcasts from one type to another
|
||||
@@ -524,7 +605,6 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
|
||||
;; FIXME: use the per_lane macro, defined below, to implement these!
|
||||
|
||||
define(`packed_load_and_store', `
|
||||
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)
|
||||
|
||||
define i32 @__packed_load_active([0 x i32] *, i32 %start_offset, <$1 x i32> * %val_ptr,
|
||||
<$1 x i32> %full_mask) nounwind alwaysinline {
|
||||
@@ -661,19 +741,6 @@ done:
|
||||
;; Inside this code, any instances of the text "LANE" are replaced
|
||||
;; with an i32 value that represents the current lane number
|
||||
|
||||
divert(`-1')
|
||||
# forloop(var, from, to, stmt) - improved version:
|
||||
# works even if VAR is not a strict macro name
|
||||
# performs sanity check that FROM is larger than TO
|
||||
# allows complex numerical expressions in TO and FROM
|
||||
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
|
||||
`pushdef(`$1', eval(`$2'))_$0(`$1',
|
||||
eval(`$3'), `$4')popdef(`$1')')')
|
||||
define(`_forloop',
|
||||
`$3`'ifelse(indir(`$1'), `$2', `',
|
||||
`define(`$1', incr(indir(`$1')))$0($@)')')
|
||||
divert`'dnl
|
||||
|
||||
; num lanes, mask, code block to do per lane
|
||||
define(`per_lane', `
|
||||
br label %pl_entry
|
||||
|
||||
12
tests/broadcast-1.ispc
Normal file
12
tests/broadcast-1.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
int br = broadcast(a, (uniform int)b-2);
|
||||
RET[programIndex] = br;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 4;
|
||||
}
|
||||
12
tests/broadcast.ispc
Normal file
12
tests/broadcast.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = broadcast(a, 2);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 3;
|
||||
}
|
||||
12
tests/rotate-1.ispc
Normal file
12
tests/rotate-1.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
int rot = rotate(a, -1);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
13
tests/rotate-2.ispc
Normal file
13
tests/rotate-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
uniform int delta = b - 6; // -1
|
||||
int rot = rotate(a, delta);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
13
tests/rotate-3.ispc
Normal file
13
tests/rotate-3.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int64 a = aFOO[programIndex];
|
||||
uniform int delta = b - 6; // -1
|
||||
int64 rot = rotate(a, delta);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
12
tests/rotate-4.ispc
Normal file
12
tests/rotate-4.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int64 a = aFOO[programIndex];
|
||||
int64 rot = rotate(a, -1);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||
}
|
||||
12
tests/rotate.ispc
Normal file
12
tests/rotate.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
int rot = rotate(a, 2);
|
||||
RET[programIndex] = rot;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1 + (programIndex + 2) % programCount;
|
||||
}
|
||||
13
tests/shuffle-1.ispc
Normal file
13
tests/shuffle-1.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
int reverse = programCount - 1 - programIndex;
|
||||
float shuf = shuffle(a, reverse);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount - programIndex;
|
||||
}
|
||||
13
tests/shuffle-2.ispc
Normal file
13
tests/shuffle-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
int reverse = programCount - 1 - programIndex + (int)b - 5;
|
||||
float shuf = shuffle(a, reverse);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount - programIndex;
|
||||
}
|
||||
12
tests/shuffle.ispc
Normal file
12
tests/shuffle.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int32 a = aFOO[programIndex];
|
||||
int32 shuf = shuffle(a, 1);
|
||||
RET[programIndex] = shuf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2;
|
||||
}
|
||||
Reference in New Issue
Block a user