Add memcpy(), memmove() and memset() to the standard library.
Issue #183.
This commit is contained in:
@@ -1768,6 +1768,55 @@ define <WIDTH x i32> @__sext_varying_bool(<WIDTH x MASK>) nounwind readnone alwa
|
|||||||
ret <WIDTH x i32> %0')
|
ret <WIDTH x i32> %0')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; memcpy/memmove/memset
|
||||||
|
|
||||||
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src,
|
||||||
|
i32 %len, i32 %align, i1 %isvolatile)
|
||||||
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src,
|
||||||
|
i64 %len, i32 %align, i1 %isvolatile)
|
||||||
|
|
||||||
|
;; Wrapper around the llvm.memcpy intrinsic that takes a 32-bit byte count.
;; The alignment argument is passed as 0 and the volatile flag is off.
define void @__memcpy32(i8 * %to, i8 * %from, i32 %nbytes) alwaysinline {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8 * %to, i8 * %from, i32 %nbytes,
                                       i32 0, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
;; Wrapper around the llvm.memcpy intrinsic that takes a 64-bit byte count.
;; The alignment argument is passed as 0 and the volatile flag is off.
define void @__memcpy64(i8 * %to, i8 * %from, i64 %nbytes) alwaysinline {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 * %to, i8 * %from, i64 %nbytes,
                                       i32 0, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src,
|
||||||
|
i32 %len, i32 %align, i1 %isvolatile)
|
||||||
|
declare void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src,
|
||||||
|
i64 %len, i32 %align, i1 %isvolatile)
|
||||||
|
|
||||||
|
;; Wrapper around the llvm.memmove intrinsic that takes a 32-bit byte count.
;; The alignment argument is passed as 0 and the volatile flag is off.
define void @__memmove32(i8 * %to, i8 * %from, i32 %nbytes) alwaysinline {
  call void @llvm.memmove.p0i8.p0i8.i32(i8 * %to, i8 * %from, i32 %nbytes,
                                        i32 0, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
;; Wrapper around the llvm.memmove intrinsic that takes a 64-bit byte count.
;; The alignment argument is passed as 0 and the volatile flag is off.
define void @__memmove64(i8 * %to, i8 * %from, i64 %nbytes) alwaysinline {
  call void @llvm.memmove.p0i8.p0i8.i64(i8 * %to, i8 * %from, i64 %nbytes,
                                        i32 0, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
|
||||||
|
declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 %len, i32 %align,
|
||||||
|
i1 %isvolatile)
|
||||||
|
declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 %len, i32 %align,
|
||||||
|
i1 %isvolatile)
|
||||||
|
|
||||||
|
;; Wrapper around the llvm.memset intrinsic that takes a 32-bit byte count.
;; The alignment argument is passed as 0 and the volatile flag is off.
define void @__memset32(i8 * %to, i8 %byte, i32 %nbytes) alwaysinline {
  call void @llvm.memset.p0i8.i32(i8 * %to, i8 %byte, i32 %nbytes,
                                  i32 0, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
;; Wrapper around the llvm.memset intrinsic that takes a 64-bit byte count.
;; The alignment argument is passed as 0 and the volatile flag is off.
define void @__memset64(i8 * %to, i8 %byte, i64 %nbytes) alwaysinline {
  call void @llvm.memset.p0i8.i64(i8 * %to, i8 %byte, i64 %nbytes,
                                  i32 0, i1 false)
  ret void
}
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; assert
|
;; assert
|
||||||
|
|
||||||
|
|||||||
@@ -134,9 +134,13 @@ Contents:
|
|||||||
|
|
||||||
* `Reductions`_
|
* `Reductions`_
|
||||||
|
|
||||||
+ `Data Conversions And Storage`_
|
+ `Data Movement`_
|
||||||
|
|
||||||
|
* `Setting and Copying Values In Memory`_
|
||||||
* `Packed Load and Store Operations`_
|
* `Packed Load and Store Operations`_
|
||||||
|
|
||||||
|
+ `Data Conversions`_
|
||||||
|
|
||||||
* `Converting Between Array-of-Structures and Structure-of-Arrays Layout`_
|
* `Converting Between Array-of-Structures and Structure-of-Arrays Layout`_
|
||||||
* `Conversions To and From Half-Precision Floats`_
|
* `Conversions To and From Half-Precision Floats`_
|
||||||
|
|
||||||
@@ -3387,8 +3391,52 @@ program instances into a compact output buffer is `discussed in the FAQ`_.
|
|||||||
.. _discussed in the FAQ: faq.html#how-can-a-gang-of-program-instances-generate-variable-amounts-of-output-efficiently
|
.. _discussed in the FAQ: faq.html#how-can-a-gang-of-program-instances-generate-variable-amounts-of-output-efficiently
|
||||||
|
|
||||||
|
|
||||||
Data Conversions And Storage
|
Data Movement
|
||||||
----------------------------
|
-------------
|
||||||
|
|
||||||
|
Setting and Copying Values In Memory
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
There are a few functions for copying blocks of memory and initializing
|
||||||
|
values in memory. Along the lines of the equivalently-named routines in
|
||||||
|
the C Standard Library, ``memcpy`` copies a given number of bytes starting
|
||||||
|
from a source location in memory to a destination location, where the two
|
||||||
|
regions of memory are guaranteed by the caller to be non-overlapping.
|
||||||
|
Alternatively, ``memmove`` can be used to copy data if the buffers may
|
||||||
|
overlap.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
void memcpy(void * uniform dst, void * uniform src, uniform int32 count)
|
||||||
|
void memmove(void * uniform dst, void * uniform src, uniform int32 count)
|
||||||
|
void memcpy(void * varying dst, void * varying src, int32 count)
|
||||||
|
void memmove(void * varying dst, void * varying src, int32 count)
|
||||||
|
|
||||||
|
Note that there are variants of these functions that take both ``uniform``
|
||||||
|
and ``varying`` pointers.
|
||||||
|
|
||||||
|
To initialize values in memory, the ``memset`` routine can be used. (It
|
||||||
|
also behaves like the function of the same name in the C Standard Library.)
|
||||||
|
It sets the given number of bytes of memory starting at the given location
|
||||||
|
to the value provided.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
void memset(void * uniform ptr, uniform int8 val, uniform int32 count)
|
||||||
|
void memset(void * varying ptr, int8 val, int32 count)
|
||||||
|
|
||||||
|
There are also variants of all of these functions that take 64-bit values
|
||||||
|
for the number of bytes of memory to operate on:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
void memcpy64(void * uniform dst, void * uniform src, uniform int64 count)
|
||||||
|
void memcpy64(void * varying dst, void * varying src, int64 count)
|
||||||
|
void memmove64(void * uniform dst, void * uniform src, uniform int64 count)
|
||||||
|
void memmove64(void * varying dst, void * varying src, int64 count)
|
||||||
|
void memset64(void * uniform ptr, uniform int8 val, uniform int64 count)
|
||||||
|
void memset64(void * varying ptr, int8 val, int64 count)
|
||||||
|
|
||||||
|
|
||||||
Packed Load and Store Operations
|
Packed Load and Store Operations
|
||||||
--------------------------------
|
--------------------------------
|
||||||
@@ -3447,6 +3495,9 @@ of four negative values, and initializes the first four elements of
|
|||||||
indices where ``a[i]`` was less than zero.
|
indices where ``a[i]`` was less than zero.
|
||||||
|
|
||||||
|
|
||||||
|
Data Conversions
|
||||||
|
----------------
|
||||||
|
|
||||||
Converting Between Array-of-Structures and Structure-of-Arrays Layout
|
Converting Between Array-of-Structures and Structure-of-Arrays Layout
|
||||||
---------------------------------------------------------------------
|
---------------------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
125
stdlib.ispc
125
stdlib.ispc
@@ -335,6 +335,131 @@ static inline uniform int lanemask() {
|
|||||||
return __movmsk(__mask);
|
return __movmsk(__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// memcpy/memmove/memset
|
||||||
|
|
||||||
|
// Uniform memcpy: forwards a single (dst, src, count) triple to the
// 32-bit-count builtin __memcpy32 after casting both pointers to int8 *.
static inline void memcpy(void * uniform dst, void * uniform src,
                          uniform int32 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memcpy32(to, from, count);
}
|
||||||
|
|
||||||
|
// Uniform memcpy with a 64-bit byte count: forwards to the builtin
// __memcpy64 after casting both pointers to int8 *.
static inline void memcpy64(void * uniform dst, void * uniform src,
                            uniform int64 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memcpy64(to, from, count);
}
|
||||||
|
|
||||||
|
// Varying memcpy: each program instance supplies its own dst, src and
// count.  The per-lane pointers are written to uniform arrays, then the
// lanes are visited one at a time and __memcpy32 is called once for every
// lane whose bit is set in lanemask(); lanes with a clear bit are skipped.
static inline void memcpy(void * varying dst, void * varying src,
                          int32 count) {
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int activeLanes = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        if ((activeLanes & (1 << lane)) != 0) {
            int8 * uniform to = (int8 * uniform)dstPtrs[lane];
            int8 * uniform from = (int8 * uniform)srcPtrs[lane];
            __memcpy32(to, from, extract(count, lane));
        }
    }
}
|
||||||
|
|
||||||
|
// Varying memcpy with 64-bit byte counts: each program instance supplies
// its own dst, src and count.  The per-lane pointers are written to uniform
// arrays, then __memcpy64 is called once for every lane whose bit is set in
// lanemask(); lanes with a clear bit are skipped.
static inline void memcpy64(void * varying dst, void * varying src,
                            int64 count) {
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int activeLanes = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        if ((activeLanes & (1 << lane)) != 0) {
            int8 * uniform to = (int8 * uniform)dstPtrs[lane];
            int8 * uniform from = (int8 * uniform)srcPtrs[lane];
            __memcpy64(to, from, extract(count, lane));
        }
    }
}
|
||||||
|
|
||||||
|
// Uniform memmove: forwards a single (dst, src, count) triple to the
// 32-bit-count builtin __memmove32 after casting both pointers to int8 *.
static inline void memmove(void * uniform dst, void * uniform src,
                           uniform int32 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memmove32(to, from, count);
}
|
||||||
|
|
||||||
|
// Uniform memmove with a 64-bit byte count: forwards to the builtin
// __memmove64 after casting both pointers to int8 *.
static inline void memmove64(void * uniform dst, void * uniform src,
                             uniform int64 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memmove64(to, from, count);
}
|
||||||
|
|
||||||
|
// Varying memmove: each program instance supplies its own dst, src and
// count.  The per-lane pointers are written to uniform arrays, then
// __memmove32 is called once for every lane whose bit is set in
// lanemask(); lanes with a clear bit are skipped.
static inline void memmove(void * varying dst, void * varying src,
                           int32 count) {
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int activeLanes = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        if ((activeLanes & (1 << lane)) != 0) {
            int8 * uniform to = (int8 * uniform)dstPtrs[lane];
            int8 * uniform from = (int8 * uniform)srcPtrs[lane];
            __memmove32(to, from, extract(count, lane));
        }
    }
}
|
||||||
|
|
||||||
|
// Varying memmove with 64-bit byte counts: each program instance supplies
// its own dst, src and count.  The per-lane pointers are written to uniform
// arrays, then __memmove64 is called once for every lane whose bit is set
// in lanemask(); lanes with a clear bit are skipped.
static inline void memmove64(void * varying dst, void * varying src,
                             int64 count) {
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int activeLanes = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        if ((activeLanes & (1 << lane)) != 0) {
            int8 * uniform to = (int8 * uniform)dstPtrs[lane];
            int8 * uniform from = (int8 * uniform)srcPtrs[lane];
            __memmove64(to, from, extract(count, lane));
        }
    }
}
|
||||||
|
|
||||||
|
// Uniform memset: forwards a single (ptr, val, count) triple to the
// 32-bit-count builtin __memset32 after casting the pointer to int8 *.
static inline void memset(void * uniform ptr, uniform int8 val,
                          uniform int32 count) {
    int8 * uniform bytes = (int8 * uniform)ptr;
    __memset32(bytes, val, count);
}
|
||||||
|
|
||||||
|
// Uniform memset with a 64-bit byte count: forwards to the builtin
// __memset64 after casting the pointer to int8 *.
static inline void memset64(void * uniform ptr, uniform int8 val,
                            uniform int64 count) {
    int8 * uniform bytes = (int8 * uniform)ptr;
    __memset64(bytes, val, count);
}
|
||||||
|
|
||||||
|
// Varying memset: each program instance supplies its own ptr, val and
// count.  The per-lane pointers are written to a uniform array, then
// __memset32 is called once for every lane whose bit is set in
// lanemask(); lanes with a clear bit are skipped.
static inline void memset(void * varying ptr, int8 val, int32 count) {
    void * uniform lanePtrs[programCount];
    lanePtrs[programIndex] = ptr;

    uniform int activeLanes = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        if ((activeLanes & (1 << lane)) != 0) {
            int8 * uniform bytes = (int8 * uniform)lanePtrs[lane];
            __memset32(bytes, extract(val, lane), extract(count, lane));
        }
    }
}
|
||||||
|
|
||||||
|
// Varying memset with 64-bit byte counts: each program instance supplies
// its own ptr, val and count.  The per-lane pointers are written to a
// uniform array, then __memset64 is called once for every lane whose bit
// is set in lanemask(); lanes with a clear bit are skipped.
static inline void memset64(void * varying ptr, int8 val, int64 count) {
    void * uniform lanePtrs[programCount];
    lanePtrs[programIndex] = ptr;

    uniform int activeLanes = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        if ((activeLanes & (1 << lane)) != 0) {
            int8 * uniform bytes = (int8 * uniform)lanePtrs[lane];
            __memset64(bytes, extract(val, lane), extract(count, lane));
        }
    }
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// count leading/trailing zeros
|
// count leading/trailing zeros
|
||||||
|
|
||||||
|
|||||||
17
tests/memcpy-uniform.ispc
Normal file
17
tests/memcpy-uniform.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Uniform memcpy test: fills a 1024-int source with its own indices, copies
// the first 1024-32 ints to offset 32 of the destination, and reports
// destination element 64+programIndex (which holds 32+programIndex).
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * uniform from = uniform new int32[1024];
    int32 * uniform to = uniform new int32[1024];

    foreach (k = 0 ... 1024) {
        from[k] = k;
    }

    // After this call to[32 + k] == k for k in [0, 1024-32).
    memcpy(to + 32, from, (1024-32)*sizeof(uniform int));
    RET[programIndex] = to[64+programIndex];
}
|
||||||
|
|
||||||
|
// Expected output: the destination was shifted by 32 elements, so element
// 64+programIndex holds the value 32+programIndex.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex + 32;
}
|
||||||
21
tests/memcpy-varying.ispc
Normal file
21
tests/memcpy-varying.ispc
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Varying memcpy test: every program instance owns a separate source and
// destination buffer and copies programCount ints between them.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * varying from = new int32[1024];
    int32 * varying to = new int32[1024];

    // Tag each instance's source data with its programIndex.
    for (uniform int k = 0; k < 1024; ++k) {
        from[k] = programIndex * 10000 + k;
    }

    // Both branches issue the same call; presumably the point is to run
    // the varying memcpy under two different execution masks.
    if (programIndex == 2) {
        memcpy(to, from, programCount*sizeof(uniform int));
    }
    else {
        memcpy(to, from, programCount*sizeof(uniform int));
    }

    RET[programIndex] = to[programIndex];
}
|
||||||
|
|
||||||
|
// Expected output: element programIndex of each instance's own buffer,
// i.e. programIndex * 10000 + programIndex.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex * 10000 + programIndex;
}
|
||||||
16
tests/memmove-uniform.ispc
Normal file
16
tests/memmove-uniform.ispc
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Uniform memmove test with overlapping regions: fills a 1024-int buffer
// with its own indices, then shifts the first 1023 ints up by one element
// within the same buffer.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * uniform data = uniform new int32[1024];

    foreach (k = 0 ... 1024) {
        data[k] = k;
    }

    // Source and destination overlap, so memmove (not memcpy) is required.
    memmove(data + 1, data, (1024-1)*sizeof(uniform int));
    RET[programIndex] = data[programIndex];
}
|
||||||
|
|
||||||
|
// Expected output: element 0 is untouched (0) and every later element k
// holds k-1 after the one-element shift.
export void result(uniform float RET[]) {
    RET[programIndex] = max(0, programIndex-1);
}
|
||||||
19
tests/memmove-varying.ispc
Normal file
19
tests/memmove-varying.ispc
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Varying memmove test: every program instance owns a separate buffer, but
// only instance 2 performs the (overlapping) move.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * varying data = new int32[1024];

    // Tag each instance's data with its programIndex.
    for (uniform int k = 0; k < 1024; ++k) {
        data[k] = programIndex * 10000 + k;
    }

    // Only instance 2 moves programCount ints down by programCount/2
    // elements; the other instances leave their buffers unchanged.
    if (programIndex == 2) {
        memmove(data, data+programCount/2, programCount*sizeof(uniform int));
    }

    RET[programIndex] = data[0];
}
|
||||||
|
|
||||||
|
// Expected output: every instance still sees its original first element,
// except instance 2, whose first element now comes from offset
// programCount/2 of its buffer.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex * 10000;
    RET[2] = 10000 * 2 + programCount/2;
}
|
||||||
16
tests/memset-uniform.ispc
Normal file
16
tests/memset-uniform.ispc
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Uniform memset test: zeroes element 0 of a 1024-int buffer, fills the
// remaining elements with the byte 0x7f, and reports for each program
// instance whether element programIndex equals 0x7f7f7f7f.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * uniform buf = uniform new int32[1024];
    buf[0] = 0;
    // The fill starts at buf+1, so only 1024-1 elements remain in the
    // allocation; filling 1024 elements would write one int32 past its end.
    memset(buf+1, 0x7f, (1024-1)*sizeof(uniform int32));

    int v = buf[programIndex];
    RET[programIndex] = (v == 0x7f7f7f7f);
}
|
||||||
|
|
||||||
|
// Expected output: every element compares equal to the fill pattern (1)
// except element 0, which was set to zero and not covered by the memset.
export void result(uniform float RET[]) {
    RET[programIndex] = 1;
    RET[0] = 0;
}
|
||||||
21
tests/memset-varying.ispc
Normal file
21
tests/memset-varying.ispc
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Varying memset test: every program instance allocates a buffer of
// 1024*(programIndex+1) ints and fills all of it; odd instances use the
// byte 0xff and even instances the byte 0x01.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int nelem = 1024*(programIndex+1);
    int32 * varying data = varying new int32[nelem];

    // The two fills are kept in separate branches so that each call runs
    // with only part of the gang active.
    if (programIndex & 1) {
        memset(data, 0xff, nelem*sizeof(uniform int32));
    }
    else {
        memset(data, 0x01, nelem*sizeof(uniform int32));
    }

    int first = data[0];
    int want = (programIndex & 1) ? 0xffffffff : 0x01010101;
    RET[programIndex] = (first == want);
}
|
||||||
|
|
||||||
|
// Expected output: every instance finds its fill pattern in element 0.
export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}
|
||||||
Reference in New Issue
Block a user