Add memcpy(), memmove() and memset() to the standard library.
Issue #183.
This commit is contained in:
@@ -1768,6 +1768,55 @@ define <WIDTH x i32> @__sext_varying_bool(<WIDTH x MASK>) nounwind readnone alwa
|
||||
ret <WIDTH x i32> %0')
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; memcpy/memmove/memset
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src,
|
||||
i32 %len, i32 %align, i1 %isvolatile)
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src,
|
||||
i64 %len, i32 %align, i1 %isvolatile)
|
||||
|
||||
;; Copies %len bytes (32-bit count) from %src to %dst by forwarding to the
;; llvm.memcpy intrinsic. The alignment argument is 0 and the copy is not
;; marked volatile.
define void @__memcpy32(i8* %dst, i8* %src, i32 %len) alwaysinline {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len,
                                       i32 0, i1 false)
  ret void
}
|
||||
|
||||
;; Copies %len bytes (64-bit count) from %src to %dst by forwarding to the
;; llvm.memcpy intrinsic. The alignment argument is 0 and the copy is not
;; marked volatile.
define void @__memcpy64(i8* %dst, i8* %src, i64 %len) alwaysinline {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len,
                                       i32 0, i1 false)
  ret void
}
|
||||
|
||||
declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src,
|
||||
i32 %len, i32 %align, i1 %isvolatile)
|
||||
declare void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src,
|
||||
i64 %len, i32 %align, i1 %isvolatile)
|
||||
|
||||
;; Moves %len bytes (32-bit count) from %src to %dst by forwarding to the
;; llvm.memmove intrinsic. The alignment argument is 0 and the move is not
;; marked volatile.
define void @__memmove32(i8* %dst, i8* %src, i32 %len) alwaysinline {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len,
                                        i32 0, i1 false)
  ret void
}
|
||||
|
||||
;; Moves %len bytes (64-bit count) from %src to %dst by forwarding to the
;; llvm.memmove intrinsic. The alignment argument is 0 and the move is not
;; marked volatile.
define void @__memmove64(i8* %dst, i8* %src, i64 %len) alwaysinline {
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len,
                                        i32 0, i1 false)
  ret void
}
|
||||
|
||||
|
||||
declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 %len, i32 %align,
|
||||
i1 %isvolatile)
|
||||
declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 %len, i32 %align,
|
||||
i1 %isvolatile)
|
||||
|
||||
;; Stores the byte %val into %len bytes (32-bit count) starting at %dst by
;; forwarding to the llvm.memset intrinsic. The alignment argument is 0 and
;; the store is not marked volatile.
define void @__memset32(i8* %dst, i8 %val, i32 %len) alwaysinline {
  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, i32 %len,
                                  i32 0, i1 false)
  ret void
}
|
||||
|
||||
;; Stores the byte %val into %len bytes (64-bit count) starting at %dst by
;; forwarding to the llvm.memset intrinsic. The alignment argument is 0 and
;; the store is not marked volatile.
define void @__memset64(i8* %dst, i8 %val, i64 %len) alwaysinline {
  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %len,
                                  i32 0, i1 false)
  ret void
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; assert
|
||||
|
||||
|
||||
@@ -134,9 +134,13 @@ Contents:
|
||||
|
||||
* `Reductions`_
|
||||
|
||||
+ `Data Conversions And Storage`_
|
||||
+ `Data Movement`_
|
||||
|
||||
* `Setting and Copying Values In Memory`_
|
||||
* `Packed Load and Store Operations`_
|
||||
|
||||
+ `Data Conversions`_
|
||||
|
||||
* `Converting Between Array-of-Structures and Structure-of-Arrays Layout`_
|
||||
* `Conversions To and From Half-Precision Floats`_
|
||||
|
||||
@@ -3387,8 +3391,52 @@ program instances into a compact output buffer is `discussed in the FAQ`_.
|
||||
.. _discussed in the FAQ: faq.html#how-can-a-gang-of-program-instances-generate-variable-amounts-of-output-efficiently
|
||||
|
||||
|
||||
Data Conversions And Storage
|
||||
----------------------------
|
||||
Data Movement
|
||||
-------------
|
||||
|
||||
Setting and Copying Values In Memory
|
||||
------------------------------------
|
||||
|
||||
There are a few functions for copying blocks of memory and initializing
|
||||
values in memory. Along the lines of the equivalently-named routines in
|
||||
the C Standard Library, ``memcpy`` copies a given number of bytes starting
|
||||
from a source location in memory to a destination location, where the two
|
||||
regions of memory are guaranteed by the caller to be non-overlapping.
|
||||
Alternatively, ``memmove`` can be used to copy data if the buffers may
|
||||
overlap.
|
||||
|
||||
::
|
||||
|
||||
void memcpy(void * uniform dst, void * uniform src, uniform int32 count)
|
||||
void memmove(void * uniform dst, void * uniform src, uniform int32 count)
|
||||
void memcpy(void * varying dst, void * varying src, int32 count)
|
||||
void memmove(void * varying dst, void * varying src, int32 count)
|
||||
|
||||
Note that there are variants of these functions that take both ``uniform``
|
||||
and ``varying`` pointers.
|
||||
|
||||
To initialize values in memory, the ``memset`` routine can be used. (It
|
||||
also behaves like the function of the same name in the C Standard Library.)
|
||||
It sets the given number of bytes of memory starting at the given location
|
||||
to the value provided.
|
||||
|
||||
::
|
||||
|
||||
void memset(void * uniform ptr, uniform int8 val, uniform int32 count)
|
||||
void memset(void * varying ptr, int8 val, int32 count)
|
||||
|
||||
There are also variants of all of these functions that take 64-bit values
|
||||
for the number of bytes of memory to operate on:
|
||||
|
||||
::
|
||||
|
||||
void memcpy64(void * uniform dst, void * uniform src, uniform int64 count)
|
||||
void memcpy64(void * varying dst, void * varying src, int64 count)
|
||||
void memmove64(void * uniform dst, void * uniform src, uniform int64 count)
|
||||
void memmove64(void * varying dst, void * varying src, int64 count)
|
||||
void memset64(void * uniform ptr, uniform int8 val, uniform int64 count)
|
||||
void memset64(void * varying ptr, int8 val, int64 count)
|
||||
|
||||
|
||||
Packed Load and Store Operations
|
||||
--------------------------------
|
||||
@@ -3447,6 +3495,9 @@ of four negative values, and initializes the first four elements of
|
||||
indices where ``a[i]`` was less than zero.
|
||||
|
||||
|
||||
Data Conversions
|
||||
----------------
|
||||
|
||||
Converting Between Array-of-Structures and Structure-of-Arrays Layout
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
||||
125
stdlib.ispc
125
stdlib.ispc
@@ -335,6 +335,131 @@ static inline uniform int lanemask() {
|
||||
return __movmsk(__mask);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// memcpy/memmove/memset
|
||||
|
||||
// Uniform memcpy with a 32-bit byte count: both pointers are uniform, so a
// single call to the __memcpy32 builtin performs the copy.
static inline void memcpy(void * uniform dst, void * uniform src,
                          uniform int32 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memcpy32(to, from, count);
}
|
||||
|
||||
// Uniform memcpy with a 64-bit byte count: both pointers are uniform, so a
// single call to the __memcpy64 builtin performs the copy.
static inline void memcpy64(void * uniform dst, void * uniform src,
                            uniform int64 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memcpy64(to, from, count);
}
|
||||
|
||||
// Varying memcpy with 32-bit byte counts: every program instance supplies
// its own dst, src and count, and the copies are issued one instance at a
// time through the __memcpy32 builtin.
static inline void memcpy(void * varying dst, void * varying src,
                          int32 count) {
    // Store the per-instance pointers into uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances whose bit is set in the lanemask() result copy.
        if ((laneBits & (1 << lane)) != 0) {
            __memcpy32((int8 * uniform)dstPtrs[lane],
                       (int8 * uniform)srcPtrs[lane],
                       extract(count, lane));
        }
    }
}
|
||||
|
||||
// Varying memcpy with 64-bit byte counts: every program instance supplies
// its own dst, src and count, and the copies are issued one instance at a
// time through the __memcpy64 builtin.
static inline void memcpy64(void * varying dst, void * varying src,
                            int64 count) {
    // Store the per-instance pointers into uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances whose bit is set in the lanemask() result copy.
        if ((laneBits & (1 << lane)) != 0) {
            __memcpy64((int8 * uniform)dstPtrs[lane],
                       (int8 * uniform)srcPtrs[lane],
                       extract(count, lane));
        }
    }
}
|
||||
|
||||
// Uniform memmove with a 32-bit byte count: both pointers are uniform, so a
// single call to the __memmove32 builtin performs the move.
static inline void memmove(void * uniform dst, void * uniform src,
                           uniform int32 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memmove32(to, from, count);
}
|
||||
|
||||
// Uniform memmove with a 64-bit byte count: both pointers are uniform, so a
// single call to the __memmove64 builtin performs the move.
static inline void memmove64(void * uniform dst, void * uniform src,
                             uniform int64 count) {
    int8 * uniform to = (int8 * uniform)dst;
    int8 * uniform from = (int8 * uniform)src;
    __memmove64(to, from, count);
}
|
||||
|
||||
// Varying memmove with 32-bit byte counts: every program instance supplies
// its own dst, src and count, and the moves are issued one instance at a
// time through the __memmove32 builtin.
static inline void memmove(void * varying dst, void * varying src,
                           int32 count) {
    // Store the per-instance pointers into uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances whose bit is set in the lanemask() result move data.
        if ((laneBits & (1 << lane)) != 0) {
            __memmove32((int8 * uniform)dstPtrs[lane],
                        (int8 * uniform)srcPtrs[lane],
                        extract(count, lane));
        }
    }
}
|
||||
|
||||
// Varying memmove with 64-bit byte counts: every program instance supplies
// its own dst, src and count, and the moves are issued one instance at a
// time through the __memmove64 builtin.
static inline void memmove64(void * varying dst, void * varying src,
                             int64 count) {
    // Store the per-instance pointers into uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances whose bit is set in the lanemask() result move data.
        if ((laneBits & (1 << lane)) != 0) {
            __memmove64((int8 * uniform)dstPtrs[lane],
                        (int8 * uniform)srcPtrs[lane],
                        extract(count, lane));
        }
    }
}
|
||||
|
||||
// Uniform memset with a 32-bit byte count: forwards the uniform pointer,
// byte value and count to the __memset32 builtin.
static inline void memset(void * uniform ptr, uniform int8 val,
                          uniform int32 count) {
    int8 * uniform bytes = (int8 * uniform)ptr;
    __memset32(bytes, val, count);
}
|
||||
|
||||
// Uniform memset with a 64-bit byte count: forwards the uniform pointer,
// byte value and count to the __memset64 builtin.
static inline void memset64(void * uniform ptr, uniform int8 val,
                            uniform int64 count) {
    int8 * uniform bytes = (int8 * uniform)ptr;
    __memset64(bytes, val, count);
}
|
||||
|
||||
// Varying memset with 32-bit byte counts: every program instance supplies
// its own ptr, val and count, and the fills are issued one instance at a
// time through the __memset32 builtin.
static inline void memset(void * varying ptr, int8 val, int32 count) {
    // Store the per-instance pointers into a uniform array so that they can
    // be read back one element at a time in the loop below.
    void * uniform ptrs[programCount];
    ptrs[programIndex] = ptr;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances whose bit is set in the lanemask() result fill.
        if ((laneBits & (1 << lane)) != 0) {
            __memset32((int8 * uniform)ptrs[lane], extract(val, lane),
                       extract(count, lane));
        }
    }
}
|
||||
|
||||
// Varying memset with 64-bit byte counts: every program instance supplies
// its own ptr, val and count, and the fills are issued one instance at a
// time through the __memset64 builtin.
static inline void memset64(void * varying ptr, int8 val, int64 count) {
    // Store the per-instance pointers into a uniform array so that they can
    // be read back one element at a time in the loop below.
    void * uniform ptrs[programCount];
    ptrs[programIndex] = ptr;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances whose bit is set in the lanemask() result fill.
        if ((laneBits & (1 << lane)) != 0) {
            __memset64((int8 * uniform)ptrs[lane], extract(val, lane),
                       extract(count, lane));
        }
    }
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// count leading/trailing zeros
|
||||
|
||||
|
||||
17
tests/memcpy-uniform.ispc
Normal file
17
tests/memcpy-uniform.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Exercises the uniform-pointer memcpy(): fills a 1024-element source with
// its own indices, copies all but the last 32 elements to offset 32 of the
// destination, and reports destination elements 64 + programIndex.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * uniform input = uniform new int32[1024];
    int32 * uniform output = uniform new int32[1024];

    foreach (idx = 0 ... 1024) {
        input[idx] = idx;
    }

    // output[32 + k] receives input[k] for k in [0, 1024-32).
    memcpy(output + 32, input, (1024-32)*sizeof(uniform int));
    RET[programIndex] = output[programIndex + 64];
}
|
||||
|
||||
// Writes 32 + programIndex, the value f_f reads back: destination element
// 64 + k was copied from source element 32 + k, which holds 32 + k.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex + 32;
}
|
||||
21
tests/memcpy-varying.ispc
Normal file
21
tests/memcpy-varying.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Exercises the varying-pointer memcpy(): each program instance owns its own
// source and destination buffers, tags its source with programIndex * 10000,
// and copies the first programCount elements.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 *from = new int32[1024];
    int32 *to = new int32[1024];

    for (uniform int k = 0; k < 1024; ++k) {
        from[k] = programIndex * 10000 + k;
    }

    // Both branches issue the same call on purpose: the call is made once
    // with only instance 2 running and once with all the other instances.
    if (programIndex == 2) {
        memcpy(to, from, programCount*sizeof(uniform int));
    }
    else {
        memcpy(to, from, programCount*sizeof(uniform int));
    }

    RET[programIndex] = to[programIndex];
}
|
||||
|
||||
// Writes programIndex * 10000 + programIndex, the value f_f stored in
// element programIndex of each instance's source buffer before copying.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex * 10000 + programIndex;
}
|
||||
16
tests/memmove-uniform.ispc
Normal file
16
tests/memmove-uniform.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Exercises the uniform-pointer memmove() on overlapping ranges: fills a
// 1024-element buffer with its own indices, then shifts the first 1023
// elements up by one position within the same buffer.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * uniform data = uniform new int32[1024];

    foreach (idx = 0 ... 1024) {
        data[idx] = idx;
    }

    // Source and destination overlap here, which is why memmove is used.
    memmove(data + 1, data, (1024-1)*sizeof(uniform int));
    RET[programIndex] = data[programIndex];
}
|
||||
|
||||
// Writes max(0, programIndex - 1): after the shift in f_f, element 0 still
// holds 0 and element k (k >= 1) holds k - 1.
export void result(uniform float RET[]) {
    RET[programIndex] = max(0, programIndex - 1);
}
|
||||
19
tests/memmove-varying.ispc
Normal file
19
tests/memmove-varying.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Exercises the varying-pointer memmove(): each program instance owns a
// buffer tagged with programIndex * 10000, and only instance 2 moves
// programCount elements from offset programCount/2 down to offset 0.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 *data = new int32[1024];

    for (uniform int k = 0; k < 1024; ++k) {
        data[k] = programIndex * 10000 + k;
    }

    // The source and destination ranges overlap within the same buffer.
    if (programIndex == 2) {
        memmove(data, data + programCount/2, programCount*sizeof(uniform int));
    }

    RET[programIndex] = data[0];
}
|
||||
|
||||
// Writes programIndex * 10000 for every instance (element 0 of an untouched
// buffer), then overrides slot 2 with the value moved down from offset
// programCount/2 of instance 2's buffer.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex * 10000;
    RET[2] = 20000 + programCount/2;
}
|
||||
16
tests/memset-uniform.ispc
Normal file
16
tests/memset-uniform.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Exercises the uniform-pointer memset(): element 0 is set to 0 and every
// remaining element of the 1024-element buffer is filled with 0x7f bytes.
// Each program instance then reports whether its element reads back as
// 0x7f7f7f7f.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * uniform buf = uniform new int32[1024];
    buf[0] = 0;
    // The fill starts at element 1, so only 1023 elements remain in the
    // allocation; a 1024-element count would write 4 bytes past its end.
    memset(buf+1, 0x7f, (1024-1)*sizeof(uniform int32));

    int v = buf[programIndex];
    RET[programIndex] = (v == 0x7f7f7f7f);
}
|
||||
|
||||
// Writes 1 for every instance, then overrides slot 0 with 0: f_f leaves
// element 0 of its buffer at 0, so the comparison there is false.
export void result(uniform float RET[]) {
    RET[programIndex] = 1;
    RET[0] = 0;
}
|
||||
21
tests/memset-varying.ispc
Normal file
21
tests/memset-varying.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Exercises the varying-pointer memset(): each program instance allocates a
// buffer of 1024 * (programIndex + 1) elements and fills all of it, with
// odd instances using 0xff bytes and even instances using 0x01 bytes.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * varying block = varying new int32[1024*(programIndex+1)];

    // The two fills run under different masks and with per-instance sizes.
    if (programIndex & 1) {
        memset(block, 0xff, 1024*(programIndex+1)*sizeof(uniform int32));
    }
    else {
        memset(block, 0x01, 1024*(programIndex+1)*sizeof(uniform int32));
    }

    int first = block[0];
    int want = (programIndex & 1) ? 0xffffffff : 0x01010101;
    RET[programIndex] = (first == want);
}
|
||||
|
||||
// Writes 1 for every instance: f_f stores the outcome of comparing each
// instance's first element against the fill pattern used for that instance.
export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}
|
||||
Reference in New Issue
Block a user