Add memcpy(), memmove() and memset() to the standard library.

Issue #183.
This commit is contained in:
Matt Pharr
2012-03-05 16:09:00 -08:00
parent c152ae3c32
commit 3b95452481
9 changed files with 338 additions and 3 deletions

View File

@@ -1768,6 +1768,55 @@ define <WIDTH x i32> @__sext_varying_bool(<WIDTH x MASK>) nounwind readnone alwa
ret <WIDTH x i32> %0')
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; memcpy/memmove/memset
;; LLVM memcpy intrinsics, one taking a 32-bit byte count and one taking a
;; 64-bit byte count.  They are called by the __memcpy32 and __memcpy64
;; wrappers that follow.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src,
i32 %len, i32 %align, i1 %isvolatile)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src,
i64 %len, i32 %align, i1 %isvolatile)
;; Always-inline wrapper around the llvm.memcpy intrinsic with a 32-bit
;; byte count.  The intrinsic is given 0 for its alignment argument and 0
;; for its volatile flag.
define void @__memcpy32(i8 * %dst, i8 * %src, i32 %len) alwaysinline {
call void @llvm.memcpy.p0i8.p0i8.i32(i8 * %dst, i8 * %src, i32 %len, i32 0, i1 0)
ret void
}
;; Always-inline wrapper around the llvm.memcpy intrinsic with a 64-bit
;; byte count.  The intrinsic is given 0 for its alignment argument and 0
;; for its volatile flag.
define void @__memcpy64(i8 * %dst, i8 * %src, i64 %len) alwaysinline {
call void @llvm.memcpy.p0i8.p0i8.i64(i8 * %dst, i8 * %src, i64 %len, i32 0, i1 0)
ret void
}
;; LLVM memmove intrinsics, one taking a 32-bit byte count and one taking a
;; 64-bit byte count.  They are called by the __memmove32 and __memmove64
;; wrappers that follow.
declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src,
i32 %len, i32 %align, i1 %isvolatile)
declare void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src,
i64 %len, i32 %align, i1 %isvolatile)
;; Always-inline wrapper around the llvm.memmove intrinsic with a 32-bit
;; byte count.  The intrinsic is given 0 for its alignment argument and 0
;; for its volatile flag.
define void @__memmove32(i8 * %dst, i8 * %src, i32 %len) alwaysinline {
call void @llvm.memmove.p0i8.p0i8.i32(i8 * %dst, i8 * %src, i32 %len, i32 0, i1 0)
ret void
}
;; Always-inline wrapper around the llvm.memmove intrinsic with a 64-bit
;; byte count.  The intrinsic is given 0 for its alignment argument and 0
;; for its volatile flag.
define void @__memmove64(i8 * %dst, i8 * %src, i64 %len) alwaysinline {
call void @llvm.memmove.p0i8.p0i8.i64(i8 * %dst, i8 * %src, i64 %len, i32 0, i1 0)
ret void
}
;; LLVM memset intrinsics, one taking a 32-bit byte count and one taking a
;; 64-bit byte count.  They are called by the __memset32 and __memset64
;; wrappers that follow.
declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 %len, i32 %align,
i1 %isvolatile)
declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 %len, i32 %align,
i1 %isvolatile)
;; Always-inline wrapper around the llvm.memset intrinsic with a 32-bit
;; byte count.  The intrinsic is given 0 for its alignment argument and 0
;; for its volatile flag.
define void @__memset32(i8 * %dst, i8 %val, i32 %len) alwaysinline {
call void @llvm.memset.p0i8.i32(i8 * %dst, i8 %val, i32 %len, i32 0, i1 0)
ret void
}
;; Always-inline wrapper around the llvm.memset intrinsic with a 64-bit
;; byte count.  The intrinsic is given 0 for its alignment argument and 0
;; for its volatile flag.
define void @__memset64(i8 * %dst, i8 %val, i64 %len) alwaysinline {
call void @llvm.memset.p0i8.i64(i8 * %dst, i8 %val, i64 %len, i32 0, i1 0)
ret void
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; assert

View File

@@ -134,9 +134,13 @@ Contents:
* `Reductions`_
+ `Data Conversions And Storage`_
+ `Data Movement`_
* `Setting and Copying Values In Memory`_
* `Packed Load and Store Operations`_
+ `Data Conversions`_
* `Converting Between Array-of-Structures and Structure-of-Arrays Layout`_
* `Conversions To and From Half-Precision Floats`_
@@ -3387,8 +3391,52 @@ program instances into a compact output buffer is `discussed in the FAQ`_.
.. _discussed in the FAQ: faq.html#how-can-a-gang-of-program-instances-generate-variable-amounts-of-output-efficiently
Data Conversions And Storage
----------------------------
Data Movement
-------------
Setting and Copying Values In Memory
------------------------------------
There are a few functions for copying blocks of memory and initializing
values in memory. Along the lines of the equivalently-named routines in
the C Standard Library, ``memcpy`` copies a given number of bytes starting
from a source location in memory to a destination location, where the two
regions of memory are guaranteed by the caller to be non-overlapping.
Alternatively, ``memmove`` can be used to copy data if the buffers may
overlap.
::
void memcpy(void * uniform dst, void * uniform src, uniform int32 count)
void memmove(void * uniform dst, void * uniform src, uniform int32 count)
void memcpy(void * varying dst, void * varying src, int32 count)
void memmove(void * varying dst, void * varying src, int32 count)
Note that each of these functions has one variant that takes ``uniform``
pointers and another that takes ``varying`` pointers.
To initialize values in memory, the ``memset`` routine can be used. (It
also behaves like the function of the same name in the C Standard Library.)
It sets the given number of bytes of memory starting at the given location
to the value provided.
::
void memset(void * uniform ptr, uniform int8 val, uniform int32 count)
void memset(void * varying ptr, int8 val, int32 count)
There are also variants of all of these functions that take 64-bit values
for the number of bytes of memory to operate on:
::
void memcpy64(void * uniform dst, void * uniform src, uniform int64 count)
void memcpy64(void * varying dst, void * varying src, int64 count)
void memmove64(void * uniform dst, void * uniform src, uniform int64 count)
void memmove64(void * varying dst, void * varying src, int64 count)
void memset64(void * uniform ptr, uniform int8 val, uniform int64 count)
void memset64(void * varying ptr, int8 val, int64 count)
Packed Load and Store Operations
--------------------------------
@@ -3447,6 +3495,9 @@ of four negative values, and initializes the first four elements of
indices where ``a[i]`` was less than zero.
Data Conversions
----------------
Converting Between Array-of-Structures and Structure-of-Arrays Layout
---------------------------------------------------------------------

View File

@@ -335,6 +335,131 @@ static inline uniform int lanemask() {
return __movmsk(__mask);
}
///////////////////////////////////////////////////////////////////////////
// memcpy/memmove/memset
/** Copy count bytes from src to dst, where both pointers are uniform.
    Forwards to the __memcpy32 builtin, which takes a 32-bit byte count. */
static inline void memcpy(void * uniform dst, void * uniform src,
                          uniform int32 count) {
    int8 * uniform dstBytes = (int8 * uniform)dst;
    int8 * uniform srcBytes = (int8 * uniform)src;
    __memcpy32(dstBytes, srcBytes, count);
}
/** Copy count bytes from src to dst, where both pointers are uniform and
    the byte count is 64-bit.  Forwards to the __memcpy64 builtin. */
static inline void memcpy64(void * uniform dst, void * uniform src,
                            uniform int64 count) {
    int8 * uniform dstBytes = (int8 * uniform)dst;
    int8 * uniform srcBytes = (int8 * uniform)src;
    __memcpy64(dstBytes, srcBytes, count);
}
/** memcpy for varying pointers: every program instance whose bit is set
    in lanemask() copies its own count bytes from its own src pointer to
    its own dst pointer.  The copies are issued one instance at a time
    through the __memcpy32 builtin. */
static inline void memcpy(void * varying dst, void * varying src,
                          int32 count) {
    // Store the per-instance pointers in uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances with their mask bit set perform a copy.
        if ((laneBits & (1 << lane)) != 0) {
            void * uniform d = dstPtrs[lane];
            void * uniform s = srcPtrs[lane];
            __memcpy32((int8 * uniform)d, (int8 * uniform)s,
                       extract(count, lane));
        }
    }
}
/** memcpy for varying pointers with a 64-bit byte count: every program
    instance whose bit is set in lanemask() copies its own count bytes
    from its own src pointer to its own dst pointer.  The copies are
    issued one instance at a time through the __memcpy64 builtin. */
static inline void memcpy64(void * varying dst, void * varying src,
                            int64 count) {
    // Store the per-instance pointers in uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances with their mask bit set perform a copy.
        if ((laneBits & (1 << lane)) != 0) {
            void * uniform d = dstPtrs[lane];
            void * uniform s = srcPtrs[lane];
            __memcpy64((int8 * uniform)d, (int8 * uniform)s,
                       extract(count, lane));
        }
    }
}
/** Move count bytes from src to dst, where both pointers are uniform.
    Forwards to the __memmove32 builtin, which takes a 32-bit byte
    count. */
static inline void memmove(void * uniform dst, void * uniform src,
                           uniform int32 count) {
    int8 * uniform dstBytes = (int8 * uniform)dst;
    int8 * uniform srcBytes = (int8 * uniform)src;
    __memmove32(dstBytes, srcBytes, count);
}
/** Move count bytes from src to dst, where both pointers are uniform and
    the byte count is 64-bit.  Forwards to the __memmove64 builtin. */
static inline void memmove64(void * uniform dst, void * uniform src,
                             uniform int64 count) {
    int8 * uniform dstBytes = (int8 * uniform)dst;
    int8 * uniform srcBytes = (int8 * uniform)src;
    __memmove64(dstBytes, srcBytes, count);
}
/** memmove for varying pointers: every program instance whose bit is set
    in lanemask() moves its own count bytes from its own src pointer to
    its own dst pointer.  The moves are issued one instance at a time
    through the __memmove32 builtin. */
static inline void memmove(void * varying dst, void * varying src,
                           int32 count) {
    // Store the per-instance pointers in uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances with their mask bit set perform a move.
        if ((laneBits & (1 << lane)) != 0) {
            void * uniform d = dstPtrs[lane];
            void * uniform s = srcPtrs[lane];
            __memmove32((int8 * uniform)d, (int8 * uniform)s,
                        extract(count, lane));
        }
    }
}
/** memmove for varying pointers with a 64-bit byte count: every program
    instance whose bit is set in lanemask() moves its own count bytes
    from its own src pointer to its own dst pointer.  The moves are
    issued one instance at a time through the __memmove64 builtin. */
static inline void memmove64(void * varying dst, void * varying src,
                             int64 count) {
    // Store the per-instance pointers in uniform arrays so that they can
    // be read back one element at a time in the loop below.
    void * uniform dstPtrs[programCount];
    void * uniform srcPtrs[programCount];
    dstPtrs[programIndex] = dst;
    srcPtrs[programIndex] = src;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances with their mask bit set perform a move.
        if ((laneBits & (1 << lane)) != 0) {
            void * uniform d = dstPtrs[lane];
            void * uniform s = srcPtrs[lane];
            __memmove64((int8 * uniform)d, (int8 * uniform)s,
                        extract(count, lane));
        }
    }
}
/** Set count bytes starting at the uniform pointer ptr to val.  Forwards
    to the __memset32 builtin, which takes a 32-bit byte count. */
static inline void memset(void * uniform ptr, uniform int8 val,
                          uniform int32 count) {
    int8 * uniform bytes = (int8 * uniform)ptr;
    __memset32(bytes, val, count);
}
/** Set count bytes starting at the uniform pointer ptr to val, with a
    64-bit byte count.  Forwards to the __memset64 builtin. */
static inline void memset64(void * uniform ptr, uniform int8 val,
                            uniform int64 count) {
    int8 * uniform bytes = (int8 * uniform)ptr;
    __memset64(bytes, val, count);
}
/** memset for varying pointers: every program instance whose bit is set
    in lanemask() sets its own count bytes, starting at its own ptr, to
    its own val.  The fills are issued one instance at a time through the
    __memset32 builtin. */
static inline void memset(void * varying ptr, int8 val, int32 count) {
    // Store the per-instance pointers in a uniform array so that they can
    // be read back one element at a time in the loop below.
    void * uniform ptrs[programCount];
    ptrs[programIndex] = ptr;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances with their mask bit set perform a fill.
        if ((laneBits & (1 << lane)) != 0) {
            __memset32((int8 * uniform)ptrs[lane], extract(val, lane),
                       extract(count, lane));
        }
    }
}
/** memset for varying pointers with a 64-bit byte count: every program
    instance whose bit is set in lanemask() sets its own count bytes,
    starting at its own ptr, to its own val.  The fills are issued one
    instance at a time through the __memset64 builtin. */
static inline void memset64(void * varying ptr, int8 val, int64 count) {
    // Store the per-instance pointers in a uniform array so that they can
    // be read back one element at a time in the loop below.
    void * uniform ptrs[programCount];
    ptrs[programIndex] = ptr;

    uniform int laneBits = lanemask();
    for (uniform int lane = 0; lane < programCount; ++lane) {
        // Only instances with their mask bit set perform a fill.
        if ((laneBits & (1 << lane)) != 0) {
            __memset64((int8 * uniform)ptrs[lane], extract(val, lane),
                       extract(count, lane));
        }
    }
}
///////////////////////////////////////////////////////////////////////////
// count leading/trailing zeros

17
tests/memcpy-uniform.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Returns programCount (presumably queried by the test harness -- confirm).
export uniform int width() { return programCount; }
// Test of memcpy() with uniform pointers.  src is filled with 0..1023 and
// its first 1024-32 elements are copied to dst starting at dst[32], so
// dst[k] == src[k-32] for k >= 32.  Each program instance then reports
// dst[64+programIndex].  aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 * uniform src = uniform new int32[1024];
int32 * uniform dst = uniform new int32[1024];
foreach (i = 0 ... 1024)
src[i] = i;
// The byte count is (1024-32) elements so the copy ends exactly at the end
// of dst.
memcpy(&dst[32], src, (1024-32)*sizeof(uniform int));
RET[programIndex] = dst[64+programIndex];
}
// Expected values for f_f(): dst[64+programIndex] was copied from
// src[32+programIndex], which holds 32+programIndex.
export void result(uniform float RET[]) {
RET[programIndex] = 32 + programIndex;
}

21
tests/memcpy-varying.ispc Normal file
View File

@@ -0,0 +1,21 @@
// Returns programCount (presumably queried by the test harness -- confirm).
export uniform int width() { return programCount; }
// Test of memcpy() with varying pointers.  Each program instance allocates
// its own src and dst buffers, fills src[i] with programIndex*10000 + i,
// copies the first programCount elements to dst, and reports
// dst[programIndex].  aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 *src = new int32[1024];
int32 *dst = new int32[1024];
for (uniform int i = 0; i < 1024; ++i)
src[i] = programIndex * 10000 + i;
// NOTE(review): both branches make the same call; presumably this is
// deliberate so that memcpy() runs under two different partial execution
// masks -- confirm before simplifying.
if (programIndex == 2)
memcpy(dst, src, programCount*sizeof(uniform int));
else
memcpy(dst, src, programCount*sizeof(uniform int));
RET[programIndex] = dst[programIndex];
}
// Expected values for f_f(): dst[programIndex] was copied from
// src[programIndex], which holds programIndex*10000 + programIndex.
export void result(uniform float RET[]) {
RET[programIndex] = 10000 * programIndex + programIndex;
}

View File

@@ -0,0 +1,16 @@
// Returns programCount (presumably queried by the test harness -- confirm).
export uniform int width() { return programCount; }
// Test of memmove() with uniform pointers and overlapping regions.  buf is
// filled with 0..1023, then its first 1023 elements are moved up by one
// element, so buf[0] keeps its value and buf[k] receives the old buf[k-1].
// Each program instance reports buf[programIndex].  aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 * uniform buf = uniform new int32[1024];
foreach (i = 0 ... 1024)
buf[i] = i;
// Source and destination overlap here, which is why memmove() is used.
memmove(&buf[1], buf, (1024-1)*sizeof(uniform int));
RET[programIndex] = buf[programIndex];
}
// Expected values for f_f(): buf[0] is still 0 and buf[k] holds k-1 for
// k >= 1, which max(0, programIndex-1) covers in one expression.
export void result(uniform float RET[]) {
RET[programIndex] = max(0, programIndex-1);
}

View File

@@ -0,0 +1,19 @@
// Returns programCount (presumably queried by the test harness -- confirm).
export uniform int width() { return programCount; }
// Test of memmove() with varying pointers under a partial execution mask.
// Each program instance allocates its own buffer and fills buf[i] with
// programIndex*10000 + i.  Only instance 2 then moves programCount elements
// from buf+programCount/2 down to buf (the two regions overlap).  Each
// instance reports its buf[0].  aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 *buf = new int32[1024];
for (uniform int i = 0; i < 1024; ++i)
buf[i] = programIndex * 10000 + i;
// Only the instance with programIndex == 2 executes the move.
if (programIndex == 2)
memmove(buf, buf+programCount/2, programCount*sizeof(uniform int));
RET[programIndex] = buf[0];
}
// Expected values for f_f(): every instance other than 2 still has its
// original buf[0] (programIndex*10000); instance 2 has the value moved down
// from its buf[programCount/2].
export void result(uniform float RET[]) {
RET[programIndex] = 10000 * programIndex;
// Overrides the entry for instance 2, the only one that ran memmove().
RET[2] = 10000 * 2 + programCount/2;
}

16
tests/memset-uniform.ispc Normal file
View File

@@ -0,0 +1,16 @@
// Returns programCount (presumably queried by the test harness -- confirm).
export uniform int width() { return programCount; }
// Test of memset() with a uniform pointer.  buf[0] is set to 0 and every
// remaining element of the 1024-element buffer is filled with 0x7f bytes.
// Each program instance then reports whether buf[programIndex] equals
// 0x7f7f7f7f.  aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 * uniform buf = uniform new int32[1024];
    buf[0] = 0;
    // The fill starts at buf+1, so only 1024-1 elements remain in the
    // allocation; filling 1024 elements from there would write one int32
    // past the end of buf.
    memset(buf+1, 0x7f, (1024-1)*sizeof(uniform int32));
    int v = buf[programIndex];
    RET[programIndex] = (v == 0x7f7f7f7f);
}
// Expected values for f_f(): every element except buf[0] was filled with
// 0x7f bytes, so the comparison is true (1) everywhere except instance 0.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
// buf[0] was set to 0 and is not covered by the memset() call.
RET[0] = 0;
}

21
tests/memset-varying.ispc Normal file
View File

@@ -0,0 +1,21 @@
// Returns programCount (presumably queried by the test harness -- confirm).
export uniform int width() { return programCount; }
// Test of memset() with varying pointers, values and counts.  Each program
// instance allocates a buffer of 1024*(programIndex+1) int32 elements and
// fills all of it: odd-numbered instances with 0xff bytes, even-numbered
// instances with 0x01 bytes.  Each instance then reports whether its buf[0]
// matches the corresponding 32-bit pattern.  aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 * varying buf = varying new int32[1024*(programIndex+1)];
// The two branches run memset() under complementary execution masks.
if (programIndex & 1) {
memset(buf, 0xff, 1024*(programIndex+1)*sizeof(uniform int32));
}
else {
memset(buf, 0x01, 1024*(programIndex+1)*sizeof(uniform int32));
}
int v = buf[0];
int expected = (programIndex & 1) ? 0xffffffff : 0x01010101;
RET[programIndex] = (v == expected);
}
// Expected values for f_f(): every instance should find its fill pattern
// in buf[0], so the comparison is true (1) for all instances.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}