Provide both signed and unsigned int variants of bitcode-based builtins.

When creating function Symbols for functions that were defined in LLVM bitcode for the standard library, if any of the function parameters are integer types, create two ispc-side Symbols: one where the integer types are all signed and the other where they are all unsigned.  This allows us to provide, for example, both store_to_int16(reference int a[], uniform int offset, int val) and store_to_int16(reference unsigned int a[], uniform int offset, unsigned int val).

Added some additional tests to exercise the new variants of these.

Also fixed some cases where the __{load,store}_int{8,16} builtins would read from/write to memory even if the mask was all off (which could cause crashes in some cases.)
This commit is contained in:
Matt Pharr
2011-07-04 12:07:00 +01:00
parent fac50ba454
commit c14c3ceba6
14 changed files with 293 additions and 91 deletions

View File

@@ -557,33 +557,101 @@ define internal float @__stdlib_pow(float, float) nounwind readnone alwaysinline
;; $1: vector width of the target
define(`int8_16', `
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset) nounwind alwaysinline {
;; __load_uint8: masked load of one 8-bit value per vector lane, widened to i32.
;; The unnamed first argument (%0) is the base pointer, %offset counts i8
;; elements from that base, and %mask is the execution mask.
;; Returns the zero-extended values, or undef without touching memory when
;; __movmsk yields 0 for the mask.
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
; collapse the vector mask to an i32; a zero result takes the skip path
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
; address arithmetic is done on an i8 pointer, so %offset is in bytes
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
; all lanes are fetched with a single wide integer load (declared align 1)
; once the mask test above has passed
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1
; reinterpret the wide integer as a vector of i8 lanes
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; the values are unsigned, so zero-extend each lane to i32
%ret = zext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
; mask test failed: no memory access is made and the result is undef
ret <$1 x i32> undef
}
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset) nounwind alwaysinline {
;; __load_int8: masked load of one 8-bit value per vector lane, widened to i32.
;; The unnamed first argument (%0) is the base pointer, %offset counts i8
;; elements from that base, and %mask is the execution mask.
;; Returns the sign-extended values, or undef without touching memory when
;; __movmsk yields 0 for the mask.
define internal <$1 x i32> @__load_int8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
; collapse the vector mask to an i32; a zero result takes the skip path
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
; address arithmetic is done on an i8 pointer, so %offset is in bytes
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
; all lanes are fetched with a single wide integer load (declared align 1)
; once the mask test above has passed
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1
; reinterpret the wide integer as a vector of i8 lanes
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; the values are signed, so sign-extend each lane to i32
%ret = sext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
; mask test failed: no memory access is made and the result is undef
ret <$1 x i32> undef
}
;; __load_uint16: masked load of one 16-bit value per vector lane, widened to i32.
;; The unnamed first argument (%0) is the base pointer, %offset counts i16
;; elements from that base, and %mask is the execution mask.
;; Returns the zero-extended values, or undef without touching memory when
;; __movmsk yields 0 for the mask.
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
; collapse the vector mask to an i32; a zero result takes the skip path
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
; address arithmetic is done on an i16 pointer, so %offset is in 16-bit units
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
; all lanes are fetched with a single wide integer load (declared align 2)
; once the mask test above has passed
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2
; reinterpret the wide integer as a vector of i16 lanes
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; the values are unsigned, so zero-extend each lane to i32
%ret = zext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
; mask test failed: no memory access is made and the result is undef
ret <$1 x i32> undef
}
define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
;; __load_int16: masked load of one 16-bit value per vector lane, widened to i32.
;; The unnamed first argument (%0) is the base pointer, %offset counts i16
;; elements from that base, and %mask is the execution mask.
;; Returns the sign-extended values, or undef without touching memory when
;; __movmsk yields 0 for the mask.
define internal <$1 x i32> @__load_int16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
; collapse the vector mask to an i32; a zero result takes the skip path
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
; address arithmetic is done on an i16 pointer, so %offset is in 16-bit units
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
; all lanes are fetched with a single wide integer load (declared align 2)
; once the mask test above has passed
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2
; reinterpret the wide integer as a vector of i16 lanes
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; the values are signed, so sign-extend each lane to i32
%ret = sext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
; mask test failed: no memory access is made and the result is undef
ret <$1 x i32> undef
}
define internal void @__store_int8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip
dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i8>
%val64 = bitcast <$1 x i8> %val to i`'eval(8*$1)
@@ -604,10 +672,18 @@ define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1
ret void
skip:
ret void
}
define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
define internal void @__store_int16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip
dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i16>
%val64 = bitcast <$1 x i16> %val to i`'eval(16*$1)
@@ -627,6 +703,9 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2
ret void
skip:
ret void
}
'
)