Add support for mask vectors of 8- and 16-bit element types.
A number of places throughout the system assumed that the execution mask would have either 32-bit or 1-bit elements. This commit makes it possible for a target to use an 8- or 16-bit mask as well.
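For orientation, here is a minimal sketch (not part of the commit) of the pattern the change applies throughout the compiler: instead of special-casing 1-bit and 32-bit masks, code switches on the target's mask bit count. The standalone helper and its name are hypothetical, and the header path assumes the LLVM 3.x-era layout, but the calls mirror the ones used in the llvmutil.cpp hunk below.

// Hypothetical helper, for illustration only -- mirrors the switch added in
// llvmutil.cpp.  Uses the same (older) llvm::VectorType::get(elementType, width)
// API that the diff itself uses.
#include <llvm/DerivedTypes.h>   // assumption: pre-3.3 header layout
#include <cstddef>

static llvm::VectorType *
lGetMaskVectorType(llvm::LLVMContext &ctx, int maskBitCount, int vectorWidth) {
    switch (maskBitCount) {
    case 1:
        return llvm::VectorType::get(llvm::Type::getInt1Ty(ctx), vectorWidth);
    case 8:
        return llvm::VectorType::get(llvm::Type::getInt8Ty(ctx), vectorWidth);
    case 16:
        return llvm::VectorType::get(llvm::Type::getInt16Ty(ctx), vectorWidth);
    case 32:
        return llvm::VectorType::get(llvm::Type::getInt32Ty(ctx), vectorWidth);
    default:
        return NULL;   // the real code calls FATAL() on an unhandled width
    }
}

The design choice is that LLVMTypes::MaskType and LLVMTypes::BoolVectorType stay the same type, so the rest of the compiler and the stdlib builtins key off a single MASK element type rather than assuming i1 or i32.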
Makefile (29 changed lines)

@@ -137,7 +137,7 @@ BISON_SRC=parse.yy
 FLEX_SRC=lex.ll
 
 OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
-	stdlib_generic_ispc.o stdlib_x86_ispc.o \
+	stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o \
 	$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
 
 default: ispc

@@ -243,12 +243,23 @@ objs/builtins-c-64.cpp: builtins/builtins.c
 	@echo Creating C++ source from builtins definition file $<
 	@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c 64 > $@
 
-objs/stdlib_generic_ispc.cpp: stdlib.ispc
-	@echo Creating C++ source from $< for generic
-	@$(CLANG) -E -x c -DISPC_TARGET_GENERIC=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
-		python stdlib2cpp.py generic > $@
+objs/stdlib_mask1_ispc.cpp: stdlib.ispc
+	@echo Creating C++ source from $< for mask1
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+		python stdlib2cpp.py mask1 > $@
 
+objs/stdlib_mask8_ispc.cpp: stdlib.ispc
+	@echo Creating C++ source from $< for mask8
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=8 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+		python stdlib2cpp.py mask8 > $@
+
+objs/stdlib_mask16_ispc.cpp: stdlib.ispc
+	@echo Creating C++ source from $< for mask16
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=16 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+		python stdlib2cpp.py mask16 > $@
+
+objs/stdlib_mask32_ispc.cpp: stdlib.ispc
+	@echo Creating C++ source from $< for mask32
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+		python stdlib2cpp.py mask32 > $@
+
-objs/stdlib_x86_ispc.cpp: stdlib.ispc
-	@echo Creating C++ source from $< for x86
-	@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | \
-		python stdlib2cpp.py x86 > $@
builtins.cpp (35 changed lines)

@@ -112,10 +112,7 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
         return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
 
     // varying
-    if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
-        t == LLVMTypes::MaskType)
-        return AtomicType::VaryingBool;
-    else if (t == LLVMTypes::Int8VectorType)
+    if (t == LLVMTypes::Int8VectorType)
         return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
     else if (t == LLVMTypes::Int16VectorType)
         return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;

@@ -127,6 +124,8 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
         return AtomicType::VaryingDouble;
     else if (t == LLVMTypes::Int64VectorType)
         return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
+    else if (t == LLVMTypes::MaskType)
+        return AtomicType::VaryingBool;
 
     // pointers to uniform
     else if (t == LLVMTypes::Int8PointerType)

@@ -1038,16 +1037,30 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
         // If the user wants the standard library to be included, parse the
         // serialized version of the stdlib.ispc file to get its
         // definitions added.
+        extern char stdlib_mask1_code[], stdlib_mask8_code[];
+        extern char stdlib_mask16_code[], stdlib_mask32_code[];
         if (g->target->getISA() == Target::GENERIC &&
-            g->target->getVectorWidth() != 1) { // 1 wide uses x86 stdlib
-            extern char stdlib_generic_code[];
-            yy_scan_string(stdlib_generic_code);
-            yyparse();
+            g->target->getVectorWidth() == 1) { // 1 wide uses 32 stdlib
+            yy_scan_string(stdlib_mask32_code);
         }
         else {
-            extern char stdlib_x86_code[];
-            yy_scan_string(stdlib_x86_code);
-            yyparse();
+            switch (g->target->getMaskBitCount()) {
+            case 1:
+                yy_scan_string(stdlib_mask1_code);
+                break;
+            case 8:
+                yy_scan_string(stdlib_mask8_code);
+                break;
+            case 16:
+                yy_scan_string(stdlib_mask16_code);
+                break;
+            case 32:
+                yy_scan_string(stdlib_mask32_code);
+                break;
+            default:
+                FATAL("Unhandled mask bit size for stdlib.ispc");
+            }
         }
+        yyparse();
     }
 }
builtins/util.m4 (161 changed lines)

@@ -690,6 +690,75 @@ shuffles(i64, 8)
 ;; $4: return type of the LLVM atomic type, in ispc naming paralance (e.g. int32)
 ;; $5: identity value for the operator (e.g. 0 for add, -1 for AND, ...)
 
+define(`mask_converts', `
+define internal <$1 x i8> @convertmask_i1_i8_$1(<$1 x i1>) {
+  %r = sext <$1 x i1> %0 to <$1 x i8>
+  ret <$1 x i8> %r
+}
+define internal <$1 x i16> @convertmask_i1_i16_$1(<$1 x i1>) {
+  %r = sext <$1 x i1> %0 to <$1 x i16>
+  ret <$1 x i16> %r
+}
+define internal <$1 x i32> @convertmask_i1_i32_$1(<$1 x i1>) {
+  %r = sext <$1 x i1> %0 to <$1 x i32>
+  ret <$1 x i32> %r
+}
+define internal <$1 x i64> @convertmask_i1_i64_$1(<$1 x i1>) {
+  %r = sext <$1 x i1> %0 to <$1 x i64>
+  ret <$1 x i64> %r
+}
+
+define internal <$1 x i8> @convertmask_i8_i8_$1(<$1 x i8>) {
+  ret <$1 x i8> %0
+}
+define internal <$1 x i16> @convertmask_i8_i86_$1(<$1 x i8>) {
+  %r = sext <$1 x i8> %0 to <$1 x i16>
+  ret <$1 x i16> %r
+}
+define internal <$1 x i32> @convertmask_i8_i32_$1(<$1 x i8>) {
+  %r = sext <$1 x i8> %0 to <$1 x i32>
+  ret <$1 x i32> %r
+}
+define internal <$1 x i64> @convertmask_i8_i64_$1(<$1 x i8>) {
+  %r = sext <$1 x i8> %0 to <$1 x i64>
+  ret <$1 x i64> %r
+}
+
+define internal <$1 x i8> @convertmask_i16_i8_$1(<$1 x i16>) {
+  %r = trunc <$1 x i16> %0 to <$1 x i8>
+  ret <$1 x i8> %r
+}
+define internal <$1 x i16> @convertmask_i16_i16_$1(<$1 x i16>) {
+  ret <$1 x i16> %0
+}
+define internal <$1 x i32> @convertmask_i16_i32_$1(<$1 x i16>) {
+  %r = sext <$1 x i16> %0 to <$1 x i32>
+  ret <$1 x i32> %r
+}
+define internal <$1 x i64> @convertmask_i16_i64_$1(<$1 x i16>) {
+  %r = sext <$1 x i16> %0 to <$1 x i64>
+  ret <$1 x i64> %r
+}
+
+define internal <$1 x i8> @convertmask_i32_i8_$1(<$1 x i32>) {
+  %r = trunc <$1 x i32> %0 to <$1 x i8>
+  ret <$1 x i8> %r
+}
+define internal <$1 x i16> @convertmask_i32_i16_$1(<$1 x i32>) {
+  %r = trunc <$1 x i32> %0 to <$1 x i16>
+  ret <$1 x i16> %r
+}
+define internal <$1 x i32> @convertmask_i32_i32_$1(<$1 x i32>) {
+  ret <$1 x i32> %0
+}
+define internal <$1 x i64> @convertmask_i32_i64_$1(<$1 x i32>) {
+  %r = sext <$1 x i32> %0 to <$1 x i64>
+  ret <$1 x i64> %r
+}
+')
+
+mask_converts(WIDTH)
+
 define(`global_atomic_associative', `
 
 define <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,

@@ -697,17 +766,10 @@ define <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,
 ; first, for any lanes where the mask is off, compute a vector where those lanes
 ; hold the identity value..
 
-  ; for the bit tricks below, we need the mask to be sign extended to be
-  ; the size of the element type.
-  ifelse(
-    MASK,i1,`%mask = sext <$1 x MASK> %m to <$1 x $3>',
-    $3,i64, `%mask = sext <$1 x MASK> %m to <$1 x i64>',
-    $3,i32, `
-      ; silly workaround to do %mask = %m, which is not possible directly..
-      %maskmem = alloca <$1 x i32>
-      store <$1 x i32> %m, <$1 x i32> * %maskmem
-      %mask = load <$1 x i32> * %maskmem'
-  )
+  ; for the bit tricks below, we need the mask to have the
+  ; same element size as the element type.
+  %mask = call <$1 x $3> @convertmask_`'MASK`'_$3_$1(<$1 x MASK> %m)
 
 ; zero out any lanes that are off
 %valoff = and <$1 x $3> %val, %mask

@@ -2440,13 +2502,12 @@ define i32 @__sext_uniform_bool(i1) nounwind readnone alwaysinline {
 }
 
 define <WIDTH x i32> @__sext_varying_bool(<WIDTH x MASK>) nounwind readnone alwaysinline {
-  ifelse(MASK,i1, `
-  %se = sext <WIDTH x i1> %0 to <WIDTH x i32>
-  ret <WIDTH x i32> %se
-  ', `
-  ret <WIDTH x i32> %0')
+  ifelse(MASK,i32, `ret <WIDTH x i32> %0',
+         `%se = sext <WIDTH x MASK> %0 to <WIDTH x i32>
+          ret <WIDTH x i32> %se')
 }
 
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; memcpy/memmove/memset

@@ -3201,8 +3262,8 @@ return:
 ;; $1: llvm type of elements (and suffix for function name)
 
 define(`gen_masked_store', `
-define void @__masked_store_$1(<WIDTH x $1>* nocapture, <WIDTH x $1>, <WIDTH x i32>) nounwind alwaysinline {
-  per_lane(WIDTH, <WIDTH x i32> %2, `
+define void @__masked_store_$1(<WIDTH x $1>* nocapture, <WIDTH x $1>, <WIDTH x MASK>) nounwind alwaysinline {
+  per_lane(WIDTH, <WIDTH x MASK> %2, `
       %ptr_LANE_ID = getelementptr <WIDTH x $1> * %0, i32 0, i32 LANE
       %storeval_LANE_ID = extractelement <WIDTH x $1> %1, i32 LANE
       store $1 %storeval_LANE_ID, $1 * %ptr_LANE_ID')

@@ -3378,10 +3439,10 @@ define void @__masked_store_blend_i16(<16 x i16>* nocapture, <16 x i16>,
 define(`packed_load_and_store', `
 
 define i32 @__packed_load_active(i32 * %startptr, <WIDTH x i32> * %val_ptr,
-                                 <WIDTH x i32> %full_mask) nounwind alwaysinline {
+                                 <WIDTH x MASK> %full_mask) nounwind alwaysinline {
 entry:
-  %mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
-  %mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
+  %mask = call i64 @__movmsk(<WIDTH x MASK> %full_mask)
+  %mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %full_mask)
   br i1 %mask_known, label %known_mask, label %unknown_mask
 
 known_mask:

@@ -3432,10 +3493,10 @@ done:
 }
 
 define i32 @__packed_store_active(i32 * %startptr, <WIDTH x i32> %vals,
-                                  <WIDTH x i32> %full_mask) nounwind alwaysinline {
+                                  <WIDTH x MASK> %full_mask) nounwind alwaysinline {
 entry:
-  %mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
-  %mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
+  %mask = call i64 @__movmsk(<WIDTH x MASK> %full_mask)
+  %mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %full_mask)
   br i1 %mask_known, label %known_mask, label %unknown_mask
 
 known_mask:

@@ -3544,10 +3605,10 @@ check_neighbors:
 %castvr = call <$1 x $4> @__rotate_i$6(<$1 x $4> %castvec, i32 1)
 %vr = bitcast <$1 x $4> %castvr to <$1 x $2>
 %eq = $5 $7 <$1 x $2> %vec, %vr
-ifelse(MASK,i32, `
-  %eq32 = sext <$1 x i1> %eq to <$1 x i32>
-  %eqmm = call i64 @__movmsk(<$1 x i32> %eq32)', `
-  %eqmm = call i64 @__movmsk(<$1 x MASK> %eq)')
+ifelse(MASK,i1, `
+  %eqmm = call i64 @__movmsk(<$1 x MASK> %eq)',
+  `%eqm = sext <$1 x i1> %eq to <$1 x MASK>
+  %eqmm = call i64 @__movmsk(<$1 x MASK> %eqm)')
 %alleq = icmp eq i64 %eqmm, ALL_ON_MASK
 br i1 %alleq, label %all_equal, label %not_all_equal
 ', `

@@ -3722,9 +3783,9 @@ pl_done:
 define(`gen_gather_general', `
 ; fully general 32-bit gather, takes array of pointers encoded as vector of i32s
 define <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs,
-                                   <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
+                                   <WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
   %ret_ptr = alloca <WIDTH x $1>
-  per_lane(WIDTH, <WIDTH x i32> %vecmask, `
+  per_lane(WIDTH, <WIDTH x MASK> %vecmask, `
       %iptr_LANE_ID = extractelement <WIDTH x i32> %ptrs, i32 LANE
       %ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 *
       %val_LANE_ID = load $1 * %ptr_LANE_ID

@@ -3738,9 +3799,9 @@ define <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs,
 
 ; fully general 64-bit gather, takes array of pointers encoded as vector of i32s
 define <WIDTH x $1> @__gather64_$1(<WIDTH x i64> %ptrs,
-                                   <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
+                                   <WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
   %ret_ptr = alloca <WIDTH x $1>
-  per_lane(WIDTH, <WIDTH x i32> %vecmask, `
+  per_lane(WIDTH, <WIDTH x MASK> %vecmask, `
      %iptr_LANE_ID = extractelement <WIDTH x i64> %ptrs, i32 LANE
      %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 *
      %val_LANE_ID = load $1 * %ptr_LANE_ID

@@ -3804,7 +3865,7 @@ define <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %o
 
 define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
                                              <WIDTH x i32> %offset_delta,
-                                             <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
+                                             <WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
   ; We can be clever and avoid the per-lane stuff for gathers if we are willing
   ; to require that the 0th element of the array being gathered from is always
   ; legal to read from (and we do indeed require that, given the benefits!)

@@ -3813,13 +3874,13 @@ define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32
   %offsetsPtr = alloca <WIDTH x i32>
   store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %offsetsPtr
   call void @__masked_store_blend_i32(<WIDTH x i32> * %offsetsPtr, <WIDTH x i32> %offsets,
-                                      <WIDTH x i32> %vecmask)
+                                      <WIDTH x MASK> %vecmask)
   %newOffsets = load <WIDTH x i32> * %offsetsPtr
 
   %deltaPtr = alloca <WIDTH x i32>
   store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %deltaPtr
   call void @__masked_store_blend_i32(<WIDTH x i32> * %deltaPtr, <WIDTH x i32> %offset_delta,
-                                      <WIDTH x i32> %vecmask)
+                                      <WIDTH x MASK> %vecmask)
   %newDelta = load <WIDTH x i32> * %deltaPtr
 
   %ret0 = call <WIDTH x $1> @__gather_elt32_$1(i8 * %ptr, <WIDTH x i32> %newOffsets,

@@ -3835,7 +3896,7 @@ define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32
 
 define <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
                                              <WIDTH x i64> %offset_delta,
-                                             <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
+                                             <WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
   ; We can be clever and avoid the per-lane stuff for gathers if we are willing
   ; to require that the 0th element of the array being gathered from is always
   ; legal to read from (and we do indeed require that, given the benefits!)

@@ -3844,13 +3905,13 @@ define <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64
   %offsetsPtr = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %offsetsPtr
   call void @__masked_store_blend_i64(<WIDTH x i64> * %offsetsPtr, <WIDTH x i64> %offsets,
-                                      <WIDTH x i32> %vecmask)
+                                      <WIDTH x MASK> %vecmask)
   %newOffsets = load <WIDTH x i64> * %offsetsPtr
 
   %deltaPtr = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %deltaPtr
   call void @__masked_store_blend_i64(<WIDTH x i64> * %deltaPtr, <WIDTH x i64> %offset_delta,
-                                      <WIDTH x i32> %vecmask)
+                                      <WIDTH x MASK> %vecmask)
   %newDelta = load <WIDTH x i64> * %deltaPtr
 
   %ret0 = call <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %newOffsets,

@@ -3876,27 +3937,27 @@ gen_gather_factored($1)
 define <WIDTH x $1>
 @__gather_base_offsets32_$1(i8 * %ptr, i32 %offset_scale,
                             <WIDTH x i32> %offsets,
-                            <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
+                            <WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
   %scale_vec = bitcast i32 %offset_scale to <1 x i32>
   %smear_scale = shufflevector <1 x i32> %scale_vec, <1 x i32> undef,
      <WIDTH x i32> < forloop(i, 1, eval(WIDTH-1), `i32 0, ') i32 0 >
   %scaled_offsets = mul <WIDTH x i32> %smear_scale, %offsets
   %v = call <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %scaled_offsets, i32 1,
-                                                     <WIDTH x i32> zeroinitializer, <WIDTH x i32> %vecmask)
+                                                     <WIDTH x i32> zeroinitializer, <WIDTH x MASK> %vecmask)
   ret <WIDTH x $1> %v
 }
 
 define <WIDTH x $1>
 @__gather_base_offsets64_$1(i8 * %ptr, i32 %offset_scale,
                             <WIDTH x i64> %offsets,
-                            <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
+                            <WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
   %scale64 = zext i32 %offset_scale to i64
   %scale_vec = bitcast i64 %scale64 to <1 x i64>
   %smear_scale = shufflevector <1 x i64> %scale_vec, <1 x i64> undef,
      <WIDTH x i32> < forloop(i, 1, eval(WIDTH-1), `i32 0, ') i32 0 >
   %scaled_offsets = mul <WIDTH x i64> %smear_scale, %offsets
   %v = call <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %scaled_offsets,
-                                                     i32 1, <WIDTH x i64> zeroinitializer, <WIDTH x i32> %vecmask)
+                                                     i32 1, <WIDTH x i64> zeroinitializer, <WIDTH x MASK> %vecmask)
   ret <WIDTH x $1> %v
 }
 

@@ -3955,9 +4016,9 @@ define void @__scatter_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_s
 
 define void @__scatter_factored_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32 %offset_scale,
                                          <WIDTH x i32> %offset_delta, <WIDTH x $1> %values,
-                                         <WIDTH x i32> %mask) nounwind alwaysinline {
+                                         <WIDTH x MASK> %mask) nounwind alwaysinline {
   ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
-  per_lane(WIDTH, <WIDTH x i32> %mask, `
+  per_lane(WIDTH, <WIDTH x MASK> %mask, `
       call void @__scatter_elt32_$1(i8 * %base, <WIDTH x i32> %offsets, i32 %offset_scale,
                                     <WIDTH x i32> %offset_delta, <WIDTH x $1> %values, i32 LANE)')
   ret void

@@ -3965,9 +4026,9 @@ define void @__scatter_factored_base_offsets32_$1(i8* %base, <WIDTH x i32> %offs
 
 define void @__scatter_factored_base_offsets64_$1(i8* %base, <WIDTH x i64> %offsets, i32 %offset_scale,
                                          <WIDTH x i64> %offset_delta, <WIDTH x $1> %values,
-                                         <WIDTH x i32> %mask) nounwind alwaysinline {
+                                         <WIDTH x MASK> %mask) nounwind alwaysinline {
   ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
-  per_lane(WIDTH, <WIDTH x i32> %mask, `
+  per_lane(WIDTH, <WIDTH x MASK> %mask, `
      call void @__scatter_elt64_$1(i8 * %base, <WIDTH x i64> %offsets, i32 %offset_scale,
                                    <WIDTH x i64> %offset_delta, <WIDTH x $1> %values, i32 LANE)')
   ret void

@@ -3975,8 +4036,8 @@ define void @__scatter_factored_base_offsets64_$1(i8* %base, <WIDTH x i64> %offs
 
 ; fully general 32-bit scatter, takes array of pointers encoded as vector of i32s
 define void @__scatter32_$1(<WIDTH x i32> %ptrs, <WIDTH x $1> %values,
-                            <WIDTH x i32> %mask) nounwind alwaysinline {
-  per_lane(WIDTH, <WIDTH x i32> %mask, `
+                            <WIDTH x MASK> %mask) nounwind alwaysinline {
+  per_lane(WIDTH, <WIDTH x MASK> %mask, `
      %iptr_LANE_ID = extractelement <WIDTH x i32> %ptrs, i32 LANE
      %ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 *
      %val_LANE_ID = extractelement <WIDTH x $1> %values, i32 LANE

@@ -3987,8 +4048,8 @@ define void @__scatter32_$1(<WIDTH x i32> %ptrs, <WIDTH x $1> %values,
 
 ; fully general 64-bit scatter, takes array of pointers encoded as vector of i64s
 define void @__scatter64_$1(<WIDTH x i64> %ptrs, <WIDTH x $1> %values,
-                            <WIDTH x i32> %mask) nounwind alwaysinline {
-  per_lane(WIDTH, <WIDTH x i32> %mask, `
+                            <WIDTH x MASK> %mask) nounwind alwaysinline {
+  per_lane(WIDTH, <WIDTH x MASK> %mask, `
      %iptr_LANE_ID = extractelement <WIDTH x i64> %ptrs, i32 LANE
      %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 *
      %val_LANE_ID = extractelement <WIDTH x $1> %values, i32 LANE
ctx.cpp (26 changed lines)

@@ -1456,13 +1456,13 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
         for (unsigned int i = 0; i < at->getNumElements(); ++i) {
             llvm::Value *elt = ExtractInst(b, i);
             llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType,
-                                         LLVMGetName(elt, "_to_boolvec32"));
+                                         LLVMGetName(elt, "_to_boolvec"));
             ret = InsertInst(ret, sext, i);
         }
         return ret;
     }
     else
-        return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_i32"));
+        return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec"));
 }
 
 

@@ -2781,6 +2781,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
 
     // Figure out if we need a 8, 16, 32 or 64-bit masked store.
     llvm::Function *maskedStoreFunc = NULL;
+    llvm::Type *llvmValueType = value->getType();
 
     const PointerType *pt = CastType<PointerType>(valueType);
     if (pt != NULL) {

@@ -2809,8 +2810,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
         else
             maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
     }
-    else if (Type::Equal(valueType, AtomicType::VaryingBool) &&
-             g->target->getMaskBitCount() == 1) {
+    else if (llvmValueType == LLVMTypes::Int1VectorType) {
         llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
                                               LLVMMaskAllOn, "~mask");
         llvm::Value *old = LoadInst(ptr);

@@ -2823,28 +2823,22 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
         StoreInst(final, ptr);
         return;
     }
-    else if (Type::Equal(valueType, AtomicType::VaryingDouble)) {
+    else if (llvmValueType == LLVMTypes::DoubleVectorType) {
         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
     }
-    else if (Type::Equal(valueType, AtomicType::VaryingInt64) ||
-             Type::Equal(valueType, AtomicType::VaryingUInt64)) {
+    else if (llvmValueType == LLVMTypes::Int64VectorType) {
         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
     }
-    else if (Type::Equal(valueType, AtomicType::VaryingFloat)) {
+    else if (llvmValueType == LLVMTypes::FloatVectorType) {
         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
     }
-    else if (Type::Equal(valueType, AtomicType::VaryingBool) ||
-             Type::Equal(valueType, AtomicType::VaryingInt32) ||
-             Type::Equal(valueType, AtomicType::VaryingUInt32) ||
-             CastType<EnumType>(valueType) != NULL) {
+    else if (llvmValueType == LLVMTypes::Int32VectorType) {
         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
     }
-    else if (Type::Equal(valueType, AtomicType::VaryingInt16) ||
-             Type::Equal(valueType, AtomicType::VaryingUInt16)) {
+    else if (llvmValueType == LLVMTypes::Int16VectorType) {
         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
     }
-    else if (Type::Equal(valueType, AtomicType::VaryingInt8) ||
-             Type::Equal(valueType, AtomicType::VaryingUInt8)) {
+    else if (llvmValueType == LLVMTypes::Int8VectorType) {
         maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
     }
     AssertPos(currentPos, maskedStoreFunc != NULL);
expr.cpp (36 changed lines)

@@ -6161,9 +6161,9 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
-                // If we have a bool vector of i32 elements, first truncate
-                // down to a single bit
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
+                // If we have a bool vector of non-i1 elements, first
+                // truncate down to a single bit.
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             // And then do an unisgned int->float cast
             cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int

@@ -6205,8 +6205,8 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
-                // truncate i32 bool vector values to i1s
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
+                // truncate bool vector values to i1s
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to double
                                  exprVal, targetType, cOpName);

@@ -6243,7 +6243,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6279,7 +6279,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6321,7 +6321,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6361,7 +6361,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6407,7 +6407,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6447,7 +6447,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6493,7 +6493,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6531,7 +6531,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         switch (fromType->basicType) {
         case AtomicType::TYPE_BOOL:
             if (fromType->IsVaryingType() &&
-                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+                LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
                 exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
             cast = ctx->ZExtInst(exprVal, targetType, cOpName);
             break;

@@ -6625,12 +6625,12 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
 
     if (fromType->IsUniformType()) {
         if (toType->IsVaryingType() &&
-            LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) {
-            // extend out to i32 bool values from i1 here. then we'll
-            // turn into a vector below, the way it does for everyone
-            // else...
+            LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType) {
+            // extend out to an bool as an i8/i16/i32 from the i1 here.
+            // Then we'll turn that into a vector below, the way it
+            // does for everyone else...
             cast = ctx->SExtInst(cast, LLVMTypes::BoolVectorType->getElementType(),
-                                 LLVMGetName(cast, "to_i32bool"));
+                                 LLVMGetName(cast, "to_i_bool"));
         }
     }
     else
llvmutil.cpp (73 changed lines)

@@ -115,13 +115,25 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
     LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
     LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
 
-    if (target.getMaskBitCount() == 1)
+    switch (target.getMaskBitCount()) {
+    case 1:
         LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
             llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.getVectorWidth());
-    else {
-        Assert(target.getMaskBitCount() == 32);
+        break;
+    case 8:
+        LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
+            llvm::VectorType::get(llvm::Type::getInt8Ty(*ctx), target.getVectorWidth());
+        break;
+    case 16:
+        LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
+            llvm::VectorType::get(llvm::Type::getInt16Ty(*ctx), target.getVectorWidth());
+        break;
+    case 32:
         LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
             llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.getVectorWidth());
+        break;
+    default:
+        FATAL("Unhandled mask width for initializing MaskType");
     }
 
     LLVMTypes::Int1VectorType =

@@ -154,12 +166,26 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
 
     std::vector<llvm::Constant *> maskOnes;
     llvm::Constant *onMask = NULL;
-    if (target.getMaskBitCount() == 1)
+    switch (target.getMaskBitCount()) {
+    case 1:
         onMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1,
                                         false /*unsigned*/); // 0x1
-    else
+        break;
+    case 8:
+        onMask = llvm::ConstantInt::get(llvm::Type::getInt8Ty(*ctx), -1,
+                                        true /*signed*/); // 0xff
+        break;
+    case 16:
+        onMask = llvm::ConstantInt::get(llvm::Type::getInt16Ty(*ctx), -1,
+                                        true /*signed*/); // 0xffff
+        break;
+    case 32:
         onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
                                         true /*signed*/); // 0xffffffff
+        break;
+    default:
+        FATAL("Unhandled mask width for onMask");
+    }
 
     for (int i = 0; i < target.getVectorWidth(); ++i)
         maskOnes.push_back(onMask);

@@ -167,13 +193,26 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
 
     std::vector<llvm::Constant *> maskZeros;
    llvm::Constant *offMask = NULL;
-    if (target.getMaskBitCount() == 1)
+    switch (target.getMaskBitCount()) {
+    case 1:
         offMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 0,
                                          true /*signed*/);
-    else
+        break;
+    case 8:
+        offMask = llvm::ConstantInt::get(llvm::Type::getInt8Ty(*ctx), 0,
+                                         true /*signed*/);
+        break;
+    case 16:
+        offMask = llvm::ConstantInt::get(llvm::Type::getInt16Ty(*ctx), 0,
+                                         true /*signed*/);
+        break;
+    case 32:
         offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
                                          true /*signed*/);
+        break;
+    default:
+        FATAL("Unhandled mask width for offMask");
+    }
     for (int i = 0; i < target.getVectorWidth(); ++i)
         maskZeros.push_back(offMask);
     LLVMMaskAllOff = llvm::ConstantVector::get(maskZeros);

@@ -444,9 +483,14 @@ LLVMBoolVector(bool b) {
     if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
         v = llvm::ConstantInt::get(LLVMTypes::Int32Type, b ? 0xffffffff : 0,
                                    false /*unsigned*/);
+    else if (LLVMTypes::BoolVectorType == LLVMTypes::Int16VectorType)
+        v = llvm::ConstantInt::get(LLVMTypes::Int16Type, b ? 0xffff : 0,
+                                   false /*unsigned*/);
+    else if (LLVMTypes::BoolVectorType == LLVMTypes::Int8VectorType)
+        v = llvm::ConstantInt::get(LLVMTypes::Int8Type, b ? 0xff : 0,
+                                   false /*unsigned*/);
     else {
-        Assert(LLVMTypes::BoolVectorType->getElementType() ==
-               llvm::Type::getInt1Ty(*g->ctx));
+        Assert(LLVMTypes::BoolVectorType == LLVMTypes::Int1VectorType);
         v = b ? LLVMTrue : LLVMFalse;
     }
 

@@ -465,9 +509,14 @@ LLVMBoolVector(const bool *bvec) {
     if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
         v = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[i] ? 0xffffffff : 0,
                                    false /*unsigned*/);
+    else if (LLVMTypes::BoolVectorType == LLVMTypes::Int16VectorType)
+        v = llvm::ConstantInt::get(LLVMTypes::Int16Type, bvec[i] ? 0xffff : 0,
+                                   false /*unsigned*/);
+    else if (LLVMTypes::BoolVectorType == LLVMTypes::Int8VectorType)
+        v = llvm::ConstantInt::get(LLVMTypes::Int8Type, bvec[i] ? 0xff : 0,
+                                   false /*unsigned*/);
     else {
-        Assert(LLVMTypes::BoolVectorType->getElementType() ==
-               llvm::Type::getInt1Ty(*g->ctx));
+        Assert(LLVMTypes::BoolVectorType == LLVMTypes::Int1VectorType);
         v = bvec[i] ? LLVMTrue : LLVMFalse;
     }
 
parse.yy (20 changed lines)

@@ -2148,8 +2148,24 @@ lAddFunctionParams(Declarator *decl) {
 
 /** Add a symbol for the built-in mask variable to the symbol table */
 static void lAddMaskToSymbolTable(SourcePos pos) {
-    const Type *t = g->target->getMaskBitCount() == 1 ?
-        AtomicType::VaryingBool : AtomicType::VaryingUInt32;
+    const Type *t;
+    switch (g->target->getMaskBitCount()) {
+    case 1:
+        t = AtomicType::VaryingBool;
+        break;
+    case 8:
+        t = AtomicType::VaryingUInt8;
+        break;
+    case 16:
+        t = AtomicType::VaryingUInt16;
+        break;
+    case 32:
+        t = AtomicType::VaryingUInt32;
+        break;
+    default:
+        FATAL("Unhandled mask bitsize in lAddMaskToSymbolTable");
+    }
+
     t = t->GetAsConstType();
     Symbol *maskSymbol = new Symbol("__mask", pos, t);
     m->symbolTable->AddVariable(maskSymbol);
stdlib.ispc (37 changed lines)

@@ -38,12 +38,20 @@
    ispc code
 */
 
-#ifdef ISPC_TARGET_GENERIC
+#if (ISPC_MASK_BITS == 1)
   #define IntMaskType bool
   #define UIntMaskType bool
+#elif (ISPC_MASK_BITS == 8)
+  #define IntMaskType int8
+  #define UIntMaskType unsigned int8
+#elif (ISPC_MASK_BITS == 16)
+  #define IntMaskType int16
+  #define UIntMaskType unsigned int16
+#elif (ISPC_MASK_BITS == 32)
+  #define IntMaskType int32
+  #define UIntMaskType unsigned int32
 #else
-  #define IntMaskType int32
-  #define UIntMaskType unsigned int32
+  #error Unknown value of ISPC_MASK_BITS
 #endif
 
 ///////////////////////////////////////////////////////////////////////////

@@ -335,14 +343,15 @@ static inline int32 sign_extend(bool v) {
     return __sext_varying_bool(v);
 }
 
+
 __declspec(safe)
 static inline uniform bool any(bool v) {
     // We only care about whether "any" is true for the active program instances,
     // so we have to make v with the current program mask.
-#ifdef ISPC_TARGET_GENERIC
+#if (ISPC_MASK_BITS == 1)
     return __any(v & __mask);
 #else
-    return __any(__sext_varying_bool(v) & __mask);
+    return __any((UIntMaskType)__sext_varying_bool(v) & __mask);
 #endif
 }
 

@@ -350,11 +359,10 @@ __declspec(safe)
 static inline uniform bool all(bool v) {
     // As with any(), we need to explicitly mask v with the current program mask
     // so we're only looking at the current lanes
-
-#ifdef ISPC_TARGET_GENERIC
+#if (ISPC_MASK_BITS == 1)
     return __all(v | !__mask);
 #else
-    return __all(__sext_varying_bool(v) | !__mask);
+    return __all((UIntMaskType)__sext_varying_bool(v) | !__mask);
 #endif
 }
 

@@ -362,11 +370,10 @@ __declspec(safe)
 static inline uniform bool none(bool v) {
     // As with any(), we need to explicitly mask v with the current program mask
     // so we're only looking at the current lanes
-
-#ifdef ISPC_TARGET_GENERIC
+#if (ISPC_MASK_BITS == 1)
     return __none(v & __mask);
 #else
-    return __none(__sext_varying_bool(v) & __mask);
+    return __none((UIntMaskType)__sext_varying_bool(v) & __mask);
 #endif
 }
 

@@ -399,10 +406,10 @@ static inline int popcnt(int64 v) {
 __declspec(safe)
 static inline uniform int popcnt(bool v) {
     // As with any() and all(), only count across the active lanes
-#ifdef ISPC_TARGET_GENERIC
+#if (ISPC_MASK_BITS == 1)
     return __popcnt_int64(__movmsk(v & __mask));
 #else
-    return __popcnt_int64(__movmsk(__sext_varying_bool(v) & __mask));
+    return __popcnt_int64(__movmsk((UIntMaskType)__sext_varying_bool(v) & __mask));
 #endif
 }
 