Add support for mask vectors of 8- and 16-bit element types.
A number of places throughout the system assumed that the execution mask could only have 1-bit or 32-bit elements. This commit makes it possible for a target to use an 8-bit or 16-bit mask as well.
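The key mechanism is that the mask element type is now derived from the target's mask bit count instead of being hard-coded to i1 or i32. Below is a minimal sketch of that mapping, using a hypothetical helper name (makeMaskType) rather than the actual assignments in InitLLVMUtil, and the same llvm::VectorType::get(element, width) call that appears in the llvmutil.cpp diff further down; it is an illustration, not the commit's own code.

    #include <llvm/IR/DerivedTypes.h>
    #include <llvm/IR/LLVMContext.h>
    #include <llvm/IR/Type.h>

    // Sketch only (hypothetical helper): build the varying-mask vector type for
    // a target whose mask elements are 1, 8, 16, or 32 bits wide. The real code
    // assigns the result to LLVMTypes::MaskType and LLVMTypes::BoolVectorType.
    static llvm::VectorType *makeMaskType(llvm::LLVMContext &ctx,
                                          int maskBitCount, int vectorWidth) {
        switch (maskBitCount) {
        case 1:
        case 8:
        case 16:
        case 32:
            // An all-on lane is the sign-extended -1 of this element type
            // (0x1, 0xff, 0xffff, or 0xffffffff).
            return llvm::VectorType::get(llvm::Type::getIntNTy(ctx, maskBitCount),
                                         vectorWidth);
        default:
            return nullptr; // unhandled mask width
        }
    }

With an 8-wide target and an 8-bit mask, for example, this yields <8 x i8>, and the standard library is then preprocessed with -DISPC_MASK_BITS=8 (see the Makefile and stdlib.ispc changes) so that __mask and the mask-taking builtins agree on that element type.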

Makefile (29 lines changed)

@@ -137,7 +137,7 @@ BISON_SRC=parse.yy
FLEX_SRC=lex.ll

OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
stdlib_generic_ispc.o stdlib_x86_ispc.o \
stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o \
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))

default: ispc

@@ -243,12 +243,23 @@ objs/builtins-c-64.cpp: builtins/builtins.c
@echo Creating C++ source from builtins definition file $<
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c 64 > $@

objs/stdlib_generic_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for generic
@$(CLANG) -E -x c -DISPC_TARGET_GENERIC=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
python stdlib2cpp.py generic > $@
objs/stdlib_mask1_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for mask1
@$(CLANG) -E -x c -DISPC_MASK_BITS=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
python stdlib2cpp.py mask1 > $@

objs/stdlib_mask8_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for mask8
@$(CLANG) -E -x c -DISPC_MASK_BITS=8 -DISPC=1 -DPI=3.1415926536 $< -o - | \
python stdlib2cpp.py mask8 > $@

objs/stdlib_mask16_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for mask16
@$(CLANG) -E -x c -DISPC_MASK_BITS=16 -DISPC=1 -DPI=3.1415926536 $< -o - | \
python stdlib2cpp.py mask16 > $@

objs/stdlib_mask32_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for mask32
@$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.1415926536 $< -o - | \
python stdlib2cpp.py mask32 > $@

objs/stdlib_x86_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for x86
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | \
python stdlib2cpp.py x86 > $@

builtins.cpp (35 lines changed)

@@ -112,10 +112,7 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;

// varying
if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
t == LLVMTypes::MaskType)
return AtomicType::VaryingBool;
else if (t == LLVMTypes::Int8VectorType)
if (t == LLVMTypes::Int8VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
else if (t == LLVMTypes::Int16VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;

@@ -127,6 +124,8 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
return AtomicType::VaryingDouble;
else if (t == LLVMTypes::Int64VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
else if (t == LLVMTypes::MaskType)
return AtomicType::VaryingBool;

// pointers to uniform
else if (t == LLVMTypes::Int8PointerType)

@@ -1038,16 +1037,30 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its
// definitions added.
extern char stdlib_mask1_code[], stdlib_mask8_code[];
extern char stdlib_mask16_code[], stdlib_mask32_code[];
if (g->target->getISA() == Target::GENERIC &&
g->target->getVectorWidth() != 1) { // 1 wide uses x86 stdlib
extern char stdlib_generic_code[];
yy_scan_string(stdlib_generic_code);
yyparse();
g->target->getVectorWidth() == 1) { // 1 wide uses 32 stdlib
yy_scan_string(stdlib_mask32_code);
}
else {
extern char stdlib_x86_code[];
yy_scan_string(stdlib_x86_code);
switch (g->target->getMaskBitCount()) {
case 1:
yy_scan_string(stdlib_mask1_code);
break;
case 8:
yy_scan_string(stdlib_mask8_code);
break;
case 16:
yy_scan_string(stdlib_mask16_code);
break;
case 32:
yy_scan_string(stdlib_mask32_code);
break;
default:
FATAL("Unhandled mask bit size for stdlib.ispc");
}
}
yyparse();
}
}
}

builtins/util.m4 (161 lines changed)

@@ -690,6 +690,75 @@ shuffles(i64, 8)
;; $4: return type of the LLVM atomic type, in ispc naming paralance (e.g. int32)
;; $5: identity value for the operator (e.g. 0 for add, -1 for AND, ...)

define(`mask_converts', `
define internal <$1 x i8> @convertmask_i1_i8_$1(<$1 x i1>) {
%r = sext <$1 x i1> %0 to <$1 x i8>
ret <$1 x i8> %r
}
define internal <$1 x i16> @convertmask_i1_i16_$1(<$1 x i1>) {
%r = sext <$1 x i1> %0 to <$1 x i16>
ret <$1 x i16> %r
}
define internal <$1 x i32> @convertmask_i1_i32_$1(<$1 x i1>) {
%r = sext <$1 x i1> %0 to <$1 x i32>
ret <$1 x i32> %r
}
define internal <$1 x i64> @convertmask_i1_i64_$1(<$1 x i1>) {
%r = sext <$1 x i1> %0 to <$1 x i64>
ret <$1 x i64> %r
}

define internal <$1 x i8> @convertmask_i8_i8_$1(<$1 x i8>) {
ret <$1 x i8> %0
}
define internal <$1 x i16> @convertmask_i8_i16_$1(<$1 x i8>) {
%r = sext <$1 x i8> %0 to <$1 x i16>
ret <$1 x i16> %r
}
define internal <$1 x i32> @convertmask_i8_i32_$1(<$1 x i8>) {
%r = sext <$1 x i8> %0 to <$1 x i32>
ret <$1 x i32> %r
}
define internal <$1 x i64> @convertmask_i8_i64_$1(<$1 x i8>) {
%r = sext <$1 x i8> %0 to <$1 x i64>
ret <$1 x i64> %r
}

define internal <$1 x i8> @convertmask_i16_i8_$1(<$1 x i16>) {
%r = trunc <$1 x i16> %0 to <$1 x i8>
ret <$1 x i8> %r
}
define internal <$1 x i16> @convertmask_i16_i16_$1(<$1 x i16>) {
ret <$1 x i16> %0
}
define internal <$1 x i32> @convertmask_i16_i32_$1(<$1 x i16>) {
%r = sext <$1 x i16> %0 to <$1 x i32>
ret <$1 x i32> %r
}
define internal <$1 x i64> @convertmask_i16_i64_$1(<$1 x i16>) {
%r = sext <$1 x i16> %0 to <$1 x i64>
ret <$1 x i64> %r
}

define internal <$1 x i8> @convertmask_i32_i8_$1(<$1 x i32>) {
%r = trunc <$1 x i32> %0 to <$1 x i8>
ret <$1 x i8> %r
}
define internal <$1 x i16> @convertmask_i32_i16_$1(<$1 x i32>) {
%r = trunc <$1 x i32> %0 to <$1 x i16>
ret <$1 x i16> %r
}
define internal <$1 x i32> @convertmask_i32_i32_$1(<$1 x i32>) {
ret <$1 x i32> %0
}
define internal <$1 x i64> @convertmask_i32_i64_$1(<$1 x i32>) {
%r = sext <$1 x i32> %0 to <$1 x i64>
ret <$1 x i64> %r
}
')

mask_converts(WIDTH)

define(`global_atomic_associative', `

define <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,

@@ -697,17 +766,10 @@ define <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,
; first, for any lanes where the mask is off, compute a vector where those lanes
; hold the identity value..

; for the bit tricks below, we need the mask to be sign extended to be
; the size of the element type.
ifelse(
MASK,i1,`%mask = sext <$1 x MASK> %m to <$1 x $3>',
$3,i64, `%mask = sext <$1 x MASK> %m to <$1 x i64>',
$3,i32, `
; silly workaround to do %mask = %m, which is not possible directly..
%maskmem = alloca <$1 x i32>
store <$1 x i32> %m, <$1 x i32> * %maskmem
%mask = load <$1 x i32> * %maskmem'
)
; for the bit tricks below, we need the mask to have the
; the same element size as the element type.
%mask = call <$1 x $3> @convertmask_`'MASK`'_$3_$1(<$1 x MASK> %m)

; zero out any lanes that are off
%valoff = and <$1 x $3> %val, %mask

@@ -2440,13 +2502,12 @@ define i32 @__sext_uniform_bool(i1) nounwind readnone alwaysinline {
}

define <WIDTH x i32> @__sext_varying_bool(<WIDTH x MASK>) nounwind readnone alwaysinline {
ifelse(MASK,i1, `
%se = sext <WIDTH x i1> %0 to <WIDTH x i32>
ret <WIDTH x i32> %se
', `
ret <WIDTH x i32> %0')
ifelse(MASK,i32, `ret <WIDTH x i32> %0',
`%se = sext <WIDTH x MASK> %0 to <WIDTH x i32>
ret <WIDTH x i32> %se')
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; memcpy/memmove/memset

@@ -3201,8 +3262,8 @@ return:
;; $1: llvm type of elements (and suffix for function name)

define(`gen_masked_store', `
define void @__masked_store_$1(<WIDTH x $1>* nocapture, <WIDTH x $1>, <WIDTH x i32>) nounwind alwaysinline {
per_lane(WIDTH, <WIDTH x i32> %2, `
define void @__masked_store_$1(<WIDTH x $1>* nocapture, <WIDTH x $1>, <WIDTH x MASK>) nounwind alwaysinline {
per_lane(WIDTH, <WIDTH x MASK> %2, `
%ptr_LANE_ID = getelementptr <WIDTH x $1> * %0, i32 0, i32 LANE
%storeval_LANE_ID = extractelement <WIDTH x $1> %1, i32 LANE
store $1 %storeval_LANE_ID, $1 * %ptr_LANE_ID')

@@ -3378,10 +3439,10 @@ define void @__masked_store_blend_i16(<16 x i16>* nocapture, <16 x i16>,
define(`packed_load_and_store', `

define i32 @__packed_load_active(i32 * %startptr, <WIDTH x i32> * %val_ptr,
<WIDTH x i32> %full_mask) nounwind alwaysinline {
<WIDTH x MASK> %full_mask) nounwind alwaysinline {
entry:
%mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
%mask = call i64 @__movmsk(<WIDTH x MASK> %full_mask)
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %full_mask)
br i1 %mask_known, label %known_mask, label %unknown_mask

known_mask:

@@ -3432,10 +3493,10 @@ done:
}

define i32 @__packed_store_active(i32 * %startptr, <WIDTH x i32> %vals,
<WIDTH x i32> %full_mask) nounwind alwaysinline {
<WIDTH x MASK> %full_mask) nounwind alwaysinline {
entry:
%mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
%mask = call i64 @__movmsk(<WIDTH x MASK> %full_mask)
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %full_mask)
br i1 %mask_known, label %known_mask, label %unknown_mask

known_mask:

@@ -3544,10 +3605,10 @@ check_neighbors:
%castvr = call <$1 x $4> @__rotate_i$6(<$1 x $4> %castvec, i32 1)
%vr = bitcast <$1 x $4> %castvr to <$1 x $2>
%eq = $5 $7 <$1 x $2> %vec, %vr
ifelse(MASK,i32, `
%eq32 = sext <$1 x i1> %eq to <$1 x i32>
%eqmm = call i64 @__movmsk(<$1 x i32> %eq32)', `
%eqmm = call i64 @__movmsk(<$1 x MASK> %eq)')
ifelse(MASK,i1, `
%eqmm = call i64 @__movmsk(<$1 x MASK> %eq)',
`%eqm = sext <$1 x i1> %eq to <$1 x MASK>
%eqmm = call i64 @__movmsk(<$1 x MASK> %eqm)')
%alleq = icmp eq i64 %eqmm, ALL_ON_MASK
br i1 %alleq, label %all_equal, label %not_all_equal
', `

@@ -3722,9 +3783,9 @@ pl_done:
define(`gen_gather_general', `
; fully general 32-bit gather, takes array of pointers encoded as vector of i32s
define <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs,
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
<WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <WIDTH x $1>
per_lane(WIDTH, <WIDTH x i32> %vecmask, `
per_lane(WIDTH, <WIDTH x MASK> %vecmask, `
%iptr_LANE_ID = extractelement <WIDTH x i32> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 *
%val_LANE_ID = load $1 * %ptr_LANE_ID

@@ -3738,9 +3799,9 @@ define <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs,

; fully general 64-bit gather, takes array of pointers encoded as vector of i32s
define <WIDTH x $1> @__gather64_$1(<WIDTH x i64> %ptrs,
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
<WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <WIDTH x $1>
per_lane(WIDTH, <WIDTH x i32> %vecmask, `
per_lane(WIDTH, <WIDTH x MASK> %vecmask, `
%iptr_LANE_ID = extractelement <WIDTH x i64> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 *
%val_LANE_ID = load $1 * %ptr_LANE_ID

@@ -3804,7 +3865,7 @@ define <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %o

define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
<WIDTH x i32> %offset_delta,
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
<WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
; We can be clever and avoid the per-lane stuff for gathers if we are willing
; to require that the 0th element of the array being gathered from is always
; legal to read from (and we do indeed require that, given the benefits!)

@@ -3813,13 +3874,13 @@ define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32
%offsetsPtr = alloca <WIDTH x i32>
store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %offsetsPtr
call void @__masked_store_blend_i32(<WIDTH x i32> * %offsetsPtr, <WIDTH x i32> %offsets,
<WIDTH x i32> %vecmask)
<WIDTH x MASK> %vecmask)
%newOffsets = load <WIDTH x i32> * %offsetsPtr

%deltaPtr = alloca <WIDTH x i32>
store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %deltaPtr
call void @__masked_store_blend_i32(<WIDTH x i32> * %deltaPtr, <WIDTH x i32> %offset_delta,
<WIDTH x i32> %vecmask)
<WIDTH x MASK> %vecmask)
%newDelta = load <WIDTH x i32> * %deltaPtr

%ret0 = call <WIDTH x $1> @__gather_elt32_$1(i8 * %ptr, <WIDTH x i32> %newOffsets,

@@ -3835,7 +3896,7 @@ define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32

define <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
<WIDTH x i64> %offset_delta,
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
<WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
; We can be clever and avoid the per-lane stuff for gathers if we are willing
; to require that the 0th element of the array being gathered from is always
; legal to read from (and we do indeed require that, given the benefits!)

@@ -3844,13 +3905,13 @@ define <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64
%offsetsPtr = alloca <WIDTH x i64>
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %offsetsPtr
call void @__masked_store_blend_i64(<WIDTH x i64> * %offsetsPtr, <WIDTH x i64> %offsets,
<WIDTH x i32> %vecmask)
<WIDTH x MASK> %vecmask)
%newOffsets = load <WIDTH x i64> * %offsetsPtr

%deltaPtr = alloca <WIDTH x i64>
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %deltaPtr
call void @__masked_store_blend_i64(<WIDTH x i64> * %deltaPtr, <WIDTH x i64> %offset_delta,
<WIDTH x i32> %vecmask)
<WIDTH x MASK> %vecmask)
%newDelta = load <WIDTH x i64> * %deltaPtr

%ret0 = call <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %newOffsets,

@@ -3876,27 +3937,27 @@ gen_gather_factored($1)
define <WIDTH x $1>
@__gather_base_offsets32_$1(i8 * %ptr, i32 %offset_scale,
<WIDTH x i32> %offsets,
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
<WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
%scale_vec = bitcast i32 %offset_scale to <1 x i32>
%smear_scale = shufflevector <1 x i32> %scale_vec, <1 x i32> undef,
<WIDTH x i32> < forloop(i, 1, eval(WIDTH-1), `i32 0, ') i32 0 >
%scaled_offsets = mul <WIDTH x i32> %smear_scale, %offsets
%v = call <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %scaled_offsets, i32 1,
<WIDTH x i32> zeroinitializer, <WIDTH x i32> %vecmask)
<WIDTH x i32> zeroinitializer, <WIDTH x MASK> %vecmask)
ret <WIDTH x $1> %v
}

define <WIDTH x $1>
@__gather_base_offsets64_$1(i8 * %ptr, i32 %offset_scale,
<WIDTH x i64> %offsets,
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
<WIDTH x MASK> %vecmask) nounwind readonly alwaysinline {
%scale64 = zext i32 %offset_scale to i64
%scale_vec = bitcast i64 %scale64 to <1 x i64>
%smear_scale = shufflevector <1 x i64> %scale_vec, <1 x i64> undef,
<WIDTH x i32> < forloop(i, 1, eval(WIDTH-1), `i32 0, ') i32 0 >
%scaled_offsets = mul <WIDTH x i64> %smear_scale, %offsets
%v = call <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %scaled_offsets,
i32 1, <WIDTH x i64> zeroinitializer, <WIDTH x i32> %vecmask)
i32 1, <WIDTH x i64> zeroinitializer, <WIDTH x MASK> %vecmask)
ret <WIDTH x $1> %v
}

@@ -3955,9 +4016,9 @@ define void @__scatter_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_s

define void @__scatter_factored_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32 %offset_scale,
<WIDTH x i32> %offset_delta, <WIDTH x $1> %values,
<WIDTH x i32> %mask) nounwind alwaysinline {
<WIDTH x MASK> %mask) nounwind alwaysinline {
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
per_lane(WIDTH, <WIDTH x i32> %mask, `
per_lane(WIDTH, <WIDTH x MASK> %mask, `
call void @__scatter_elt32_$1(i8 * %base, <WIDTH x i32> %offsets, i32 %offset_scale,
<WIDTH x i32> %offset_delta, <WIDTH x $1> %values, i32 LANE)')
ret void

@@ -3965,9 +4026,9 @@ define void @__scatter_factored_base_offsets32_$1(i8* %base, <WIDTH x i32> %offs

define void @__scatter_factored_base_offsets64_$1(i8* %base, <WIDTH x i64> %offsets, i32 %offset_scale,
<WIDTH x i64> %offset_delta, <WIDTH x $1> %values,
<WIDTH x i32> %mask) nounwind alwaysinline {
<WIDTH x MASK> %mask) nounwind alwaysinline {
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
per_lane(WIDTH, <WIDTH x i32> %mask, `
per_lane(WIDTH, <WIDTH x MASK> %mask, `
call void @__scatter_elt64_$1(i8 * %base, <WIDTH x i64> %offsets, i32 %offset_scale,
<WIDTH x i64> %offset_delta, <WIDTH x $1> %values, i32 LANE)')
ret void

@@ -3975,8 +4036,8 @@ define void @__scatter_factored_base_offsets64_$1(i8* %base, <WIDTH x i64> %offs

; fully general 32-bit scatter, takes array of pointers encoded as vector of i32s
define void @__scatter32_$1(<WIDTH x i32> %ptrs, <WIDTH x $1> %values,
<WIDTH x i32> %mask) nounwind alwaysinline {
per_lane(WIDTH, <WIDTH x i32> %mask, `
<WIDTH x MASK> %mask) nounwind alwaysinline {
per_lane(WIDTH, <WIDTH x MASK> %mask, `
%iptr_LANE_ID = extractelement <WIDTH x i32> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 *
%val_LANE_ID = extractelement <WIDTH x $1> %values, i32 LANE

@@ -3987,8 +4048,8 @@ define void @__scatter32_$1(<WIDTH x i32> %ptrs, <WIDTH x $1> %values,

; fully general 64-bit scatter, takes array of pointers encoded as vector of i64s
define void @__scatter64_$1(<WIDTH x i64> %ptrs, <WIDTH x $1> %values,
<WIDTH x i32> %mask) nounwind alwaysinline {
per_lane(WIDTH, <WIDTH x i32> %mask, `
<WIDTH x MASK> %mask) nounwind alwaysinline {
per_lane(WIDTH, <WIDTH x MASK> %mask, `
%iptr_LANE_ID = extractelement <WIDTH x i64> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 *
%val_LANE_ID = extractelement <WIDTH x $1> %values, i32 LANE

ctx.cpp (26 lines changed)

@@ -1456,13 +1456,13 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
for (unsigned int i = 0; i < at->getNumElements(); ++i) {
llvm::Value *elt = ExtractInst(b, i);
llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType,
LLVMGetName(elt, "_to_boolvec32"));
LLVMGetName(elt, "_to_boolvec"));
ret = InsertInst(ret, sext, i);
}
return ret;
}
else
return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_i32"));
return SExtInst(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec"));
}

@@ -2781,6 +2781,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,

// Figure out if we need a 8, 16, 32 or 64-bit masked store.
llvm::Function *maskedStoreFunc = NULL;
llvm::Type *llvmValueType = value->getType();

const PointerType *pt = CastType<PointerType>(valueType);
if (pt != NULL) {

@@ -2809,8 +2810,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
else
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
}
else if (Type::Equal(valueType, AtomicType::VaryingBool) &&
g->target->getMaskBitCount() == 1) {
else if (llvmValueType == LLVMTypes::Int1VectorType) {
llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
LLVMMaskAllOn, "~mask");
llvm::Value *old = LoadInst(ptr);

@@ -2823,28 +2823,22 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
StoreInst(final, ptr);
return;
}
else if (Type::Equal(valueType, AtomicType::VaryingDouble)) {
else if (llvmValueType == LLVMTypes::DoubleVectorType) {
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_double");
}
else if (Type::Equal(valueType, AtomicType::VaryingInt64) ||
Type::Equal(valueType, AtomicType::VaryingUInt64)) {
else if (llvmValueType == LLVMTypes::Int64VectorType) {
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
}
else if (Type::Equal(valueType, AtomicType::VaryingFloat)) {
else if (llvmValueType == LLVMTypes::FloatVectorType) {
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_float");
}
else if (Type::Equal(valueType, AtomicType::VaryingBool) ||
Type::Equal(valueType, AtomicType::VaryingInt32) ||
Type::Equal(valueType, AtomicType::VaryingUInt32) ||
CastType<EnumType>(valueType) != NULL) {
else if (llvmValueType == LLVMTypes::Int32VectorType) {
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
}
else if (Type::Equal(valueType, AtomicType::VaryingInt16) ||
Type::Equal(valueType, AtomicType::VaryingUInt16)) {
else if (llvmValueType == LLVMTypes::Int16VectorType) {
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i16");
}
else if (Type::Equal(valueType, AtomicType::VaryingInt8) ||
Type::Equal(valueType, AtomicType::VaryingUInt8)) {
else if (llvmValueType == LLVMTypes::Int8VectorType) {
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i8");
}
AssertPos(currentPos, maskedStoreFunc != NULL);

expr.cpp (36 lines changed)

@@ -6161,9 +6161,9 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
// If we have a bool vector of i32 elements, first truncate
// down to a single bit
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
// If we have a bool vector of non-i1 elements, first
// truncate down to a single bit.
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
// And then do an unisgned int->float cast
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int

@@ -6205,8 +6205,8 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
// truncate i32 bool vector values to i1s
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
// truncate bool vector values to i1s
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to double
exprVal, targetType, cOpName);

@@ -6243,7 +6243,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6279,7 +6279,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6321,7 +6321,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6361,7 +6361,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6407,7 +6407,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6447,7 +6447,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6493,7 +6493,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6531,7 +6531,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
switch (fromType->basicType) {
case AtomicType::TYPE_BOOL:
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, cOpName);
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;

@@ -6625,12 +6625,12 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,

if (fromType->IsUniformType()) {
if (toType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) {
// extend out to i32 bool values from i1 here. then we'll
// turn into a vector below, the way it does for everyone
// else...
LLVMTypes::BoolVectorType != LLVMTypes::Int1VectorType) {
// extend out to an bool as an i8/i16/i32 from the i1 here.
// Then we'll turn that into a vector below, the way it
// does for everyone else...
cast = ctx->SExtInst(cast, LLVMTypes::BoolVectorType->getElementType(),
LLVMGetName(cast, "to_i32bool"));
LLVMGetName(cast, "to_i_bool"));
}
}
else

llvmutil.cpp (73 lines changed)

@@ -115,13 +115,25 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);

if (target.getMaskBitCount() == 1)
switch (target.getMaskBitCount()) {
case 1:
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.getVectorWidth());
else {
Assert(target.getMaskBitCount() == 32);
break;
case 8:
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt8Ty(*ctx), target.getVectorWidth());
break;
case 16:
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt16Ty(*ctx), target.getVectorWidth());
break;
case 32:
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.getVectorWidth());
break;
default:
FATAL("Unhandled mask width for initializing MaskType");
}

LLVMTypes::Int1VectorType =

@@ -154,12 +166,26 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {

std::vector<llvm::Constant *> maskOnes;
llvm::Constant *onMask = NULL;
if (target.getMaskBitCount() == 1)
switch (target.getMaskBitCount()) {
case 1:
onMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1,
false /*unsigned*/); // 0x1
else
break;
case 8:
onMask = llvm::ConstantInt::get(llvm::Type::getInt8Ty(*ctx), -1,
true /*signed*/); // 0xff
break;
case 16:
onMask = llvm::ConstantInt::get(llvm::Type::getInt16Ty(*ctx), -1,
true /*signed*/); // 0xffff
break;
case 32:
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
true /*signed*/); // 0xffffffff
break;
default:
FATAL("Unhandled mask width for onMask");
}

for (int i = 0; i < target.getVectorWidth(); ++i)
maskOnes.push_back(onMask);

@@ -167,13 +193,26 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {

std::vector<llvm::Constant *> maskZeros;
llvm::Constant *offMask = NULL;
if (target.getMaskBitCount() == 1)
switch (target.getMaskBitCount()) {
case 1:
offMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 0,
true /*signed*/);
else
break;
case 8:
offMask = llvm::ConstantInt::get(llvm::Type::getInt8Ty(*ctx), 0,
true /*signed*/);
break;
case 16:
offMask = llvm::ConstantInt::get(llvm::Type::getInt16Ty(*ctx), 0,
true /*signed*/);
break;
case 32:
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
true /*signed*/);
break;
default:
FATAL("Unhandled mask width for offMask");
}
for (int i = 0; i < target.getVectorWidth(); ++i)
maskZeros.push_back(offMask);
LLVMMaskAllOff = llvm::ConstantVector::get(maskZeros);

@@ -444,9 +483,14 @@ LLVMBoolVector(bool b) {
if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, b ? 0xffffffff : 0,
false /*unsigned*/);
else if (LLVMTypes::BoolVectorType == LLVMTypes::Int16VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int16Type, b ? 0xffff : 0,
false /*unsigned*/);
else if (LLVMTypes::BoolVectorType == LLVMTypes::Int8VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int8Type, b ? 0xff : 0,
false /*unsigned*/);
else {
Assert(LLVMTypes::BoolVectorType->getElementType() ==
llvm::Type::getInt1Ty(*g->ctx));
Assert(LLVMTypes::BoolVectorType == LLVMTypes::Int1VectorType);
v = b ? LLVMTrue : LLVMFalse;
}

@@ -465,9 +509,14 @@ LLVMBoolVector(const bool *bvec) {
if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[i] ? 0xffffffff : 0,
false /*unsigned*/);
else if (LLVMTypes::BoolVectorType == LLVMTypes::Int16VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int16Type, bvec[i] ? 0xffff : 0,
false /*unsigned*/);
else if (LLVMTypes::BoolVectorType == LLVMTypes::Int8VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int8Type, bvec[i] ? 0xff : 0,
false /*unsigned*/);
else {
Assert(LLVMTypes::BoolVectorType->getElementType() ==
llvm::Type::getInt1Ty(*g->ctx));
Assert(LLVMTypes::BoolVectorType == LLVMTypes::Int1VectorType);
v = bvec[i] ? LLVMTrue : LLVMFalse;
}

parse.yy (20 lines changed)

@@ -2148,8 +2148,24 @@ lAddFunctionParams(Declarator *decl) {

/** Add a symbol for the built-in mask variable to the symbol table */
static void lAddMaskToSymbolTable(SourcePos pos) {
const Type *t = g->target->getMaskBitCount() == 1 ?
AtomicType::VaryingBool : AtomicType::VaryingUInt32;
const Type *t;
switch (g->target->getMaskBitCount()) {
case 1:
t = AtomicType::VaryingBool;
break;
case 8:
t = AtomicType::VaryingUInt8;
break;
case 16:
t = AtomicType::VaryingUInt16;
break;
case 32:
t = AtomicType::VaryingUInt32;
break;
default:
FATAL("Unhandled mask bitsize in lAddMaskToSymbolTable");
}

t = t->GetAsConstType();
Symbol *maskSymbol = new Symbol("__mask", pos, t);
m->symbolTable->AddVariable(maskSymbol);

stdlib.ispc (37 lines changed)

@@ -38,12 +38,20 @@
ispc code
*/

#ifdef ISPC_TARGET_GENERIC
#define IntMaskType bool
#define UIntMaskType bool
#if (ISPC_MASK_BITS == 1)
#define IntMaskType bool
#define UIntMaskType bool
#elif (ISPC_MASK_BITS == 8)
#define IntMaskType int8
#define UIntMaskType unsigned int8
#elif (ISPC_MASK_BITS == 16)
#define IntMaskType int16
#define UIntMaskType unsigned int16
#elif (ISPC_MASK_BITS == 32)
#define IntMaskType int32
#define UIntMaskType unsigned int32
#else
#define IntMaskType int32
#define UIntMaskType unsigned int32
#error Unknown value of ISPC_MASK_BITS
#endif

///////////////////////////////////////////////////////////////////////////

@@ -335,14 +343,15 @@ static inline int32 sign_extend(bool v) {
return __sext_varying_bool(v);
}

__declspec(safe)
static inline uniform bool any(bool v) {
// We only care about whether "any" is true for the active program instances,
// so we have to make v with the current program mask.
#ifdef ISPC_TARGET_GENERIC
#if (ISPC_MASK_BITS == 1)
return __any(v & __mask);
#else
return __any(__sext_varying_bool(v) & __mask);
return __any((UIntMaskType)__sext_varying_bool(v) & __mask);
#endif
}

@@ -350,11 +359,10 @@ __declspec(safe)
static inline uniform bool all(bool v) {
// As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes

#ifdef ISPC_TARGET_GENERIC
#if (ISPC_MASK_BITS == 1)
return __all(v | !__mask);
#else
return __all(__sext_varying_bool(v) | !__mask);
return __all((UIntMaskType)__sext_varying_bool(v) | !__mask);
#endif
}

@@ -362,11 +370,10 @@ __declspec(safe)
static inline uniform bool none(bool v) {
// As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes

#ifdef ISPC_TARGET_GENERIC
#if (ISPC_MASK_BITS == 1)
return __none(v & __mask);
#else
return __none(__sext_varying_bool(v) & __mask);
return __none((UIntMaskType)__sext_varying_bool(v) & __mask);
#endif
}

@@ -399,10 +406,10 @@ static inline int popcnt(int64 v) {
__declspec(safe)
static inline uniform int popcnt(bool v) {
// As with any() and all(), only count across the active lanes
#ifdef ISPC_TARGET_GENERIC
#if (ISPC_MASK_BITS == 1)
return __popcnt_int64(__movmsk(v & __mask));
#else
return __popcnt_int64(__movmsk(__sext_varying_bool(v) & __mask));
return __popcnt_int64(__movmsk((UIntMaskType)__sext_varying_bool(v) & __mask));
#endif
}