diff --git a/Makefile b/Makefile
index 43f41e09..92debe4f 100644
--- a/Makefile
+++ b/Makefile
@@ -141,7 +141,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
 	type.cpp util.cpp
 HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
 	opt.h stmt.h sym.h type.h util.h
-TARGETS=avxh avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
+TARGETS=avx-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
 	sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
 	generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
 ifneq ($(ARM_ENABLED), 0)
@@ -160,7 +160,7 @@ BISON_SRC=parse.yy
 FLEX_SRC=lex.ll
 OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
-	stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o \
+	stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o stdlib_mask64_ispc.o \
 	$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))

 default: ispc
@@ -268,20 +268,25 @@ objs/builtins-c-64.cpp: builtins/builtins.c

 objs/stdlib_mask1_ispc.cpp: stdlib.ispc
 	@echo Creating C++ source from $< for mask1
-	@$(CLANG) -E -x c -DISPC_MASK_BITS=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=1 -DISPC=1 -DPI=3.14159265358979 $< -o - | \
 	    python stdlib2cpp.py mask1 > $@

 objs/stdlib_mask8_ispc.cpp: stdlib.ispc
 	@echo Creating C++ source from $< for mask8
-	@$(CLANG) -E -x c -DISPC_MASK_BITS=8 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=8 -DISPC=1 -DPI=3.14159265358979 $< -o - | \
 	    python stdlib2cpp.py mask8 > $@

 objs/stdlib_mask16_ispc.cpp: stdlib.ispc
 	@echo Creating C++ source from $< for mask16
-	@$(CLANG) -E -x c -DISPC_MASK_BITS=16 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=16 -DISPC=1 -DPI=3.14159265358979 $< -o - | \
 	    python stdlib2cpp.py mask16 > $@

 objs/stdlib_mask32_ispc.cpp: stdlib.ispc
 	@echo Creating C++ source from $< for mask32
-	@$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.1415926536 $< -o - | \
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.14159265358979 $< -o - | \
 	    python stdlib2cpp.py mask32 > $@
+
+objs/stdlib_mask64_ispc.cpp: stdlib.ispc
+	@echo Creating C++ source from $< for mask64
+	@$(CLANG) -E -x c -DISPC_MASK_BITS=64 -DISPC=1 -DPI=3.14159265358979 $< -o - | \
+	    python stdlib2cpp.py mask64 > $@
diff --git a/builtins.cpp b/builtins.cpp
index 816d4d78..f8d4136e 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -302,6 +302,7 @@ lCheckModuleIntrinsics(llvm::Module *module) {
         // check the llvm.x86.* intrinsics for now...
         if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
             llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
+            if (id == 0) fprintf(stderr, "FATAL: intrinsic is not found: %s \n", funcName.c_str());
             Assert(id != 0);
             llvm::Type *intrinsicType =
                 llvm::Intrinsic::getType(*g->ctx, id);
@@ -936,10 +937,10 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
         switch (g->target->getVectorWidth()) {
         case 4:
             if (runtime32) {
-                EXPORT_MODULE(builtins_bitcode_avxh_32bit);
+                EXPORT_MODULE(builtins_bitcode_avx_i64x4_32bit);
             }
             else {
-                EXPORT_MODULE(builtins_bitcode_avxh_64bit);
+                EXPORT_MODULE(builtins_bitcode_avx_i64x4_64bit);
             }
             break;
         case 8:
@@ -1105,7 +1106,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
         // serialized version of the stdlib.ispc file to get its
         // definitions added.
         extern char stdlib_mask1_code[], stdlib_mask8_code[];
-        extern char stdlib_mask16_code[], stdlib_mask32_code[];
+        extern char stdlib_mask16_code[], stdlib_mask32_code[], stdlib_mask64_code[];
         if (g->target->getISA() == Target::GENERIC &&
             g->target->getVectorWidth() == 1) { // 1 wide uses 32 stdlib
             yy_scan_string(stdlib_mask32_code);
@@ -1124,6 +1125,9 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
             case 32:
                 yy_scan_string(stdlib_mask32_code);
                 break;
+            case 64:
+                yy_scan_string(stdlib_mask64_code);
+                break;
             default:
                 FATAL("Unhandled mask bit size for stdlib.ispc");
             }
diff --git a/builtins/target-avxh.ll b/builtins/target-avx-i64x4.ll
similarity index 98%
rename from builtins/target-avxh.ll
rename to builtins/target-avx-i64x4.ll
index 98c9111d..d7dbb6bd 100644
--- a/builtins/target-avxh.ll
+++ b/builtins/target-avx-i64x4.ll
@@ -29,7 +29,7 @@
 ;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-include(`target-avx-h.ll')
+include(`target-avx-i64x4base.ll')

 rdrand_decls()
diff --git a/builtins/target-avx-h.ll b/builtins/target-avx-i64x4base.ll
similarity index 78%
rename from builtins/target-avx-h.ll
rename to builtins/target-avx-i64x4base.ll
index 283eaddd..05bf178d 100644
--- a/builtins/target-avx-h.ll
+++ b/builtins/target-avx-i64x4base.ll
@@ -33,7 +33,7 @@
 ;; Basic 4-wide definitions

 define(`WIDTH',`4')
-define(`MASK',`i32')
+define(`MASK',`i64')

 include(`util.m4')

 stdlib_core()
@@ -185,32 +185,32 @@ define <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind reado
 ; horizontal ops

 ;; sse intrinsic
-declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone

-define i64 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
-  %floatmask = bitcast <4 x i32> %0 to <4 x float>
-  %v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
+define i64 @__movmsk(<4 x i64>) nounwind readnone alwaysinline {
+  %floatmask = bitcast <4 x i64> %0 to <4 x double>
+  %v = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %floatmask) nounwind readnone
   %v64 = zext i32 %v to i64
   ret i64 %v64
 }

-define i1 @__any(<4 x i32>) nounwind readnone alwaysinline {
-  %floatmask = bitcast <4 x i32> %0 to <4 x float>
-  %v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
+define i1 @__any(<4 x i64>) nounwind readnone alwaysinline {
+  %floatmask = bitcast <4 x i64> %0 to <4 x double>
+  %v = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %floatmask) nounwind readnone
   %cmp = icmp ne i32 %v, 0
   ret i1 %cmp
 }

-define i1 @__all(<4 x i32>) nounwind readnone alwaysinline {
-  %floatmask = bitcast <4 x i32> %0 to <4 x float>
-  %v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
+define i1 @__all(<4 x i64>) nounwind readnone alwaysinline {
+  %floatmask = bitcast <4 x i64> %0 to <4 x double>
+  %v = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %floatmask) nounwind readnone
   %cmp = icmp eq i32 %v, 15
   ret i1 %cmp
 }

-define i1 @__none(<4 x i32>) nounwind readnone alwaysinline {
-  %floatmask = bitcast <4 x i32> %0 to <4 x float>
-  %v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
+define i1 @__none(<4 x i64>) nounwind readnone alwaysinline {
+  %floatmask = bitcast <4 x i64> %0 to <4 x double>
+  %v = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %floatmask) nounwind readnone
   %cmp = icmp eq i32 %v, 0
   ret i1 %cmp
 }
@@ -392,7 +392,8 @@ masked_load(i16, 2)
 declare <4 x float> @llvm.x86.avx.maskload.ps(i8 *, <4 x float> %mask)
 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)

-define <4 x i32> @__masked_load_i32(i8 *, <4 x i32> %mask) nounwind alwaysinline {
+define <4 x i32> @__masked_load_i32(i8 *, <4 x i64> %mask64) nounwind alwaysinline {
+  %mask = trunc <4 x i64> %mask64 to <4 x i32>
   %floatmask = bitcast <4 x i32> %mask to <4 x float>
   %floatval = call <4 x float> @llvm.x86.avx.maskload.ps(i8 * %0, <4 x float> %floatmask)
   %retval = bitcast <4 x float> %floatval to <4 x i32>
@@ -400,18 +401,11 @@ define <4 x i32> @__masked_load_i32(i8 *, <4 x i32> %mask) nounwind alwaysinline
 }

-define <4 x i64> @__masked_load_i64(i8 *, <4 x i32> %mask) nounwind alwaysinline {
-  ; double up masks, bitcast to doubles
-  %mask0 = shufflevector <4 x i32> %mask, <4 x i32> undef,
-     <8 x i32>
-  %mask0d = bitcast <8 x i32> %mask0 to <4 x double>
-
-  %val0d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %0, <4 x double> %mask0d)
-
-  %vald = shufflevector <4 x double> %val0d, <4 x double> undef,
-     <4 x i32>
-  %val = bitcast <4 x double> %vald to <4 x i64>
-  ret <4 x i64> %val
+define <4 x i64> @__masked_load_i64(i8 *, <4 x i64> %mask) nounwind alwaysinline {
+  %doublemask = bitcast <4 x i64> %mask to <4 x double>
+  %doubleval = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %0, <4 x double> %doublemask)
+  %retval = bitcast <4 x double> %doubleval to <4 x i64>
+  ret <4 x i64> %retval
 }

 masked_load_float_double()
@@ -428,83 +422,62 @@ declare void @llvm.x86.avx.maskstore.ps (i8 *, <4 x float>, <4 x float>)
 declare void @llvm.x86.avx.maskstore.pd.256(i8 *, <4 x double>, <4 x double>)

 define void @__masked_store_i32(<4 x i32>* nocapture, <4 x i32>,
-                                <4 x i32>) nounwind alwaysinline {
-  %ptr = bitcast <4 x i32> * %0 to i8 *
-  %val = bitcast <4 x i32> %1 to <4 x float>
-  %mask = bitcast <4 x i32> %2 to <4 x float>
+                                <4 x i64>) nounwind alwaysinline {
+  %mask32 = trunc <4 x i64> %2 to <4 x i32>
+
+  %ptr = bitcast <4 x i32> * %0 to i8 *
+  %val = bitcast <4 x i32> %1 to <4 x float>
+  %mask = bitcast <4 x i32> %mask32 to <4 x float>
   call void @llvm.x86.avx.maskstore.ps(i8 * %ptr, <4 x float> %mask, <4 x float> %val)
   ret void
 }

 define void @__masked_store_i64(<4 x i64>* nocapture, <4 x i64>,
-                                <4 x i32> %mask) nounwind alwaysinline {
-  %ptr = bitcast <4 x i64> * %0 to i8 *
-  %val = bitcast <4 x i64> %1 to <4 x double>
-
-  %mask0 = shufflevector <4 x i32> %mask, <4 x i32> undef,
-     <8 x i32>
-
-  %mask0d = bitcast <8 x i32> %mask0 to <4 x double>
-
-  %val0 = shufflevector <4 x double> %val, <4 x double> undef,
-     <4 x i32>
-
-  call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr, <4 x double> %mask0d, <4 x double> %val0)
+                                <4 x i64>) nounwind alwaysinline {
+  %ptr = bitcast <4 x i64> * %0 to i8 *
+  %val = bitcast <4 x i64> %1 to <4 x double>
+  %mask = bitcast <4 x i64> %2 to <4 x double>
+  call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr, <4 x double> %mask, <4 x double> %val)
   ret void
 }

-masked_store_blend_8_16_by_4()
+masked_store_blend_8_16_by_4_mask64()

 ;; sse intrinsic
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
                                              <4 x float>) nounwind readnone
-
 define void @__masked_store_blend_i32(<4 x i32>* nocapture, <4 x i32>,
-                                      <4 x i32> %mask) nounwind alwaysinline {
+                                      <4 x i64>) nounwind alwaysinline {
+  %mask = trunc <4 x i64> %2 to <4 x i32>
   %mask_as_float = bitcast <4 x i32> %mask to <4 x float>
-  %oldValue = load <4 x i32>* %0, align 4
-  %oldAsFloat = bitcast <4 x i32> %oldValue to <4 x float>
-  %newAsFloat = bitcast <4 x i32> %1 to <4 x float>
-  %blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %oldAsFloat,
-                                                     <4 x float> %newAsFloat,
-                                                     <4 x float> %mask_as_float)
+  %oldValue = load <4 x i32>* %0, align 4
+  %oldAsFloat = bitcast <4 x i32> %oldValue to <4 x float>
+  %newAsFloat = bitcast <4 x i32> %1 to <4 x float>
+  %blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %oldAsFloat,
+                                                     <4 x float> %newAsFloat,
+                                                     <4 x float> %mask_as_float)
   %blendAsInt = bitcast <4 x float> %blend to <4 x i32>
   store <4 x i32> %blendAsInt, <4 x i32>* %0, align 4
   ret void
 }

 ;; avx intrinsic
-declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>,
-                                                <8 x float>) nounwind readnone
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>,
+                                                 <4 x double>) nounwind readnone

-define void @__masked_store_blend_i64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
-                                      <4 x i32> %i32mask) nounwind alwaysinline {
-  %oldValue = load <4 x i64>* %ptr, align 8
-  %mask = bitcast <4 x i32> %i32mask to <4 x float>
-
-  ; Do 4x64-bit blends by doing two <8 x i32> blends, where the <8 x i32> values
-  ; are actually bitcast <4 x i64> values
-  ;
-  ; set up the first four 64-bit values
-  %old01 = bitcast <4 x i64> %oldValue to <4 x i64>
-  %old01f = bitcast <4 x i64> %old01 to <8 x float>
-  %new01 = bitcast <4 x i64> %new to <4 x i64>
-  %new01f = bitcast <4 x i64> %new01 to <8 x float>
-  ; compute mask--note that the indices are all doubled-up
-  %mask01 = shufflevector <4 x float> %mask, <4 x float> undef,
-     <8 x i32>
-  ; and blend them
-  %result01f = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %old01f,
-                                                            <8 x float> %new01f,
-                                                            <8 x float> %mask01)
-  %result01 = bitcast <8 x float> %result01f to <4 x i64>
-
-
-  %final = bitcast <4 x i64> %result01 to <4 x i64>
-  store <4 x i64> %final, <4 x i64> * %ptr, align 8
+define void @__masked_store_blend_i64(<4 x i64>* nocapture , <4 x i64>,
+                                      <4 x i64>) nounwind alwaysinline {
+  %mask_as_double = bitcast <4 x i64> %2 to <4 x double>
+  %oldValue = load <4 x i64>* %0, align 4
+  %oldAsDouble = bitcast <4 x i64> %oldValue to <4 x double>
+  %newAsDouble = bitcast <4 x i64> %1 to <4 x double>
+  %blend = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %oldAsDouble,
+                                                         <4 x double> %newAsDouble,
+                                                         <4 x double> %mask_as_double)
+  %blendAsInt = bitcast <4 x double> %blend to <4 x i64>
+  store <4 x i64> %blendAsInt, <4 x i64>* %0, align 4
   ret void
 }
diff --git a/builtins/util.m4 b/builtins/util.m4
index 6c90c821..68fa818b 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -947,6 +947,22 @@ define internal <$1 x i64> @convertmask_i32_i64_$1(<$1 x i32>) {
   %r = sext <$1 x i32> %0 to <$1 x i64>
   ret <$1 x i64> %r
 }
+
+define internal <$1 x i8> @convertmask_i64_i8_$1(<$1 x i64>) {
+  %r = trunc <$1 x i64> %0 to <$1 x i8>
+  ret <$1 x i8> %r
+}
+define internal <$1 x i16> @convertmask_i64_i16_$1(<$1 x i64>) {
+  %r = trunc <$1 x i64> %0 to <$1 x i16>
+  ret <$1 x i16> %r
+}
+define internal <$1 x i32> @convertmask_i64_i32_$1(<$1 x i64>) {
+  %r = trunc <$1 x i64> %0 to <$1 x i32>
+  ret <$1 x i32> %r
+}
+define internal <$1 x i64> @convertmask_i64_i64_$1(<$1 x i64>) {
+  ret <$1 x i64> %0
+}
 ')

 mask_converts(WIDTH)
@@ -2689,9 +2705,13 @@ define i32 @__sext_uniform_bool(i1) nounwind readnone alwaysinline {
 }

 define <WIDTH x i32> @__sext_varying_bool(<WIDTH x MASK>) nounwind readnone alwaysinline {
-  ifelse(MASK,i32, `ret <WIDTH x i32> %0',
-         `%se = sext <WIDTH x MASK> %0 to <WIDTH x i32>
-          ret <WIDTH x i32> %se')
+;;  ifelse(MASK,i32, `ret <WIDTH x i32> %0',
+;;         `%se = sext <WIDTH x MASK> %0 to <WIDTH x i32>
+;;          ret <WIDTH x i32> %se')
+  ifelse(MASK,i32, `%se = bitcast <WIDTH x MASK> %0 to <WIDTH x i32>',
+         MASK,i64, `%se = trunc <WIDTH x MASK> %0 to <WIDTH x i32>',
+                   `%se = sext <WIDTH x MASK> %0 to <WIDTH x i32>')
+  ret <WIDTH x i32> %se
 }
@@ -3508,6 +3528,56 @@ define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>,
 }
 ')

+define(`masked_store_blend_8_16_by_4_mask64', `
+define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>,
+                                     <4 x i64>) nounwind alwaysinline {
+  %old = load <4 x i8> * %0, align 1
+  ifelse(LLVM_VERSION,LLVM_3_0,`
+  %old32 = bitcast <4 x i8> %old to i32
+  %new32 = bitcast <4 x i8> %1 to i32
+
+  %mask8 = trunc <4 x i64> %2 to <4 x i8>
+  %mask32 = bitcast <4 x i8> %mask8 to i32
+  %notmask32 = xor i32 %mask32, -1
+
+  %newmasked = and i32 %new32, %mask32
+  %oldmasked = and i32 %old32, %notmask32
+  %result = or i32 %newmasked, %oldmasked
+
+  %resultvec = bitcast i32 %result to <4 x i8>
+  ',`
+  %m = trunc <4 x i64> %2 to <4 x i1>
+  %resultvec = select <4 x i1> %m, <4 x i8> %1, <4 x i8> %old
+  ')
+  store <4 x i8> %resultvec, <4 x i8> * %0, align 1
+  ret void
+}
+
+define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>,
+                                      <4 x i64>) nounwind alwaysinline {
+  %old = load <4 x i16> * %0, align 2
+  ifelse(LLVM_VERSION,LLVM_3_0,`
+  %old64 = bitcast <4 x i16> %old to i64
+  %new64 = bitcast <4 x i16> %1 to i64
+
+  %mask16 = trunc <4 x i64> %2 to <4 x i16>
+  %mask64 = bitcast <4 x i16> %mask16 to i64
+  %notmask64 = xor i64 %mask64, -1
+
+  %newmasked = and i64 %new64, %mask64
+  %oldmasked = and i64 %old64, %notmask64
+  %result = or i64 %newmasked, %oldmasked
+
+  %resultvec = bitcast i64 %result to <4 x i16>
+  ',`
+  %m = trunc <4 x i64> %2 to <4 x i1>
+  %resultvec = select <4 x i1> %m, <4 x i16> %1, <4 x i16> %old
+  ')
+  store <4 x i16> %resultvec, <4 x i16> * %0, align 2
+  ret void
+}
+')
+
 define(`masked_store_blend_8_16_by_8', `
 define void @__masked_store_blend_i8(<8 x i8>* nocapture, <8 x i8>,
                                      <8 x i32>) nounwind alwaysinline {
diff --git a/ispc.cpp b/ispc.cpp
index 02c23568..046c64c4 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -446,14 +446,13 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_maskingIsFree = false;
         this->m_maskBitCount = 32;
     }
-    else if (!strcasecmp(isa, "avxh") ) {
-        fprintf(stderr, " ISA is avxh \n");
+    else if (!strcasecmp(isa, "avx-i64x4") ) {
         this->m_isa = Target::AVX;
         this->m_nativeVectorWidth = 4;
         this->m_vectorWidth = 4;
         this->m_attributes = "+avx,+popcnt,+cmov";
         this->m_maskingIsFree = false;
-        this->m_maskBitCount = 32;
+        this->m_maskBitCount = 64;
     }
     else if (!strcasecmp(isa, "avx-x2") ||
              !strcasecmp(isa, "avx1-x2") ||
diff --git a/llvmutil.cpp b/llvmutil.cpp
index 180c8676..64691498 100644
--- a/llvmutil.cpp
+++ b/llvmutil.cpp
@@ -132,6 +132,10 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
         LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
             llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.getVectorWidth());
         break;
+    case 64:
+        LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
+            llvm::VectorType::get(llvm::Type::getInt64Ty(*ctx), target.getVectorWidth());
+        break;
     default:
         FATAL("Unhandled mask width for initializing MaskType");
     }
@@ -183,6 +187,10 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
         onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
                                         true /*signed*/); // 0xffffffff
         break;
+    case 64:
+        onMask = llvm::ConstantInt::get(llvm::Type::getInt64Ty(*ctx), -1,
+                                        true /*signed*/); // 0xffffffffffffffff
+        break;
     default:
         FATAL("Unhandled mask width for onMask");
     }
@@ -210,6 +218,10 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
         offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
                                          true /*signed*/);
         break;
+    case 64:
+        offMask = llvm::ConstantInt::get(llvm::Type::getInt64Ty(*ctx), 0,
+                                         true /*signed*/);
+        break;
     default:
         FATAL("Unhandled mask width for offMask");
     }
@@ -480,7 +492,10 @@ LLVMUInt64Vector(const uint64_t *ivec) {
 llvm::Constant *
 LLVMBoolVector(bool b) {
     llvm::Constant *v;
-    if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+    if (LLVMTypes::BoolVectorType == LLVMTypes::Int64VectorType)
+        v = llvm::ConstantInt::get(LLVMTypes::Int64Type, b ? 0xffffffffffffffffull : 0,
+                                   false /*unsigned*/);
+    else if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
         v = llvm::ConstantInt::get(LLVMTypes::Int32Type, b ? 0xffffffff : 0,
                                    false /*unsigned*/);
     else if (LLVMTypes::BoolVectorType == LLVMTypes::Int16VectorType)
@@ -506,7 +521,10 @@ LLVMBoolVector(const bool *bvec) {
     std::vector<llvm::Constant *> vals;
     for (int i = 0; i < g->target->getVectorWidth(); ++i) {
         llvm::Constant *v;
-        if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
+        if (LLVMTypes::BoolVectorType == LLVMTypes::Int64VectorType)
+            v = llvm::ConstantInt::get(LLVMTypes::Int64Type, bvec[i] ? 0xffffffffffffffffull : 0,
+                                       false /*unsigned*/);
+        else if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
             v = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[i] ? 0xffffffff : 0,
                                        false /*unsigned*/);
         else if (LLVMTypes::BoolVectorType == LLVMTypes::Int16VectorType)
diff --git a/parse.yy b/parse.yy
index 5fc01cb0..9a2b4fc3 100644
--- a/parse.yy
+++ b/parse.yy
@@ -2183,6 +2183,9 @@ static void lAddMaskToSymbolTable(SourcePos pos) {
     case 32:
         t = AtomicType::VaryingUInt32;
         break;
+    case 64:
+        t = AtomicType::VaryingUInt64;
+        break;
     default:
         FATAL("Unhandled mask bitsize in lAddMaskToSymbolTable");
     }
diff --git a/stdlib.ispc b/stdlib.ispc
index db9d7f36..6d7ee051 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -50,6 +50,9 @@
 #elif (ISPC_MASK_BITS == 32)
   #define IntMaskType int32
   #define UIntMaskType unsigned int32
+#elif (ISPC_MASK_BITS == 64)
+  #define IntMaskType int64
+  #define UIntMaskType unsigned int64
 #else
   #error Unknown value of ISPC_MASK_BITS
 #endif