diff --git a/builtins.cpp b/builtins.cpp
index 1f1b5ca2..ffb05e6d 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -114,61 +114,39 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
     // pointers to uniform
     else if (t == LLVMTypes::Int8PointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt8 :
-                                                 AtomicType::UniformInt8, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt8 :
+                                                       AtomicType::UniformInt8);
     else if (t == LLVMTypes::Int16PointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt16 :
-                                                 AtomicType::UniformInt16, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt16 :
+                                                       AtomicType::UniformInt16);
     else if (t == LLVMTypes::Int32PointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 :
-                                                 AtomicType::UniformInt32, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt32 :
+                                                       AtomicType::UniformInt32);
     else if (t == LLVMTypes::Int64PointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt64 :
-                                                 AtomicType::UniformInt64, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt64 :
+                                                       AtomicType::UniformInt64);
     else if (t == LLVMTypes::FloatPointerType)
-        return new ReferenceType(AtomicType::UniformFloat, false);
+        return PointerType::GetUniform(AtomicType::UniformFloat);
     else if (t == LLVMTypes::DoublePointerType)
-        return new ReferenceType(AtomicType::UniformDouble, false);
+        return PointerType::GetUniform(AtomicType::UniformDouble);

     // pointers to varying
     else if (t == LLVMTypes::Int8VectorPointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt8 :
-                                                 AtomicType::VaryingInt8, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt8 :
+                                                       AtomicType::VaryingInt8);
     else if (t == LLVMTypes::Int16VectorPointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt16 :
-                                                 AtomicType::VaryingInt16, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt16 :
+                                                       AtomicType::VaryingInt16);
     else if (t == LLVMTypes::Int32VectorPointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 :
-                                                 AtomicType::VaryingInt32, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt32 :
+                                                       AtomicType::VaryingInt32);
     else if (t == LLVMTypes::Int64VectorPointerType)
-        return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt64 :
-                                                 AtomicType::VaryingInt64, false);
+        return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt64 :
+                                                       AtomicType::VaryingInt64);
     else if (t == LLVMTypes::FloatVectorPointerType)
-        return new ReferenceType(AtomicType::VaryingFloat, false);
+        return PointerType::GetUniform(AtomicType::VaryingFloat);
     else if (t == LLVMTypes::DoubleVectorPointerType)
-        return new ReferenceType(AtomicType::VaryingDouble, false);
-
-    // arrays
-    else if (llvm::isa<const llvm::PointerType>(t)) {
-        const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(t);
-
-        // Is it a pointer to an unsized array of objects?  If so, then
-        // create the equivalent ispc type.  Note that it has to be a
-        // reference to an array, since ispc passes arrays to functions by
-        // reference.
-        const llvm::ArrayType *at =
-            llvm::dyn_cast<const llvm::ArrayType>(pt->getElementType());
-        if (at != NULL) {
-            const Type *eltType = lLLVMTypeToISPCType(at->getElementType(),
-                                                      intAsUnsigned);
-            if (eltType == NULL)
-                return NULL;
-            // FIXME: this needs to be fixed when arrays can have
-            // over 4G elements...
-            return new ReferenceType(new ArrayType(eltType, (int)at->getNumElements()),
-                                     false);
-        }
-    }
+        return PointerType::GetUniform(AtomicType::VaryingDouble);

     return NULL;
 }
@@ -184,6 +162,9 @@ lCreateSymbol(const std::string &name, const Type *returnType,

     FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);

+    Debug(noPos, "Created builtin symbol \"%s\" [%s]\n", name.c_str(),
+          funcType->GetString().c_str());
+
     Symbol *sym = new Symbol(name, noPos, funcType);
     sym->function = func;
     symbolTable->AddFunction(sym);
@@ -244,7 +225,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {

         // Iterate over the arguments and try to find their equivalent ispc
         // types.  Track if any of the arguments has an integer type.
-        bool anyIntArgs = false, anyReferenceArgs = false;
+        bool anyIntArgs = false;
         std::vector<const Type *> argTypes;
         for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
             const llvm::Type *llvmArgType = ftype->getParamType(j);
@@ -256,7 +237,6 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
             }
             anyIntArgs |=
                 (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
-            anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
             argTypes.push_back(type);
         }

@@ -264,19 +244,6 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
         // so that we get symbols for things with no integer types!
         if (i == 0 || anyIntArgs == true)
            lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
-
-        // If there are any reference types, also make a variant of the
-        // symbol that has them as const references.  This obviously
-        // doesn't make sense for many builtins, but we'll give the stdlib
-        // the option to call one if it needs one.
-        if (anyReferenceArgs == true) {
-            for (unsigned int j = 0; j < argTypes.size(); ++j) {
-                if (dynamic_cast<const ReferenceType *>(argTypes[j]) != NULL)
-                    argTypes[j] = argTypes[j]->GetAsConstType();
-                lCreateSymbol(name + "_refsconst", returnType, argTypes,
-                              ftype, func, symbolTable);
-            }
-        }
     }

     return true;
@@ -476,62 +443,10 @@ lSetInternalFunctions(llvm::Module *module) {
         "__packed_store_active",
         "__popcnt_int32",
         "__popcnt_int64",
-        "__prefetch_read_1_uniform_bool",
-        "__prefetch_read_1_uniform_double",
-        "__prefetch_read_1_uniform_float",
-        "__prefetch_read_1_uniform_int16",
-        "__prefetch_read_1_uniform_int32",
-        "__prefetch_read_1_uniform_int64",
-        "__prefetch_read_1_uniform_int8",
-        "__prefetch_read_1_varying_bool",
-        "__prefetch_read_1_varying_double",
-        "__prefetch_read_1_varying_float",
-        "__prefetch_read_1_varying_int16",
-        "__prefetch_read_1_varying_int32",
-        "__prefetch_read_1_varying_int64",
-        "__prefetch_read_1_varying_int8",
-        "__prefetch_read_2_uniform_bool",
-        "__prefetch_read_2_uniform_double",
-        "__prefetch_read_2_uniform_float",
-        "__prefetch_read_2_uniform_int16",
-        "__prefetch_read_2_uniform_int32",
-        "__prefetch_read_2_uniform_int64",
-        "__prefetch_read_2_uniform_int8",
-        "__prefetch_read_2_varying_bool",
-        "__prefetch_read_2_varying_double",
-        "__prefetch_read_2_varying_float",
-        "__prefetch_read_2_varying_int16",
-        "__prefetch_read_2_varying_int32",
-        "__prefetch_read_2_varying_int64",
-        "__prefetch_read_2_varying_int8",
-        "__prefetch_read_3_uniform_bool",
-        "__prefetch_read_3_uniform_double",
-        "__prefetch_read_3_uniform_float",
-        "__prefetch_read_3_uniform_int16",
-        "__prefetch_read_3_uniform_int32",
-        "__prefetch_read_3_uniform_int64",
-        "__prefetch_read_3_uniform_int8",
-        "__prefetch_read_3_varying_bool",
-        "__prefetch_read_3_varying_double",
-        "__prefetch_read_3_varying_float",
-        "__prefetch_read_3_varying_int16",
-        "__prefetch_read_3_varying_int32",
-        "__prefetch_read_3_varying_int64",
-        "__prefetch_read_3_varying_int8",
-        "__prefetch_read_nt_uniform_bool",
-        "__prefetch_read_nt_uniform_double",
-        "__prefetch_read_nt_uniform_float",
-        "__prefetch_read_nt_uniform_int16",
-        "__prefetch_read_nt_uniform_int32",
-        "__prefetch_read_nt_uniform_int64",
-        "__prefetch_read_nt_uniform_int8",
-        "__prefetch_read_nt_varying_bool",
-        "__prefetch_read_nt_varying_double",
-        "__prefetch_read_nt_varying_float",
-        "__prefetch_read_nt_varying_int16",
-        "__prefetch_read_nt_varying_int32",
-        "__prefetch_read_nt_varying_int64",
-        "__prefetch_read_nt_varying_int8",
+        "__prefetch_read_uniform_1",
+        "__prefetch_read_uniform_2",
+        "__prefetch_read_uniform_3",
+        "__prefetch_read_uniform_nt",
         "__rcp_uniform_float",
         "__rcp_varying_float",
         "__reduce_add_double",
@@ -747,7 +662,7 @@ void
 DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
              bool includeStdlibISPC) {
     // Add the definitions from the compiled builtins-c.c file
-    if (g->target.is32bit) {
+    if (g->target.is32Bit) {
         extern unsigned char builtins_bitcode_c_32[];
         extern int builtins_bitcode_c_32_length;
         AddBitcodeToModule(builtins_bitcode_c_32, builtins_bitcode_c_32_length,
diff --git a/builtins.m4 b/builtins.m4
index affe3853..243f0de1 100644
--- a/builtins.m4
+++ b/builtins.m4
@@ -822,40 +822,6 @@ define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
 }
 ')

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; prefetch definitions
-
-; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
-; and data caches--the declaration is now:
-; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
-;                             i32 %cachetype)  (cachetype 1 == data cache)
-; however, the version below seems to still work...
-
-declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
-
-define(`prefetch_read', `
-define void @__prefetch_read_1_$1($2 *) alwaysinline {
-  %ptr8 = bitcast $2 * %0 to i8 *
-  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
-  ret void
-}
-define void @__prefetch_read_2_$1($2 *) alwaysinline {
-  %ptr8 = bitcast $2 * %0 to i8 *
-  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
-  ret void
-}
-define void @__prefetch_read_3_$1($2 *) alwaysinline {
-  %ptr8 = bitcast $2 * %0 to i8 *
-  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
-  ret void
-}
-define void @__prefetch_read_nt_$1($2 *) alwaysinline {
-  %ptr8 = bitcast $2 * %0 to i8 *
-  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
-  ret void
-}
-')
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 define(`stdlib_core', `
@@ -916,15 +882,25 @@ declare void @__pseudo_masked_store_64(<$1 x i64> * nocapture, <$1 x i64>, <$1 x
 ; converts them to native gather functions or converts them to vector
 ; loads, if equivalent.
-declare <$1 x i8> @__pseudo_gather_8([$1 x i8 *], <$1 x i32>) nounwind readonly
-declare <$1 x i16> @__pseudo_gather_16([$1 x i8 *], <$1 x i32>) nounwind readonly
-declare <$1 x i32> @__pseudo_gather_32([$1 x i8 *], <$1 x i32>) nounwind readonly
-declare <$1 x i64> @__pseudo_gather_64([$1 x i8 *], <$1 x i32>) nounwind readonly
+declare <$1 x i8> @__pseudo_gather32_8(<$1 x i32>, <$1 x i32>) nounwind readonly
+declare <$1 x i16> @__pseudo_gather32_16(<$1 x i32>, <$1 x i32>) nounwind readonly
+declare <$1 x i32> @__pseudo_gather32_32(<$1 x i32>, <$1 x i32>) nounwind readonly
+declare <$1 x i64> @__pseudo_gather32_64(<$1 x i32>, <$1 x i32>) nounwind readonly

-declare <$1 x i8> @__pseudo_gather_base_offsets_8(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
-declare <$1 x i16> @__pseudo_gather_base_offsets_16(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
-declare <$1 x i32> @__pseudo_gather_base_offsets_32(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
-declare <$1 x i64> @__pseudo_gather_base_offsets_64(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
+declare <$1 x i8> @__pseudo_gather64_8(<$1 x i64>, <$1 x i32>) nounwind readonly
+declare <$1 x i16> @__pseudo_gather64_16(<$1 x i64>, <$1 x i32>) nounwind readonly
+declare <$1 x i32> @__pseudo_gather64_32(<$1 x i64>, <$1 x i32>) nounwind readonly
+declare <$1 x i64> @__pseudo_gather64_64(<$1 x i64>, <$1 x i32>) nounwind readonly
+
+declare <$1 x i8> @__pseudo_gather_base_offsets32_8(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
+declare <$1 x i16> @__pseudo_gather_base_offsets32_16(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
+declare <$1 x i32> @__pseudo_gather_base_offsets32_32(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
+declare <$1 x i64> @__pseudo_gather_base_offsets32_64(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
+
+declare <$1 x i8> @__pseudo_gather_base_offsets64_8(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly
+declare <$1 x i16> @__pseudo_gather_base_offsets64_16(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly
+declare <$1 x i32> @__pseudo_gather_base_offsets64_32(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly
+declare <$1 x i64> @__pseudo_gather_base_offsets64_64(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly

 ; Similarly to the pseudo-gathers defined above, we also declare undefined
 ; pseudo-scatter instructions with signatures:
 ;
@@ -949,19 +925,33 @@ declare <$1 x i64> @__pseudo_gather_base_offsets_64(i8 *, <$1 x i32>, <$1 x i32>
 ; And the GSImprovementsPass in turn converts these to actual native
 ; scatters or masked stores.

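; A minimal illustrative sketch under the same hypothetical 4-wide, 32-bit
; assumptions: a masked store through varying pointers is emitted as
;     call void @__pseudo_scatter32_32(<4 x i32> %ptrs, <4 x i32> %values, <4 x i32> %mask)
; and is later lowered either to a native scatter or to the per-lane stores
; that the gen_scatter definitions further below spell out.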
-declare void @__pseudo_scatter_8([$1 x i8 *], <$1 x i8>, <$1 x i32>) nounwind
-declare void @__pseudo_scatter_16([$1 x i8 *], <$1 x i16>, <$1 x i32>) nounwind
-declare void @__pseudo_scatter_32([$1 x i8 *], <$1 x i32>, <$1 x i32>) nounwind
-declare void @__pseudo_scatter_64([$1 x i8 *], <$1 x i64>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter32_8(<$1 x i32>, <$1 x i8>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter32_16(<$1 x i32>, <$1 x i16>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter32_32(<$1 x i32>, <$1 x i32>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter32_64(<$1 x i32>, <$1 x i64>, <$1 x i32>) nounwind

-declare void @__pseudo_scatter_base_offsets_8(i8 * nocapture, <$1 x i32>,
-                                              <$1 x i8>, <$1 x i32>) nounwind
-declare void @__pseudo_scatter_base_offsets_16(i8 * nocapture, <$1 x i32>,
-                                               <$1 x i16>, <$1 x i32>) nounwind
-declare void @__pseudo_scatter_base_offsets_32(i8 * nocapture, <$1 x i32>,
-                                               <$1 x i32>, <$1 x i32>) nounwind
-declare void @__pseudo_scatter_base_offsets_64(i8 * nocapture, <$1 x i32>,
-                                               <$1 x i64>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter64_8(<$1 x i64>, <$1 x i8>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter64_16(<$1 x i64>, <$1 x i16>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter64_32(<$1 x i64>, <$1 x i32>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter64_64(<$1 x i64>, <$1 x i64>, <$1 x i32>) nounwind
+
+declare void @__pseudo_scatter_base_offsets32_8(i8 * nocapture, <$1 x i32>,
+                                                <$1 x i8>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter_base_offsets32_16(i8 * nocapture, <$1 x i32>,
+                                                 <$1 x i16>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter_base_offsets32_32(i8 * nocapture, <$1 x i32>,
+                                                 <$1 x i32>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter_base_offsets32_64(i8 * nocapture, <$1 x i32>,
+                                                 <$1 x i64>, <$1 x i32>) nounwind
+
+declare void @__pseudo_scatter_base_offsets64_8(i8 * nocapture, <$1 x i64>,
+                                                <$1 x i8>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter_base_offsets64_16(i8 * nocapture, <$1 x i64>,
+                                                 <$1 x i16>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter_base_offsets64_32(i8 * nocapture, <$1 x i64>,
+                                                 <$1 x i32>, <$1 x i32>) nounwind
+declare void @__pseudo_scatter_base_offsets64_64(i8 * nocapture, <$1 x i64>,
+                                                 <$1 x i64>, <$1 x i32>) nounwind

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; vector ops
@@ -1634,11 +1624,10 @@ define void
 ;; versions to be called from stdlib

 define void
-@__aos_to_soa4_float([0 x float] * noalias %base, i32 %offset,
+@__aos_to_soa4_float(float * noalias %pf, i32 %offset,
         <$1 x float> * noalias %out0, <$1 x float> * noalias %out1,
         <$1 x float> * noalias %out2, <$1 x float> * noalias %out3) nounwind alwaysinline {
-  %pf = bitcast [0 x float] * %base to float *
   %p = getelementptr float * %pf, i32 %offset
   %p0 = bitcast float * %p to <$1 x float> *
   %v0 = load <$1 x float> * %p0, align 4
@@ -1656,16 +1645,16 @@ define void

 define void
-@__aos_to_soa4_int32([0 x i32] * noalias %base, i32 %offset,
+@__aos_to_soa4_int32(i32 * noalias %base, i32 %offset,
         <$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
         <$1 x i32> * noalias %out2, <$1 x i32> * noalias %out3) nounwind alwaysinline {
-  %fbase = bitcast [0 x i32] * %base to [0 x float] *
+  %fbase = bitcast i32 * %base to float *
   %fout0 = bitcast <$1 x i32> * %out0 to <$1 x float> *
   %fout1 = bitcast <$1 x i32> * %out1 to <$1 x float> *
   %fout2 = bitcast <$1 x i32> * %out2 to <$1 x float> *
   %fout3 = bitcast <$1 x i32> * %out3 to <$1 x float> *
-  call void @__aos_to_soa4_float([0 x float] * %fbase, i32 %offset,
+  call void @__aos_to_soa4_float(float * %fbase, i32 %offset,
         <$1 x float> * %fout0, <$1 x float> * %fout1,
         <$1 x float> * %fout2, <$1 x float> * %fout3)
   ret void
@@ -1674,9 +1663,8 @@ define void

 define void
 @__soa_to_aos4_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
-        <$1 x float> %v3, [0 x float] * noalias %base,
+        <$1 x float> %v3, float * noalias %pf,
         i32 %offset) nounwind alwaysinline {
-  %pf = bitcast [0 x float] * %base to float *
   %p = getelementptr float * %pf, i32 %offset
   %out0 = bitcast float * %p to <$1 x float> *
   %out1 = getelementptr <$1 x float> * %out0, i32 1
@@ -1691,25 +1679,24 @@ define void

 define void
 @__soa_to_aos4_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
-        <$1 x i32> %v3, [0 x i32] * noalias %base,
+        <$1 x i32> %v3, i32 * noalias %base,
         i32 %offset) nounwind alwaysinline {
   %fv0 = bitcast <$1 x i32> %v0 to <$1 x float>
   %fv1 = bitcast <$1 x i32> %v1 to <$1 x float>
   %fv2 = bitcast <$1 x i32> %v2 to <$1 x float>
   %fv3 = bitcast <$1 x i32> %v3 to <$1 x float>
-  %fbase = bitcast [0 x i32] * %base to [0 x float] *
+  %fbase = bitcast i32 * %base to float *
   call void @__soa_to_aos4_float(<$1 x float> %fv0, <$1 x float> %fv1,
-        <$1 x float> %fv2, <$1 x float> %fv3, [0 x float] * %fbase,
+        <$1 x float> %fv2, <$1 x float> %fv3, float * %fbase,
         i32 %offset)
   ret void
 }

 define void
-@__aos_to_soa3_float([0 x float] * noalias %base, i32 %offset,
+@__aos_to_soa3_float(float * noalias %pf, i32 %offset,
         <$1 x float> * %out0, <$1 x float> * %out1,
         <$1 x float> * %out2) nounwind alwaysinline {
-  %pf = bitcast [0 x float] * %base to float *
   %p = getelementptr float * %pf, i32 %offset
   %p0 = bitcast float * %p to <$1 x float> *
   %v0 = load <$1 x float> * %p0, align 4
@@ -1725,14 +1712,14 @@ define void

 define void
-@__aos_to_soa3_int32([0 x i32] * noalias %base, i32 %offset,
+@__aos_to_soa3_int32(i32 * noalias %base, i32 %offset,
         <$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
         <$1 x i32> * noalias %out2) nounwind alwaysinline {
-  %fbase = bitcast [0 x i32] * %base to [0 x float] *
+  %fbase = bitcast i32 * %base to float *
   %fout0 = bitcast <$1 x i32> * %out0 to <$1 x float> *
   %fout1 = bitcast <$1 x i32> * %out1 to <$1 x float> *
   %fout2 = bitcast <$1 x i32> * %out2 to <$1 x float> *
-  call void @__aos_to_soa3_float([0 x float] * %fbase, i32 %offset,
+  call void @__aos_to_soa3_float(float * %fbase, i32 %offset,
         <$1 x float> * %fout0, <$1 x float> * %fout1, <$1 x float> * %fout2)
   ret void
 }
@@ -1740,8 +1727,7 @@ define void

 define void
 @__soa_to_aos3_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
-        [0 x float] * noalias %base, i32 %offset) nounwind alwaysinline {
-  %pf = bitcast [0 x float] * %base to float *
+        float * noalias %pf, i32 %offset) nounwind alwaysinline {
   %p = getelementptr float * %pf, i32 %offset
   %out0 = bitcast float * %p to <$1 x float> *
   %out1 = getelementptr <$1 x float> * %out0, i32 1
@@ -1755,13 +1741,13 @@ define void

 define void
 @__soa_to_aos3_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
-        [0 x i32] * noalias %base, i32 %offset) nounwind alwaysinline {
+        i32 * noalias %base, i32 %offset) nounwind alwaysinline {
   %fv0 = bitcast <$1 x i32> %v0 to <$1 x float>
   %fv1 = bitcast <$1 x i32> %v1 to <$1 x float>
   %fv2 = bitcast <$1 x i32> %v2 to <$1 x float>
-  %fbase = bitcast [0 x i32] * %base to [0 x float] *
+  %fbase = bitcast i32 * %base to float *
   call void @__soa_to_aos3_float(<$1 x float> %fv0, <$1 x float> %fv1,
-        <$1 x float> %fv2, [0 x float] * %fbase, i32 %offset)
+        <$1 x float> %fv2, float * %fbase, i32 %offset)
   ret void
 }

@@ -1769,21 +1755,34 @@ define void
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; prefetching

-prefetch_read(uniform_bool, i1)
-prefetch_read(uniform_int8, i8)
-prefetch_read(uniform_int16, i16)
-prefetch_read(uniform_int32, i32)
-prefetch_read(uniform_int64, i64)
-prefetch_read(uniform_float, float)
-prefetch_read(uniform_double, double)
+; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
+; and data caches--the declaration is now:
+; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
+;                             i32 %cachetype)  (cachetype 1 == data cache)
+; however, the version below seems to still work...
+
+declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
+
+define void @__prefetch_read_uniform_1(i8 *) alwaysinline {
+  call void @llvm.prefetch(i8 * %0, i32 0, i32 3)
+  ret void
+}
+
+define void @__prefetch_read_uniform_2(i8 *) alwaysinline {
+  call void @llvm.prefetch(i8 * %0, i32 0, i32 2)
+  ret void
+}
+
+define void @__prefetch_read_uniform_3(i8 *) alwaysinline {
+  call void @llvm.prefetch(i8 * %0, i32 0, i32 1)
+  ret void
+}
+
+define void @__prefetch_read_uniform_nt(i8 *) alwaysinline {
+  call void @llvm.prefetch(i8 * %0, i32 0, i32 0)
+  ret void
+}

-prefetch_read(varying_bool, <$1 x i32>)
-prefetch_read(varying_int8, <$1 x i8>)
-prefetch_read(varying_int16, <$1 x i16>)
-prefetch_read(varying_int32, <$1 x i32>)
-prefetch_read(varying_int64, <$1 x i64>)
-prefetch_read(varying_float, <$1 x float>)
-prefetch_read(varying_double, <$1 x double>)

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; assert
@@ -2354,11 +2353,10 @@ define void @__masked_store_blend_16(<16 x i16>* nocapture, <16 x i16>,

 define(`packed_load_and_store', `

-define i32 @__packed_load_active([0 x i32] *, i32 %start_offset, <$1 x i32> * %val_ptr,
+define i32 @__packed_load_active(i32 * %baseptr, i32 %start_offset, <$1 x i32> * %val_ptr,
                                  <$1 x i32> %full_mask) nounwind alwaysinline {
 entry:
   %mask = call i32 @__movmsk(<$1 x i32> %full_mask)
-  %baseptr = bitcast [0 x i32] * %0 to i32 *
   %startptr = getelementptr i32 * %baseptr, i32 %start_offset
   %mask_known = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask)
   br i1 %mask_known, label %known_mask, label %unknown_mask
@@ -2410,11 +2408,10 @@ done:
   ret i32 %nextoffset
 }

-define i32 @__packed_store_active([0 x i32] *, i32 %start_offset, <$1 x i32> %vals,
+define i32 @__packed_store_active(i32 * %baseptr, i32 %start_offset, <$1 x i32> %vals,
                                   <$1 x i32> %full_mask) nounwind alwaysinline {
 entry:
   %mask = call i32 @__movmsk(<$1 x i32> %full_mask)
-  %baseptr = bitcast [0 x i32] * %0 to i32 *
   %startptr = getelementptr i32 * %baseptr, i32 %start_offset
   %mask_known = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask)
   br i1 %mask_known, label %known_mask, label %unknown_mask
@@ -2686,8 +2683,8 @@ pl_done:
   ret i32 %nextoffset
 }

 define(`gen_gather', `
 ;; Define the utility function to do the gather operation for a single element
 ;; of the type
-define <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret,
-                                  i32 %lane) nounwind readonly alwaysinline {
+define <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret,
+                                    i32 %lane) nounwind readonly alwaysinline {
   ; compute address for this one from the base
   %offset32 = extractelement <$1 x i32> %offsets, i32 %lane
   %ptroffset = getelementptr i8 * %ptr, i32 %offset32
@@ -2699,9 +2696,22 @@ define <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret
   ret <$1 x $2> %updatedret
 }

+define <$1 x $2> @__gather_elt64_$2(i8 * %ptr, <$1 x i64> %offsets, <$1 x $2> %ret,
+                                    i32 %lane) nounwind readonly alwaysinline {
+  ; compute address for this one from the base
+  %offset32 = extractelement <$1 x i64> %offsets, i32 %lane
+  %ptroffset = getelementptr i8 * %ptr, i64 %offset32
+  %ptrcast = bitcast i8 * %ptroffset to $2 *

-define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,
-                                           <$1 x i32> %vecmask) nounwind readonly alwaysinline {
+  ; load value and insert into returned value
+  %val = load $2 *%ptrcast
+  %updatedret = insertelement <$1 x $2> %ret, $2 %val, i32 %lane
+  ret <$1 x $2> %updatedret
+}
+
+
+define <$1 x $2> @__gather_base_offsets32_$2(i8 * %ptr, <$1 x i32> %offsets,
+                                             <$1 x i32> %vecmask) nounwind readonly alwaysinline {
   ; We can be clever and avoid the per-lane stuff for gathers if we are willing
   ; to require that the 0th element of the array being gathered from is always
   ; legal to read from (and we do indeed require that, given the benefits!)
@@ -2713,14 +2723,68 @@ define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,
                                <$1 x i32> %vecmask)
   %newOffsets = load <$1 x i32> * %offsetsPtr

-  %ret0 = call <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %newOffsets,
-                                          <$1 x $2> undef, i32 0)
+  %ret0 = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %newOffsets,
+                                            <$1 x $2> undef, i32 0)
 forloop(lane, 1, eval($1-1),
-    `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt_$2(i8 * %ptr,
+    `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr,
                                 <$1 x i32> %newOffsets, <$1 x $2> %retPREV, i32 LANE)
 ', `LANE', lane), `PREV', eval(lane-1))')
   ret <$1 x $2> %ret`'eval($1-1)
 }
+
+define <$1 x $2> @__gather_base_offsets64_$2(i8 * %ptr, <$1 x i64> %offsets,
+                                             <$1 x i32> %vecmask) nounwind readonly alwaysinline {
+  ; We can be clever and avoid the per-lane stuff for gathers if we are willing
+  ; to require that the 0th element of the array being gathered from is always
+  ; legal to read from (and we do indeed require that, given the benefits!)
+  ;
+  ; Set the offset to zero for lanes that are off
+  %offsetsPtr = alloca <$1 x i64>
+  store <$1 x i64> zeroinitializer, <$1 x i64> * %offsetsPtr
+  call void @__masked_store_blend_64(<$1 x i64> * %offsetsPtr, <$1 x i64> %offsets,
+                                     <$1 x i32> %vecmask)
+  %newOffsets = load <$1 x i64> * %offsetsPtr
+
+  %ret0 = call <$1 x $2> @__gather_elt64_$2(i8 * %ptr, <$1 x i64> %newOffsets,
+                                            <$1 x $2> undef, i32 0)
+forloop(lane, 1, eval($1-1),
+    `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt64_$2(i8 * %ptr,
+                                <$1 x i64> %newOffsets, <$1 x $2> %retPREV, i32 LANE)
+', `LANE', lane), `PREV', eval(lane-1))')
+  ret <$1 x $2> %ret`'eval($1-1)
+}
+
+; fully general 32-bit gather, takes array of pointers encoded as vector of i32s
+define <$1 x $2> @__gather32_$2(<$1 x i32> %ptrs,
+                                <$1 x i32> %vecmask) nounwind readonly alwaysinline {
+  %ret_ptr = alloca <$1 x $2>
+  per_lane($1, <$1 x i32> %vecmask, `
+  %iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
+  %ptr_ID = inttoptr i32 %iptr_ID to $2 *
+  %val_ID = load $2 * %ptr_ID
+  %store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
+  store $2 %val_ID, $2 * %store_ptr_ID
+  ')
+
+  %ret = load <$1 x $2> * %ret_ptr
+  ret <$1 x $2> %ret
+}
+
+; fully general 64-bit gather, takes array of pointers encoded as vector of i64s
+define <$1 x $2> @__gather64_$2(<$1 x i64> %ptrs,
+                                <$1 x i32> %vecmask) nounwind readonly alwaysinline {
+  %ret_ptr = alloca <$1 x $2>
+  per_lane($1, <$1 x i32> %vecmask, `
+  %iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
+  %ptr_ID = inttoptr i64 %iptr_ID to $2 *
+  %val_ID = load $2 * %ptr_ID
+  %store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
+  store $2 %val_ID, $2 * %store_ptr_ID
+  ')
+
+  %ret = load <$1 x $2> * %ret_ptr
+  ret <$1 x $2> %ret
+}
 '
 )
@@ -2735,8 +2799,8 @@ define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,

 define(`gen_scatter', `
 ;; Define the function that describes the work to do to scatter a single
 ;; value
-define void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values,
-                              i32 %lane) nounwind alwaysinline {
+define void @__scatter_elt32_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values,
+                                i32 %lane) nounwind alwaysinline {
   %offset32 = extractelement <$1 x i32> %offsets, i32 %lane
   %offset64 = zext i32 %offset32 to i64
   %ptrdelta = add i64 %ptr64, %offset64
@@ -2746,13 +2810,57 @@ define void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values
   ret void
 }

-define void @__scatter_base_offsets_$2(i8* %base, <$1 x i32> %offsets, <$1 x $2> %values,
-                                       <$1 x i32> %mask) nounwind alwaysinline {
+define void @__scatter_elt64_$2(i64 %ptr64, <$1 x i64> %offsets, <$1 x $2> %values,
+                                i32 %lane) nounwind alwaysinline {
+  %offset64 = extractelement <$1 x i64> %offsets, i32 %lane
+  %ptrdelta = add i64 %ptr64, %offset64
+  %ptr = inttoptr i64 %ptrdelta to $2 *
+  %storeval = extractelement <$1 x $2> %values, i32 %lane
+  store $2 %storeval, $2 * %ptr
+  ret void
+}
+
+define void @__scatter_base_offsets32_$2(i8* %base, <$1 x i32> %offsets, <$1 x $2> %values,
+                                         <$1 x i32> %mask) nounwind alwaysinline {
   ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
   %ptr64 = ptrtoint i8 * %base to i64
   per_lane($1, <$1 x i32> %mask, `
-  call void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values, i32 LANE)')
+  call void @__scatter_elt32_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values, i32 LANE)')
   ret void
 }
+
+define void @__scatter_base_offsets64_$2(i8* %base, <$1 x i64> %offsets, <$1 x $2> %values,
+                                         <$1 x i32> %mask) nounwind alwaysinline {
+  ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
+  %ptr64 = ptrtoint i8 * %base to i64
+  per_lane($1, <$1 x i32> %mask, `
+  call void @__scatter_elt64_$2(i64 %ptr64, <$1 x i64> %offsets, <$1 x $2> %values, i32 LANE)')
+  ret void
+}
+
+; fully general 32-bit scatter, takes array of pointers encoded as vector of i32s
+define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
+                            <$1 x i32> %mask) nounwind alwaysinline {
+  per_lane($1, <$1 x i32> %mask, `
+  %iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
+  %ptr_ID = inttoptr i32 %iptr_ID to $2 *
+  %val_ID = extractelement <$1 x $2> %values, i32 LANE
+  store $2 %val_ID, $2 * %ptr_ID
+  ')
+  ret void
+}
+
+; fully general 64-bit scatter, takes array of pointers encoded as vector of i64s
+define void @__scatter64_$2(<$1 x i64> %ptrs, <$1 x $2> %values,
+                            <$1 x i32> %mask) nounwind alwaysinline {
+  per_lane($1, <$1 x i32> %mask, `
+  %iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
+  %ptr_ID = inttoptr i64 %iptr_ID to $2 *
+  %val_ID = extractelement <$1 x $2> %values, i32 LANE
+  store $2 %val_ID, $2 * %ptr_ID
+  ')
+  ret void
+}
+
 '
 )
diff --git a/ctx.cpp b/ctx.cpp
index 0fd4a760..95840aab 100644
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -239,7 +239,7 @@ FunctionEmitContext::GetInternalMask() {
     if (VaryingCFDepth() == 0)
         return LLVMMaskAllOn;
     else
-        return LoadInst(internalMaskPointer, NULL, NULL, "load_mask");
+        return LoadInst(internalMaskPointer, "load_mask");
 }


@@ -374,9 +374,8 @@ FunctionEmitContext::EndIf() {
         // newMask = (oldMask & ~(breakLanes | continueLanes))
         llvm::Value *oldMask = GetInternalMask();
-        llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL, NULL,
-                                           "break_lanes");
-        llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL, NULL,
+        llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes");
+        llvm::Value *continueLanes = LoadInst(continueLanesPtr,
                                               "continue_lanes");
         llvm::Value *breakOrContinueLanes =
             BinaryOperator(llvm::Instruction::Or, breakLanes, continueLanes,
@@ -453,7 +452,7 @@ FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
     // Restore the mask to the given old mask, but leave off any lanes that
     // executed a return statement.
     // newMask = (oldMask & ~returnedLanes)
-    llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, NULL,
+    llvm::Value *returnedLanes = LoadInst(returnedLanesPtr,
                                           "returned_lanes");
     llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes");
     llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
@@ -486,7 +485,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) {
         // breakLanes = breakLanes | mask
         assert(breakLanesPtr != NULL);
         llvm::Value *mask = GetInternalMask();
-        llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, NULL,
+        llvm::Value *breakMask = LoadInst(breakLanesPtr,
                                           "break_mask");
         llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
                                               mask, breakMask, "mask|break_mask");
@@ -536,7 +535,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
         assert(continueLanesPtr);
         llvm::Value *mask = GetInternalMask();
         llvm::Value *continueMask =
-            LoadInst(continueLanesPtr, NULL, NULL, "continue_mask");
+            LoadInst(continueLanesPtr, "continue_mask");
         llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
                                               mask, continueMask, "mask|continueMask");
         StoreInst(newMask, continueLanesPtr);
@@ -580,12 +579,11 @@ FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
     // Check to see if (returned lanes | continued lanes | break lanes) is
     // equal to the value of mask at the start of the loop iteration.  If
     // so, everyone is done and we can jump to the given target
-    llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, NULL,
+    llvm::Value *returned = LoadInst(returnedLanesPtr,
                                      "returned_lanes");
-    llvm::Value *continued = LoadInst(continueLanesPtr, NULL, NULL,
+    llvm::Value *continued = LoadInst(continueLanesPtr,
                                       "continue_lanes");
-    llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, NULL,
-                                    "break_lanes");
+    llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes");
     llvm::Value *returnedOrContinued = BinaryOperator(llvm::Instruction::Or,
                                                       returned, continued,
                                                       "returned|continued");
@@ -619,7 +617,7 @@ FunctionEmitContext::RestoreContinuedLanes() {
     // mask = mask & continueFlags
     llvm::Value *mask = GetInternalMask();
-    llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, NULL,
+    llvm::Value *continueMask = LoadInst(continueLanesPtr,
                                          "continue_mask");
     llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
                                          mask, continueMask, "mask|continue_mask");
@@ -663,7 +661,8 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
             // in the return value memory; this preserves the return
             // values from other lanes that may have executed return
             // statements previously.
-            StoreInst(retVal, returnValuePtr, GetInternalMask(), returnType);
+            StoreInst(retVal, returnValuePtr, GetInternalMask(),
+                      PointerType::GetUniform(returnType));
         }
     }
@@ -677,8 +676,8 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
     else {
         // Otherwise we update the returnedLanes value by ANDing it with
        // the current lane mask.
-        llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL, NULL,
-                                                 "old_returned_lanes");
+        llvm::Value *oldReturnedLanes =
+            LoadInst(returnedLanesPtr, "old_returned_lanes");
         llvm::Value *newReturnedLanes =
             BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
                            GetInternalMask(), "old_mask|returned_lanes");
@@ -733,7 +732,7 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
     // There should be one with signed int signature, one unsigned int.
     assert(mm && mm->size() == 2);
     llvm::Function *fmm = (*mm)[0]->function;
-    return CallInst(fmm, AtomicType::UniformInt32, v, "val_movmsk");
+    return CallInst(fmm, NULL, v, "val_movmsk");
 }
@@ -777,6 +776,11 @@ FunctionEmitContext::CreateBasicBlock(const char *name) {

 llvm::Value *
 FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
+    if (b == NULL) {
+        assert(m->errorCount > 0);
+        return NULL;
+    }
+
     LLVM_TYPE_CONST llvm::ArrayType *at =
         llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(b->getType());
     if (at) {
@@ -834,7 +838,7 @@ FunctionEmitContext::AddInstrumentationPoint(const char *note) {
     args.push_back(LaneMask(GetFullMask()));

     llvm::Function *finst = m->module->getFunction("ISPCInstrument");
-    CallInst(finst, AtomicType::Void, args, "");
+    CallInst(finst, NULL, args, "");
 }
@@ -952,17 +956,12 @@ lArrayVectorWidth(LLVM_TYPE_CONST llvm::Type *t) {
     if (arrayType == NULL)
         return 0;

-    // We shouldn't be seeing arrays of anything but vectors or pointers
-    // (for == and !=) being passed to things like
-    // FunctionEmitContext::BinaryOperator() as operands
+    // We shouldn't be seeing arrays of anything but vectors being passed
+    // to things like FunctionEmitContext::BinaryOperator() as operands.
     LLVM_TYPE_CONST llvm::VectorType *vectorElementType =
         llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
-    LLVM_TYPE_CONST llvm::PointerType *pointerElementType =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(arrayType->getElementType());
     assert((vectorElementType != NULL &&
-            (int)vectorElementType->getNumElements() == g->target.vectorWidth) ||
-           (pointerElementType != NULL &&
-            (int)arrayType->getNumElements() == g->target.vectorWidth));
+            (int)vectorElementType->getNumElements() == g->target.vectorWidth));

     return (int)arrayType->getNumElements();
 }
@@ -1034,9 +1033,9 @@ FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
 }


-// Given the llvm Type that represents an ispc VectorType (or array of
-// pointers), return an equally-shaped type with boolean elements.  (This
-// is the type that will be returned from CmpInst with ispc VectorTypes).
+// Given the llvm Type that represents an ispc VectorType, return an
+// equally-shaped type with boolean elements.  (This is the type that will
+// be returned from CmpInst with ispc VectorTypes).
 static LLVM_TYPE_CONST llvm::Type *
 lGetMatchingBoolVectorType(LLVM_TYPE_CONST llvm::Type *type) {
     LLVM_TYPE_CONST llvm::ArrayType *arrayType =
@@ -1045,19 +1044,12 @@ lGetMatchingBoolVectorType(LLVM_TYPE_CONST llvm::Type *type) {

     LLVM_TYPE_CONST llvm::VectorType *vectorElementType =
         llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
-    if (vectorElementType != NULL) {
-        assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
-        LLVM_TYPE_CONST llvm::Type *base =
-            llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
-        return llvm::ArrayType::get(base, arrayType->getNumElements());
-    }
-    else {
-        LLVM_TYPE_CONST llvm::PointerType *pointerElementType =
-            llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(arrayType->getElementType());
-        assert(pointerElementType != NULL);
-        assert((int)arrayType->getNumElements() == g->target.vectorWidth);
-        return llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
-    }
+    assert(vectorElementType != NULL);
+    assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
+
+    LLVM_TYPE_CONST llvm::Type *base =
+        llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
+    return llvm::ArrayType::get(base, arrayType->getNumElements());
 }
@@ -1096,7 +1088,7 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,

 llvm::Value *
-FunctionEmitContext::SmearScalar(llvm::Value *value, const char *name) {
+FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
     if (value == NULL) {
         assert(m->errorCount > 0);
         return NULL;
@@ -1104,12 +1096,17 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {

     llvm::Value *ret = NULL;
     LLVM_TYPE_CONST llvm::Type *eltType = value->getType();
+
     LLVM_TYPE_CONST llvm::PointerType *pt =
         llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(eltType);
-    if (pt != NULL)
-        ret = llvm::UndefValue::get(llvm::ArrayType::get(eltType,
-                                                         g->target.vectorWidth));
+    if (pt != NULL) {
+        // Varying pointers are represented as vectors of i32/i64s
+        ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType);
+        value = PtrToIntInst(value);
+    }
     else
+        // All other varying types are represented as vectors of the
+        // underlying type.
         ret = llvm::UndefValue::get(llvm::VectorType::get(eltType,
                                                           g->target.vectorWidth));
@@ -1118,75 +1115,43 @@
                        llvm::Twine(i);
         ret = InsertInst(ret, value, i, n.str().c_str());
     }
+
     return ret;
 }


 llvm::Value *
-FunctionEmitContext::BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
+FunctionEmitContext::BitCastInst(llvm::Value *value,
+                                 LLVM_TYPE_CONST llvm::Type *type,
                                  const char *name) {
     if (value == NULL) {
         assert(m->errorCount > 0);
         return NULL;
     }

-    LLVM_TYPE_CONST llvm::Type *valType = value->getType();
-    LLVM_TYPE_CONST llvm::ArrayType *at =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(valType);
-    if (at && llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(at->getElementType())) {
-        // If we're bitcasting an array of pointers, we have a varying
-        // lvalue; apply the corresponding bitcast to each of the
-        // individual pointers and return the result array.
-        assert((int)at->getNumElements() == g->target.vectorWidth);
-
-        llvm::Value *ret =
-            llvm::UndefValue::get(llvm::ArrayType::get(type, g->target.vectorWidth));
-        for (int i = 0; i < g->target.vectorWidth; ++i) {
-            llvm::Value *elt = ExtractInst(value, i);
-            llvm::Value *bc = BitCastInst(elt, type, name);
-            ret = InsertInst(ret, bc, i);
-        }
-        return ret;
-    }
-    else {
-        llvm::Instruction *inst =
-            new llvm::BitCastInst(value, type, name ? name : "bitcast", bblock);
-        AddDebugPos(inst);
-        return inst;
-    }
+    llvm::Instruction *inst =
+        new llvm::BitCastInst(value, type, name ? name : "bitcast", bblock);
+    AddDebugPos(inst);
+    return inst;
 }


 llvm::Value *
-FunctionEmitContext::PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
-                                  const char *name) {
+FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
     if (value == NULL) {
         assert(m->errorCount > 0);
         return NULL;
     }

-    LLVM_TYPE_CONST llvm::Type *valType = value->getType();
-    LLVM_TYPE_CONST llvm::ArrayType *at =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(valType);
-    if (at && llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(at->getElementType())) {
-        // varying lvalue -> apply ptr to int to the individual pointers
-        assert((int)at->getNumElements() == g->target.vectorWidth);
+    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(value->getType()))
+        // no-op for varying pointers; they're already vectors of ints
+        return value;

-        llvm::Value *ret =
-            llvm::UndefValue::get(llvm::ArrayType::get(type, g->target.vectorWidth));
-        for (int i = 0; i < g->target.vectorWidth; ++i) {
-            llvm::Value *elt = ExtractInst(value, i);
-            llvm::Value *p2i = PtrToIntInst(elt, type, name);
-            ret = InsertInst(ret, p2i, i);
-        }
-        return ret;
-    }
-    else {
-        llvm::Instruction *inst =
-            new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
-        AddDebugPos(inst);
-        return inst;
-    }
+    LLVM_TYPE_CONST llvm::Type *type = LLVMTypes::PointerIntType;
+    llvm::Instruction *inst =
+        new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
+    AddDebugPos(inst);
+    return inst;
 }


@@ -1198,28 +1163,14 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type
         return NULL;
     }

-    LLVM_TYPE_CONST llvm::Type *valType = value->getType();
-    LLVM_TYPE_CONST llvm::ArrayType *at =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(valType);
-    if (at != NULL) {
-        // varying lvalue -> apply int to ptr to the individual pointers
-        assert((int)at->getNumElements() == g->target.vectorWidth);
+    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(value->getType()))
+        // no-op for varying pointers; they're already vectors of ints
+        return value;

-        llvm::Value *ret =
-            llvm::UndefValue::get(llvm::ArrayType::get(type, g->target.vectorWidth));
-        for (int i = 0; i < g->target.vectorWidth; ++i) {
-            llvm::Value *elt = ExtractInst(value, i);
-            llvm::Value *i2p = IntToPtrInst(elt, type, name);
-            ret = InsertInst(ret, i2p, i);
-        }
-        return ret;
-    }
-    else {
-        llvm::Instruction *inst =
-            new llvm::IntToPtrInst(value, type, name ? name : "int2ptr", bblock);
-        AddDebugPos(inst);
-        return inst;
-    }
+    llvm::Instruction *inst =
+        new llvm::IntToPtrInst(value, type, name ? name : "int2ptr", bblock);
+    AddDebugPos(inst);
+    return inst;
 }


@@ -1240,48 +1191,6 @@ FunctionEmitContext::TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *t
 }


-llvm::Value *
-FunctionEmitContext::ArrayToVectorInst(llvm::Value *array) {
-    if (array == NULL) {
-        assert(m->errorCount > 0);
-        return NULL;
-    }
-
-    LLVM_TYPE_CONST llvm::ArrayType *at =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(array->getType());
-    assert(at != NULL);
-
-    uint64_t count = at->getNumElements();
-    LLVM_TYPE_CONST llvm::VectorType *vt =
-        llvm::VectorType::get(at->getElementType(), count);
-    llvm::Value *vec = llvm::UndefValue::get(vt);
-    for (uint64_t i = 0; i < count; ++i)
-        vec = InsertInst(vec, ExtractInst(array, i), i);
-    return vec;
-}
-
-
-llvm::Value *
-FunctionEmitContext::VectorToArrayInst(llvm::Value *vector) {
-    if (vector == NULL) {
-        assert(m->errorCount > 0);
-        return NULL;
-    }
-
-    LLVM_TYPE_CONST llvm::VectorType *vt =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(vector->getType());
-    assert(vt != NULL);
-
-    uint64_t count = vt->getNumElements();
-    LLVM_TYPE_CONST llvm::ArrayType *at =
-        llvm::ArrayType::get(vt->getElementType(), count);
-    llvm::Value *array = llvm::UndefValue::get(at);
-    for (uint64_t i = 0; i < count; ++i)
-        array = InsertInst(array, ExtractInst(vector, i), i);
-    return array;
-}
-
-
 llvm::Instruction *
 FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
                               LLVM_TYPE_CONST llvm::Type *type, const char *name) {
@@ -1350,27 +1259,132 @@ FunctionEmitContext::ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *ty
 }


+/** Utility routine used by the GetElementPtrInst() methods; given a
+    pointer to some type (either uniform or varying) and an index (also
+    either uniform or varying), this returns the new pointer (varying if
+    appropriate) given by offsetting the base pointer by the index times
+    the size of the object that the pointer points to.
+ */
+llvm::Value *
+FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
+                                     const Type *ptrType) {
+    // Find the scale factor for the index (i.e. the size of the object
+    // that the pointer(s) point(s) to).
+    const Type *scaleType = ptrType->GetBaseType();
+    llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx));
+
+    bool indexIsVarying =
+        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index->getType());
+    llvm::Value *offset = NULL;
+    if (indexIsVarying == false) {
+        // Truncate or sign extend the index as appropriate to a 32 or
+        // 64-bit type.
+        if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
+            index->getType() == LLVMTypes::Int64Type)
+            index = TruncInst(index, LLVMTypes::Int32Type, "trunc_index");
+        else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
+                 index->getType() == LLVMTypes::Int32Type)
+            index = SExtInst(index, LLVMTypes::Int64Type, "sext_index");
+
+        // do a scalar multiply to get the offset as index * scale and then
+        // smear the result out to be a vector; this is more efficient than
+        // first promoting both the scale and the index to vectors and then
+        // multiplying.
+        offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
+        offset = SmearUniform(offset, "offset_smear");
+    }
+    else {
+        // Similarly, truncate or sign extend the index to be a 32 or 64
+        // bit vector type
+        if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
+            index->getType() == LLVMTypes::Int64VectorType)
+            index = TruncInst(index, LLVMTypes::Int32VectorType, "trunc_index");
+        else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
+                 index->getType() == LLVMTypes::Int32VectorType)
+            index = SExtInst(index, LLVMTypes::Int64VectorType, "sext_index");
+
+        scale = SmearUniform(scale, "scale_smear");
+
+        // offset = index * scale
+        offset = BinaryOperator(llvm::Instruction::Mul, scale, index, "offset");
+    }
+
+    // For 64-bit targets, if we've been doing our offset calculations in
+    // 32 bits, we still have to convert to a 64-bit value before we
+    // actually add the offset to the pointer.
+    if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
+        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
+
+    // Smear out the pointer to be varying; either the base pointer or the
+    // index must be varying for this method to be called.
+    bool baseIsUniform =
+        (llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(basePtr->getType()));
+    assert(baseIsUniform == false || indexIsVarying == true);
+    llvm::Value *varyingPtr = baseIsUniform ?
+        SmearUniform(basePtr, "ptr_smear") : basePtr;

+    // newPtr = ptr + offset
+    return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, "new_ptr");
+}
+
+
+llvm::Value *
+FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
+                                       const Type *ptrType, const char *name) {
+    if (basePtr == NULL || index == NULL) {
+        assert(m->errorCount > 0);
+        return NULL;
+    }
+
+    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
+        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
+    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
+
+    bool indexIsVaryingType =
+        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index->getType());
+
+    if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
+        // The easy case: both the base pointer and the indices are
+        // uniform, so just emit the regular LLVM GEP instruction
+        llvm::Value *ind[1] = { index };
+#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
+        llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
+        llvm::Instruction *inst =
+            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
+                                            name ? name : "gep", bblock);
+#else
+        llvm::Instruction *inst =
+            llvm::GetElementPtrInst::Create(basePtr, &ind[0], &ind[1],
                                            name ? name : "gep", bblock);
+#endif
+        AddDebugPos(inst);
+        return inst;
+    }
+    else
+        return applyVaryingGEP(basePtr, index, ptrType);
+}
+
+
 llvm::Value *
 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
-                                       llvm::Value *index1, const char *name) {
+                                       llvm::Value *index1, const Type *ptrType,
+                                       const char *name) {
     if (basePtr == NULL || index0 == NULL || index1 == NULL) {
         assert(m->errorCount > 0);
         return NULL;
     }

-    // FIXME: do we need need to handle the case of the first index being
-    // varying?  It's not currently needed...
-    assert(!llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index0->getType()));
+    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
+        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
+    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);

-    LLVM_TYPE_CONST llvm::Type *basePtrType = basePtr->getType();
-    LLVM_TYPE_CONST llvm::ArrayType *baseArrayType =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(basePtrType);
-    bool baseIsVaryingTypePointer = (baseArrayType != NULL) &&
-        llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(baseArrayType->getElementType());
-    bool indexIsVaryingType =
+    bool index0IsVaryingType =
+        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index0->getType());
+    bool index1IsVaryingType =
        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index1->getType());

-    if (!indexIsVaryingType && !baseIsVaryingTypePointer) {
+    if (index0IsVaryingType == false && index1IsVaryingType == false &&
+        ptrType->IsUniformType() == true) {
         // The easy case: both the base pointer and the indices are
         // uniform, so just emit the regular LLVM GEP instruction
         llvm::Value *indices[2] = { index0, index1 };
@@ -1388,148 +1402,183 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0
         return inst;
     }
     else {
-        // We have a varying pointer and/or indices; emit the appropriate
-        // GEP for each of the program instances
-        llvm::Value *lret = NULL;
-        for (int i = 0; i < g->target.vectorWidth; ++i) {
-            // Get the index, either using the same one if it's uniform or
-            // the one for this lane if it's varying
-            llvm::Value *indexElt;
-            if (indexIsVaryingType)
-                indexElt = ExtractInst(index1, i, "get_array_index");
-            else
-                indexElt = index1;
+        // Handle the first dimension with index0
+        llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);

-            // Similarly figure out the appropriate base pointer
-            llvm::Value *aptr;
-            if (baseIsVaryingTypePointer)
-                aptr = ExtractInst(basePtr, i, "get_array_index");
-            else
-                aptr = basePtr;
+        // Now index into the second dimension with index1.  First figure
+        // out the type of ptr0.
+        const Type *baseType = ptrType->GetBaseType();
+        const SequentialType *st = dynamic_cast<const SequentialType *>(baseType);
+        assert(st != NULL);

-            // Do the GEP for this lane
-            llvm::Value *eltPtr = GetElementPtrInst(aptr, index0, indexElt, name);
+        bool ptr0IsUniform =
+            llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(ptr0->getType());
+        const Type *ptr0BaseType = st->GetElementType();
+        const Type *ptr0Type = ptr0IsUniform ?
+            PointerType::GetUniform(ptr0BaseType) :
+            PointerType::GetVarying(ptr0BaseType);

-            if (lret == NULL) {
-                // This is kind of a hack: use the type from the GEP to
-                // figure out the return type and the first time through,
-                // create an undef value of that type here
-                LLVM_TYPE_CONST llvm::PointerType *elementPtrType =
-                    llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(eltPtr->getType());
-                LLVM_TYPE_CONST llvm::Type *elementType =
-                    elementPtrType->getElementType();
-                lret = llvm::UndefValue::get(LLVMPointerVectorType(elementType));
-            }
-
-            // And insert the result of the GEP into the return value
-            lret = InsertInst(lret, eltPtr, i, "elt_ptr_store");
-        }
-        return lret;
+        return applyVaryingGEP(ptr0, index1, ptr0Type);
     }
 }


 llvm::Value *
-FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
-                                       const char *name) {
-    return GetElementPtrInst(basePtr, LLVMInt32(v0), LLVMInt32(v1), name);
+FunctionEmitContext::AddElementOffset(llvm::Value *basePtr, int elementNum,
+                                      const Type *ptrType, const char *name) {
+    if (ptrType == NULL || ptrType->IsUniformType() ||
+        dynamic_cast<const ReferenceType *>(ptrType) != NULL) {
+        // If the pointer is uniform or we have a reference (which is a
+        // uniform pointer in the end), we can use the regular LLVM GEP.
+        llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) };
+#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
+        llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
+        return llvm::GetElementPtrInst::Create(basePtr, arrayRef,
+                                               name ? name : "struct_offset", bblock);
+#else
+        return llvm::GetElementPtrInst::Create(basePtr, &offsets[0], &offsets[2],
+                                               name ? name : "struct_offset", bblock);
+#endif
+
+    }
+
+    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
+        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
+    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
+
+    // Otherwise do the math to find the offset and add it to the given
+    // varying pointers
+    const StructType *st =
+        dynamic_cast<const StructType *>(ptrType->GetBaseType());
+    llvm::Value *offset = NULL;
+    if (st != NULL)
+        // If the pointer is to a structure, Target::StructOffset() gives
+        // us the offset in bytes to the given element of the structure
+        offset = g->target.StructOffset(st->LLVMType(g->ctx), elementNum);
+    else {
+        // Otherwise we should have a vector here and the offset is given
+        // by the element number times the size of the element type of the
+        // vector.
+        const VectorType *vt =
+            dynamic_cast<const VectorType *>(ptrType->GetBaseType());
+        assert(vt != NULL);
+        llvm::Value *size =
+            g->target.SizeOf(vt->GetElementType()->LLVMType(g->ctx));
+        llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ?
+            LLVMInt32(elementNum) : LLVMInt64(elementNum);
+        offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
+    }
+
+    offset = SmearUniform(offset, "offset_smear");
+
+    if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
+        // If we're doing 32 bit addressing with a 64 bit target, although
+        // we did the math above in 32 bit, we need to go to 64 bit before
+        // we add the offset to the varying pointers.
+        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
+
+    return BinaryOperator(llvm::Instruction::Add, basePtr, offset,
+                          "struct_ptr_offset");
 }


 llvm::Value *
-FunctionEmitContext::LoadInst(llvm::Value *lvalue, llvm::Value *mask,
-                              const Type *type, const char *name) {
-    if (lvalue == NULL) {
+FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
+    if (ptr == NULL) {
         assert(m->errorCount > 0);
         return NULL;
     }

-    if (llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(lvalue->getType())) {
-        // If the lvalue is a straight up regular pointer, then just issue
+    LLVM_TYPE_CONST llvm::PointerType *pt =
+        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(ptr->getType());
+    assert(pt != NULL);
+
+    // FIXME: it's not clear to me that we generate unaligned vector loads
+    // of varying stuff out of the front-end any more.  (Only by the
+    // optimization passes that lower gathers to vector loads, I think..)
+    // So remove this??
+    int align = 0;
+    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(pt->getElementType()))
+        align = 1;
+    llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? name : "load",
+                                                 false /* not volatile */,
+                                                 align, bblock);
+    AddDebugPos(inst);
+    return inst;
+}
+
+
+llvm::Value *
+FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
+                              const Type *ptrType, const char *name) {
+    if (ptr == NULL) {
+        assert(m->errorCount > 0);
+        return NULL;
+    }
+
+    assert(ptrType != NULL && mask != NULL);
+
+    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
+        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
+
+    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
+
+    if (ptrType->IsUniformType()) {
+        // FIXME: same issue as above load inst regarding alignment...
+        //
+        // If the ptr is a straight up regular pointer, then just issue
+        // a regular load.  First figure out the alignment; in general we
         // can just assume the natural alignment (0 here), but for varying
         // atomic types, we need to make sure that the compiler emits
         // unaligned vector loads, so we specify a reduced alignment here.
         int align = 0;
-        const AtomicType *atomicType = dynamic_cast<const AtomicType *>(type);
+        const AtomicType *atomicType =
+            dynamic_cast<const AtomicType *>(ptrType->GetBaseType());
         if (atomicType != NULL && atomicType->IsVaryingType())
             // We actually just want to align to the vector element
             // alignment, but can't easily get that here, so just tell LLVM
             // it's totally unaligned.  (This shouldn't make any difference
             // vs the proper alignment in practice.)
             align = 1;
-        llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load",
+        llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? name : "load",
                                                      false /* not volatile */,
                                                      align, bblock);
         AddDebugPos(inst);
         return inst;
     }
     else {
-        // Otherwise we should have a varying lvalue and it's time for a
-        // gather.  The "type" parameter only has to be non-NULL for the
-        // gather path here (we can't reliably figure out all of the type
-        // information we need from the LLVM::Type, so have to carry the
-        // ispc type in through this path..
-        assert(type != NULL && mask != NULL);
-        assert(llvm::isa<LLVM_TYPE_CONST llvm::ArrayType>(lvalue->getType()));
-        return gather(lvalue, mask, type, name);
+        // Otherwise we should have a varying ptr and it's time for a
+        // gather.
+        return gather(ptr, ptrType, mask, name);
     }
 }


 llvm::Value *
-FunctionEmitContext::gather(llvm::Value *lvalue, llvm::Value *mask,
-                            const Type *type, const char *name) {
+FunctionEmitContext::gather(llvm::Value *ptr, const Type *ptrType,
+                            llvm::Value *mask, const char *name) {
     // We should have a varying lvalue if we get here...
-    assert(llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(lvalue->getType()));
+    assert(ptrType->IsVaryingType() &&
+           ptr->getType() == LLVMTypes::VoidPointerVectorType);

-    LLVM_TYPE_CONST llvm::Type *retType = type->LLVMType(g->ctx);
+    const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
+    LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
+
+    const CollectionType *collectionType =
+        dynamic_cast<const CollectionType *>(ptrType->GetBaseType());
+    if (collectionType != NULL) {
+        // For collections, recursively gather element wise to find the
+        // result.
+        llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
+        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
+            llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
+            const Type *eltPtrType =
+                PointerType::GetVarying(collectionType->GetElementType(i));
+            eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);

-    const StructType *st = dynamic_cast<const StructType *>(type);
-    if (st) {
-        // If we're gathering structures, do an element-wise gather
-        // recursively.
-        llvm::Value *retValue = llvm::UndefValue::get(retType);
-        for (int i = 0; i < st->GetElementCount(); ++i) {
-            llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
             // This in turn will be another gather
-            llvm::Value *eltValues =
-                LoadInst(eltPtrs, mask, st->GetElementType(i), name);
-            retValue = InsertInst(retValue, eltValues, i, "set_value");
-        }
-        return retValue;
-    }
+            llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name);

-    const VectorType *vt = dynamic_cast<const VectorType *>(type);
-    if (vt) {
-        // Similarly, if it's a vector type, do a gather for each of the
-        // vector elements
-        llvm::Value *retValue = llvm::UndefValue::get(retType);
-        // FIXME: yuck.  Change lvalues to be pointers to arrays so that
-        // the GEP stuff in the loop below ends up computing pointers based
-        // on elements in the vectors rather than incorrectly advancing to
-        // the next vector...
-        LLVM_TYPE_CONST llvm::Type *eltType =
-            vt->GetBaseType()->GetAsUniformType()->LLVMType(g->ctx);
-        lvalue = BitCastInst(lvalue, llvm::PointerType::get(llvm::ArrayType::get(eltType, 0), 0));
-
-        for (int i = 0; i < vt->GetElementCount(); ++i) {
-            llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
-            llvm::Value *eltValues = LoadInst(eltPtrs, mask, vt->GetBaseType(),
-                                              name);
-            retValue = InsertInst(retValue, eltValues, i, "set_value");
-        }
-        return retValue;
-    }
-
-    const ArrayType *at = dynamic_cast<const ArrayType *>(type);
-    if (at) {
-        // Arrays are also handled recursively and element-wise
-        llvm::Value *retValue = llvm::UndefValue::get(retType);
-        for (int i = 0; i < at->GetElementCount(); ++i) {
-            llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
-            llvm::Value *eltValues = LoadInst(eltPtrs, mask,
-                                              at->GetElementType(), name);
             retValue = InsertInst(retValue, eltValues, i, "set_value");
         }
         return retValue;
@@ -1539,48 +1588,41 @@ FunctionEmitContext::gather(llvm::Value *lvalue, llvm::Value *mask,
     // can go and do the actual gather
     AddInstrumentationPoint("gather");

-    llvm::Function *gather = NULL;
     // Figure out which gather function to call based on the size of
     // the elements.
-    const PointerType *pt = dynamic_cast<const PointerType *>(type);
-    if (pt != NULL) {
-        if (g->target.is32bit)
-            gather = m->module->getFunction("__pseudo_gather_32");
-        else
-            gather = m->module->getFunction("__pseudo_gather_64");
-    }
-    else if (retType == LLVMTypes::DoubleVectorType ||
-             retType == LLVMTypes::Int64VectorType)
-        gather = m->module->getFunction("__pseudo_gather_64");
-    else if (retType == LLVMTypes::FloatVectorType ||
-             retType == LLVMTypes::Int32VectorType)
-        gather = m->module->getFunction("__pseudo_gather_32");
-    else if (retType == LLVMTypes::Int16VectorType)
-        gather = m->module->getFunction("__pseudo_gather_16");
+    const PointerType *pt = dynamic_cast<const PointerType *>(returnType);
+    const char *funcName = NULL;
+    if (pt != NULL)
+        funcName = g->target.is32Bit ? "__pseudo_gather32_32" :
+                                       "__pseudo_gather64_64";
+    else if (llvmReturnType == LLVMTypes::DoubleVectorType ||
+             llvmReturnType == LLVMTypes::Int64VectorType)
+        funcName = g->target.is32Bit ? "__pseudo_gather32_64" :
+                                       "__pseudo_gather64_64";
+    else if (llvmReturnType == LLVMTypes::FloatVectorType ||
+             llvmReturnType == LLVMTypes::Int32VectorType)
+        funcName = g->target.is32Bit ? "__pseudo_gather32_32" :
+                                       "__pseudo_gather64_32";
+    else if (llvmReturnType == LLVMTypes::Int16VectorType)
+        funcName = g->target.is32Bit ? "__pseudo_gather32_16" :
+                                       "__pseudo_gather64_16";
     else {
-        assert(retType == LLVMTypes::Int8VectorType);
-        gather = m->module->getFunction("__pseudo_gather_8");
+        assert(llvmReturnType == LLVMTypes::Int8VectorType);
+        funcName = g->target.is32Bit ? "__pseudo_gather32_8" :
+                                       "__pseudo_gather64_8";
     }
-    assert(gather != NULL);
-    lvalue = addVaryingOffsetsIfNeeded(lvalue, type);
+    llvm::Function *gatherFunc = m->module->getFunction(funcName);
+    assert(gatherFunc != NULL);

-    llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType);
-    llvm::Value *call = CallInst(gather, type, voidlvalue, mask, name);
+    llvm::Value *call = CallInst(gatherFunc, NULL, ptr, mask, name);

     // Add metadata about the source file location so that the
     // optimization passes can print useful performance warnings if we
     // can't optimize out this gather
     addGSMetadata(call, currentPos);

-    if (pt != NULL) {
-        LLVM_TYPE_CONST llvm::Type *ptrType =
-            pt->GetAsUniformType()->LLVMType(g->ctx);
-        return IntToPtrInst(VectorToArrayInst(call), ptrType,
-                            "gather_bitcast");
-    }
-    else
-        return BitCastInst(call, retType, "gather_bitcast");
+    return BitCastInst(call, llvmReturnType, "gather_bitcast");
 }
@@ -1617,8 +1659,9 @@ FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) {

 llvm::Value *
-FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType, const char *name,
-                                int align, bool atEntryBlock) {
+FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType,
+                                const char *name, int align,
+                                bool atEntryBlock) {
     llvm::AllocaInst *inst = NULL;
     if (atEntryBlock) {
         // We usually insert it right before the jump instruction at the
@@ -1657,91 +1700,89 @@ FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType, const char
     instance (that case is handled by scatters).
 */
 void
-FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
-                                 const Type *rvalueType,
-                                 llvm::Value *storeMask) {
-    if (rvalue == NULL || lvalue == NULL) {
+FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
+                                 const Type *ptrType, llvm::Value *mask) {
+    if (value == NULL || ptr == NULL) {
         assert(m->errorCount > 0);
         return;
     }

-    assert(llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(lvalue->getType()));
-
+    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
+    assert(ptrType->IsUniformType());
+
+    const Type *valueType = ptrType->GetBaseType();
     const CollectionType *collectionType =
-        dynamic_cast<const CollectionType *>(rvalueType);
+        dynamic_cast<const CollectionType *>(valueType);
     if (collectionType != NULL) {
         // Assigning a structure / array / vector.
Handle each element // individually with what turns into a recursive call to // makedStore() for (int i = 0; i < collectionType->GetElementCount(); ++i) { - llvm::Value *eltValue = ExtractInst(rvalue, i, "rvalue_member"); - llvm::Value *eltLValue = GetElementPtrInst(lvalue, 0, i, - "struct_lvalue_ptr"); - StoreInst(eltValue, eltLValue, storeMask, - collectionType->GetElementType(i)); + llvm::Value *eltValue = ExtractInst(value, i, "value_member"); + llvm::Value *eltPtr = + AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr"); + const Type *eltPtrType = + PointerType::GetUniform(collectionType->GetElementType(i)); + StoreInst(eltValue, eltPtr, mask, eltPtrType); } return; } - const PointerType *pt = dynamic_cast(rvalueType); - if (pt != NULL) { - if (g->target.is32bit) { - rvalue = PtrToIntInst(rvalue, LLVMTypes::Int32Type, "ptr2int"); - rvalueType = AtomicType::VaryingInt32; - } - else { - rvalue = PtrToIntInst(rvalue, LLVMTypes::Int64Type, "ptr2int"); - rvalueType = AtomicType::VaryingInt64; - } - rvalue = ArrayToVectorInst(rvalue); - } - - // We must have a regular atomic or enumerator type at this point - assert(dynamic_cast(rvalueType) != NULL || - dynamic_cast(rvalueType) != NULL); - rvalueType = rvalueType->GetAsNonConstType(); + // We must have a regular atomic, enumerator, or pointer type at this + // point. + assert(dynamic_cast(valueType) != NULL || + dynamic_cast(valueType) != NULL || + dynamic_cast(valueType) != NULL); + valueType = valueType->GetAsNonConstType(); llvm::Function *maskedStoreFunc = NULL; // Figure out if we need a 8, 16, 32 or 64-bit masked store. - if (rvalueType == AtomicType::VaryingDouble || - rvalueType == AtomicType::VaryingInt64 || - rvalueType == AtomicType::VaryingUInt64) { + if (dynamic_cast(valueType) != NULL) { + if (g->target.is32Bit) + maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32"); + else + maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_64"); + } + else if (valueType == AtomicType::VaryingDouble || + valueType == AtomicType::VaryingInt64 || + valueType == AtomicType::VaryingUInt64) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_64"); - lvalue = BitCastInst(lvalue, LLVMTypes::Int64VectorPointerType, - "lvalue_to_int64vecptr"); - rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType, - "rvalue_to_int64"); + ptr = BitCastInst(ptr, LLVMTypes::Int64VectorPointerType, + "ptr_to_int64vecptr"); + value = BitCastInst(value, LLVMTypes::Int64VectorType, + "value_to_int64"); } - else if (rvalueType == AtomicType::VaryingFloat || - rvalueType == AtomicType::VaryingBool || - rvalueType == AtomicType::VaryingInt32 || - rvalueType == AtomicType::VaryingUInt32 || - dynamic_cast(rvalueType) != NULL) { + else if (valueType == AtomicType::VaryingFloat || + valueType == AtomicType::VaryingBool || + valueType == AtomicType::VaryingInt32 || + valueType == AtomicType::VaryingUInt32 || + dynamic_cast(valueType) != NULL) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32"); - lvalue = BitCastInst(lvalue, LLVMTypes::Int32VectorPointerType, - "lvalue_to_int32vecptr"); - if (rvalueType == AtomicType::VaryingFloat) - rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType, - "rvalue_to_int32"); + ptr = BitCastInst(ptr, LLVMTypes::Int32VectorPointerType, + "ptr_to_int32vecptr"); + if (valueType == AtomicType::VaryingFloat) + value = BitCastInst(value, LLVMTypes::Int32VectorType, + "value_to_int32"); } - else if (rvalueType == AtomicType::VaryingInt16 || - rvalueType == AtomicType::VaryingUInt16) { + 
else if (valueType == AtomicType::VaryingInt16 || + valueType == AtomicType::VaryingUInt16) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_16"); - lvalue = BitCastInst(lvalue, LLVMTypes::Int16VectorPointerType, - "lvalue_to_int16vecptr"); + ptr = BitCastInst(ptr, LLVMTypes::Int16VectorPointerType, + "ptr_to_int16vecptr"); } - else if (rvalueType == AtomicType::VaryingInt8 || - rvalueType == AtomicType::VaryingUInt8) { + else if (valueType == AtomicType::VaryingInt8 || + valueType == AtomicType::VaryingUInt8) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_8"); - lvalue = BitCastInst(lvalue, LLVMTypes::Int8VectorPointerType, - "lvalue_to_int8vecptr"); + ptr = BitCastInst(ptr, LLVMTypes::Int8VectorPointerType, + "ptr_to_int8vecptr"); } + assert(maskedStoreFunc != NULL); std::vector args; - args.push_back(lvalue); - args.push_back(rvalue); - args.push_back(storeMask); - CallInst(maskedStoreFunc, AtomicType::Void, args); + args.push_back(ptr); + args.push_back(value); + args.push_back(mask); + CallInst(maskedStoreFunc, NULL, args); } @@ -1753,143 +1794,127 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue, program instance are on. If they're off, don't do anything. */ void -FunctionEmitContext::scatter(llvm::Value *rvalue, llvm::Value *lvalue, - llvm::Value *storeMask, const Type *rvalueType) { - assert(rvalueType->IsVaryingType()); - assert(llvm::isa(lvalue->getType())); +FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr, + const Type *ptrType, llvm::Value *mask) { + assert(dynamic_cast(ptrType) != NULL); + assert(ptrType->IsVaryingType()); - const StructType *structType = dynamic_cast(rvalueType); - if (structType) { - // Scatter the struct elements individually - for (int i = 0; i < structType->GetElementCount(); ++i) { - llvm::Value *lv = GetElementPtrInst(lvalue, 0, i); - llvm::Value *rv = ExtractInst(rvalue, i); - scatter(rv, lv, storeMask, structType->GetElementType(i)); - } - return; - } - - const VectorType *vt = dynamic_cast(rvalueType); - if (vt) { - // FIXME: yuck. Change lvalues to be pointers to arrays so that - // the GEP stuff in the loop below ends up computing pointers based - // on elements in the vectors rather than incorrectly advancing to - // the next vector... 
- LLVM_TYPE_CONST llvm::Type *eltType = - vt->GetBaseType()->GetAsUniformType()->LLVMType(g->ctx); - lvalue = BitCastInst(lvalue, llvm::PointerType::get(llvm::ArrayType::get(eltType, 0), 0)); - - for (int i = 0; i < vt->GetElementCount(); ++i) { - llvm::Value *lv = GetElementPtrInst(lvalue, 0, i); - llvm::Value *rv = ExtractInst(rvalue, i); - scatter(rv, lv, storeMask, vt->GetElementType()); - } - return; - } + const Type *valueType = ptrType->GetBaseType(); // I think this should be impossible - assert(dynamic_cast(rvalueType) == NULL); + assert(dynamic_cast(valueType) == NULL); - const PointerType *pt = dynamic_cast(rvalueType); + const CollectionType *collectionType = dynamic_cast(valueType); + if (collectionType != NULL) { + // Scatter the collection elements individually + for (int i = 0; i < collectionType->GetElementCount(); ++i) { + llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType); + llvm::Value *eltValue = ExtractInst(value, i); + const Type *eltPtrType = + PointerType::GetVarying(collectionType->GetElementType(i)); + eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType); + scatter(eltValue, eltPtr, eltPtrType, mask); + } + return; + } + + const PointerType *pt = dynamic_cast(valueType); // And everything should be a pointer or atomic from here on out... assert(pt != NULL || - dynamic_cast(rvalueType) != NULL); + dynamic_cast(valueType) != NULL); - llvm::Function *func = NULL; - LLVM_TYPE_CONST llvm::Type *type = rvalue->getType(); - if (pt != NULL) { - if (g->target.is32bit) { - rvalue = PtrToIntInst(rvalue, LLVMTypes::Int32Type); - rvalue = ArrayToVectorInst(rvalue); - func = m->module->getFunction("__pseudo_scatter_32"); - } - else { - rvalue = PtrToIntInst(rvalue, LLVMTypes::Int64Type); - rvalue = ArrayToVectorInst(rvalue); - func = m->module->getFunction("__pseudo_scatter_64"); - } - } + LLVM_TYPE_CONST llvm::Type *type = value->getType(); + const char *funcName = NULL; + if (pt != NULL) + funcName = g->target.is32Bit ? "__pseudo_scatter32_32" : + "__pseudo_scatter64_64"; else if (type == LLVMTypes::DoubleVectorType || - type == LLVMTypes::Int64VectorType) { - func = m->module->getFunction("__pseudo_scatter_64"); - rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType, "rvalue2int"); + type == LLVMTypes::Int64VectorType) { + funcName = g->target.is32Bit ? "__pseudo_scatter32_64" : + "__pseudo_scatter64_64"; + value = BitCastInst(value, LLVMTypes::Int64VectorType, "value2int"); } else if (type == LLVMTypes::FloatVectorType || type == LLVMTypes::Int32VectorType) { - func = m->module->getFunction("__pseudo_scatter_32"); - rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType, "rvalue2int"); + funcName = g->target.is32Bit ? "__pseudo_scatter32_32" : + "__pseudo_scatter64_32"; + value = BitCastInst(value, LLVMTypes::Int32VectorType, "value2int"); } else if (type == LLVMTypes::Int16VectorType) - func = m->module->getFunction("__pseudo_scatter_16"); + funcName = g->target.is32Bit ? "__pseudo_scatter32_16" : + "__pseudo_scatter64_16"; else if (type == LLVMTypes::Int8VectorType) - func = m->module->getFunction("__pseudo_scatter_8"); - assert(func != NULL); + funcName = g->target.is32Bit ? 
"__pseudo_scatter32_8" : + "__pseudo_scatter64_8"; + + llvm::Function *scatterFunc = m->module->getFunction(funcName); + assert(scatterFunc != NULL); AddInstrumentationPoint("scatter"); - lvalue = addVaryingOffsetsIfNeeded(lvalue, rvalueType); - - llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType); std::vector args; - args.push_back(voidlvalue); - args.push_back(rvalue); - args.push_back(storeMask); - llvm::Value *inst = CallInst(func, AtomicType::Void, args); + args.push_back(ptr); + args.push_back(value); + args.push_back(mask); + llvm::Value *inst = CallInst(scatterFunc, NULL, args); addGSMetadata(inst, currentPos); } void -FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, - const char *name) { - if (rvalue == NULL || lvalue == NULL) { +FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) { + if (value == NULL || ptr == NULL) { // may happen due to error elsewhere assert(m->errorCount > 0); return; } llvm::Instruction *inst; - if (llvm::isa(rvalue->getType())) - // Specify an unaligned store, since we don't know that the lvalue + if (llvm::isa(value->getType())) + // FIXME: same for load--do we still need/want this?? + // Specify an unaligned store, since we don't know that the ptr // will in fact be aligned to a vector width here. (Actually // should be aligned to the alignment of the vector elment type...) - inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, + inst = new llvm::StoreInst(value, ptr, false /* not volatile */, 1, bblock); else - inst = new llvm::StoreInst(rvalue, lvalue, bblock); + inst = new llvm::StoreInst(value, ptr, bblock); AddDebugPos(inst); } void -FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, - llvm::Value *storeMask, const Type *rvalueType, - const char *name) { - if (rvalue == NULL || lvalue == NULL) { +FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, + llvm::Value *mask, const Type *ptrType) { + if (value == NULL || ptr == NULL) { // may happen due to error elsewhere assert(m->errorCount > 0); return; } + if (dynamic_cast(ptrType) != NULL) + ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget()); + // Figure out what kind of store we're doing here - if (rvalueType->IsUniformType()) { - // The easy case; a regular store, natural alignment is fine - llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, bblock); - AddDebugPos(si); + if (ptrType->IsUniformType()) { + if (ptrType->GetBaseType()->IsUniformType()) + // the easy case + StoreInst(value, ptr); + else if (mask == LLVMMaskAllOn) + // Otherwise it is a masked store unless we can determine that the + // mask is all on... (Unclear if this check is actually useful.) + StoreInst(value, ptr); + else + maskedStore(value, ptr, ptrType, mask); } - else if (llvm::isa(lvalue->getType())) - // We have a varying lvalue (an array of pointers), so it's time to + else { + assert(ptrType->IsVaryingType()); + // We have a varying ptr (an array of pointers), so it's time to // scatter - scatter(rvalue, lvalue, storeMask, rvalueType); - else if (storeMask == LLVMMaskAllOn) { - // Otherwise it is a masked store unless we can determine that the - // mask is all on... 
- StoreInst(rvalue, lvalue, name); + scatter(value, ptr, ptrType, mask); } - else - maskedStore(rvalue, lvalue, rvalueType, storeMask); } @@ -1983,33 +2008,32 @@ FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, } -/* Given a value representing a function to be called or possibly-varying - pointer to a function to be called, figure out how many arguments the - function has. */ +/** Given a value representing a function to be called or possibly-varying + pointer to a function to be called, figure out how many arguments the + function has. */ static unsigned int -lCalleeArgCount(llvm::Value *callee) { +lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) { LLVM_TYPE_CONST llvm::FunctionType *ft = llvm::dyn_cast(callee->getType()); + if (ft == NULL) { LLVM_TYPE_CONST llvm::PointerType *pt = llvm::dyn_cast(callee->getType()); if (pt == NULL) { - // varying... - LLVM_TYPE_CONST llvm::ArrayType *at = - llvm::dyn_cast(callee->getType()); - assert(at != NULL); - pt = llvm::dyn_cast(at->getElementType()); - assert(pt != NULL); + // varying--in this case, it must be the version of the + // function that takes a mask + return funcType->GetNumParameters() + 1; } ft = llvm::dyn_cast(pt->getElementType()); } + assert(ft != NULL); return ft->getNumParams(); } llvm::Value * -FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, +FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, const std::vector &args, const char *name) { if (func == NULL) { @@ -2021,17 +2045,13 @@ FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, // Most of the time, the mask is passed as the last argument. this // isn't the case for things like intrinsics, builtins, and extern "C" // functions from the application. Add the mask if it's needed. - unsigned int calleeArgCount = lCalleeArgCount(func); + unsigned int calleeArgCount = lCalleeArgCount(func, funcType); assert(argVals.size() + 1 == calleeArgCount || argVals.size() == calleeArgCount); if (argVals.size() + 1 == calleeArgCount) argVals.push_back(GetFullMask()); - LLVM_TYPE_CONST llvm::Type *funcType = func->getType(); - LLVM_TYPE_CONST llvm::ArrayType *funcArrayType = - llvm::dyn_cast(funcType); - - if (funcArrayType == NULL) { + if (llvm::isa(func->getType()) == false) { // Regular 'uniform' function call--just one function or function // pointer, so just emit the IR directly. #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) @@ -2047,10 +2067,10 @@ FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, } else { // Emit the code for a varying function call, where we have an - // array of function pointers, one for each program instance. The + // vector of function pointers, one for each program instance. The // basic strategy is that we go through the function pointers, and // for the executing program instances, for each unique function - // pointer that's in the array, call that function with a mask + // pointer that's in the vector, call that function with a mask // equal to the set of active program instances that also have that // function pointer. When all unique function pointers have been // called, we're done. 
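
The strategy the comment above describes can be modeled in scalar C++ like this (an illustrative sketch; WIDTH, FuncPtr, and varyingCall are invented names, and the real implementation operates on IR values and basic blocks):

    #include <cstdint>
    static const int WIDTH = 8;                    // gang size stand-in
    typedef void (*FuncPtr)(uint64_t callMask);    // callee also takes the mask

    static void varyingCall(FuncPtr fptr[WIDTH], uint64_t activeMask) {
        uint64_t pending = activeMask;     // lanes whose call hasn't run yet
        while (pending != 0) {
            // __count_trailing_zeros: index of the first pending lane
            int firstLane = __builtin_ctzll(pending);
            FuncPtr f = fptr[firstLane];
            // The call mask is every pending lane with this same pointer
            uint64_t callMask = 0;
            for (int i = 0; i < WIDTH; ++i)
                if ((pending & (1ull << i)) != 0 && fptr[i] == f)
                    callMask |= (1ull << i);
            f(callMask);                   // one call covers all those lanes
            pending &= ~callMask;          // and they're done
        }
    }

(__builtin_ctzll is the GCC/Clang intrinsic standing in for the __count_trailing_zeros builtin used above.)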
@@ -2059,20 +2079,17 @@ FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call"); llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done"); + // Get the current mask value so we can restore it later llvm::Value *origMask = GetInternalMask(); - // First allocate memory to accumulate the various lanes' return - // values... + // First allocate memory to accumulate the various program + // instances' return values... + const Type *returnType = funcType->GetReturnType(); LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx); llvm::Value *resultPtr = NULL; if (llvmReturnType->isVoidTy() == false) resultPtr = AllocaInst(llvmReturnType); - // Store the function pointers into an array so that we can index - // into them.. - llvm::Value *funcPtrArray = AllocaInst(funcType); - StoreInst(func, funcPtrArray); - // The memory pointed to by maskPointer tracks the set of program // instances for which we still need to call the function they are // pointing to. It starts out initialized with the mask of @@ -2087,7 +2104,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, // bbTest: are any lanes of the mask still on? If so, jump to // bbCall SetCurrentBasicBlock(bbTest); { - llvm::Value *maskLoad = LoadInst(maskPtr, NULL, NULL); + llvm::Value *maskLoad = LoadInst(maskPtr); llvm::Value *any = Any(maskLoad); BranchInst(bbCall, bbDone, any); } @@ -2097,39 +2114,27 @@ FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, SetCurrentBasicBlock(bbCall); { // Figure out the first lane that still needs its function // pointer to be called. - llvm::Value *currentMask = LoadInst(maskPtr, NULL, NULL); + llvm::Value *currentMask = LoadInst(maskPtr); llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros"); assert(cttz != NULL); - llvm::Value *firstLane = CallInst(cttz, AtomicType::UniformInt32, - LaneMask(currentMask), "first_lane"); + llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask), + "first_lane"); // Get the pointer to the function we're going to call this time through: - // ftpr = funcPtrArray[firstLane] - llvm::Value *fpOffset = - GetElementPtrInst(funcPtrArray, LLVMInt32(0), firstLane, - "func_offset_ptr"); - llvm::Value *fptr = LoadInst(fpOffset, NULL, NULL); + // ftpr = func[firstLane] + llvm::Value *fptr = + llvm::ExtractElementInst::Create(func, firstLane, + "extract_fptr", bblock); // Smear it out into an array of function pointers - llvm::Value *fptrSmear = SmearScalar(fptr, "func_ptr"); - - // Now convert the smeared array of function pointers and the - // given array of function pointers to vectors of int32s or - // int64s, where the pointer has been cast to an int of the - // appropraite size for the compilation target. - LLVM_TYPE_CONST llvm::Type *ptrIntType = g->target.is32bit ? - LLVMTypes::Int32Type : LLVMTypes::Int64Type; - llvm::Value *fpSmearAsVec = - ArrayToVectorInst(PtrToIntInst(fptrSmear, ptrIntType)); - llvm::Value *fpOrigAsVec = - ArrayToVectorInst(PtrToIntInst(func, ptrIntType)); + llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr"); // fpOverlap = (fpSmearAsVec == fpOrigAsVec). This gives us a // mask for the set of program instances that have the same // value for their function pointer. 
llvm::Value *fpOverlap = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, - fpSmearAsVec, fpOrigAsVec); + fptrSmear, func); fpOverlap = I1VecToBoolVec(fpOverlap); // Figure out the mask to use when calling the function @@ -2144,14 +2149,23 @@ FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, // Set the mask SetInternalMask(callMask); + // bitcast the i32/64 function pointer to the actual function + // pointer type (the variant that includes a mask). + LLVM_TYPE_CONST llvm::Type *llvmFuncType = + funcType->LLVMFunctionType(g->ctx, true); + LLVM_TYPE_CONST llvm::Type *llvmFPtrType = + llvm::PointerType::get(llvmFuncType, 0); + llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType); + // Call the function: callResult = call ftpr(args, args, call mask) - llvm::Value *callResult = CallInst(fptr, returnType, args, name); + llvm::Value *callResult = CallInst(fptrCast, funcType, args, name); // Now, do a masked store into the memory allocated to // accumulate the result using the call mask. if (callResult != NULL) { assert(resultPtr != NULL); - StoreInst(callResult, resultPtr, callMask, returnType); + StoreInst(callResult, resultPtr, callMask, + PointerType::GetUniform(returnType)); } else assert(resultPtr == NULL); @@ -2175,28 +2189,28 @@ FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, // accumulated in the result memory. SetCurrentBasicBlock(bbDone); SetInternalMask(origMask); - return LoadInst(resultPtr, NULL, NULL); + return resultPtr ? LoadInst(resultPtr) : NULL; } } llvm::Value * -FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, +FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg, const char *name) { std::vector args; args.push_back(arg); - return CallInst(func, returnType, args, name); + return CallInst(func, funcType, args, name); } llvm::Value * -FunctionEmitContext::CallInst(llvm::Value *func, const Type *returnType, +FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg0, llvm::Value *arg1, const char *name) { std::vector args; args.push_back(arg0); args.push_back(arg1); - return CallInst(func, returnType, args, name); + return CallInst(func, funcType, args, name); } @@ -2210,8 +2224,7 @@ FunctionEmitContext::ReturnInst() { if (returnValuePtr != NULL) { // We have value(s) to return; load them from their storage // location - llvm::Value *retVal = LoadInst(returnValuePtr, NULL, NULL, - "return_value"); + llvm::Value *retVal = LoadInst(returnValuePtr, "return_value"); rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock); } else { @@ -2249,10 +2262,17 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee, llvm::Function *falloc = m->module->getFunction("ISPCAlloc"); assert(falloc != NULL); + llvm::Value *structSize = g->target.SizeOf(argStructType); + if (structSize->getType() != LLVMTypes::Int64Type) + // ISPCAlloc expects the size as an uint64_t, but on 32-bit + // targets, SizeOf returns a 32-bit value + structSize = ZExtInst(structSize, LLVMTypes::Int64Type, + "struct_size_to_64"); int align = 4 * RoundUpPow2(g->target.nativeVectorWidth); + std::vector allocArgs; allocArgs.push_back(launchGroupHandlePtr); - allocArgs.push_back(g->target.SizeOf(argStructType)); + allocArgs.push_back(structSize); allocArgs.push_back(LLVMInt32(align)); llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr"); llvm::Value *argmem = BitCastInst(voidmem, pt); @@ -2260,15 +2280,15 @@ 
FunctionEmitContext::LaunchInst(llvm::Value *callee,
     // Copy the values of the parameters into the appropriate place in
     // the argument block
     for (unsigned int i = 0; i < argVals.size(); ++i) {
-        llvm::Value *ptr = GetElementPtrInst(argmem, 0, i, "funarg");
+        llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
         // don't need to do masked store here, I think
         StoreInst(argVals[i], ptr);
     }
 
     // copy in the mask
     llvm::Value *mask = GetFullMask();
-    llvm::Value *ptr = GetElementPtrInst(argmem, 0, argVals.size(),
-                                         "funarg_mask");
+    llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
+                                        "funarg_mask");
     StoreInst(mask, ptr);
 
     // And emit the call to the user-supplied task launch function, passing
@@ -2282,13 +2302,13 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
     args.push_back(fptr);
     args.push_back(voidmem);
     args.push_back(launchCount);
-    return CallInst(flaunch, AtomicType::Void, args, "");
+    return CallInst(flaunch, NULL, args, "");
 }
 
 
 void
 FunctionEmitContext::SyncInst() {
-    llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr, NULL, NULL);
+    llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
     llvm::Value *nullPtrValue = llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
     llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
@@ -2301,7 +2321,7 @@ FunctionEmitContext::SyncInst() {
     llvm::Function *fsync = m->module->getFunction("ISPCSync");
     if (fsync == NULL)
         FATAL("Couldn't find ISPCSync declaration?!");
-    CallInst(fsync, AtomicType::Void, launchGroupHandle, "");
+    CallInst(fsync, NULL, launchGroupHandle, "");
     BranchInst(bPostSync);
 
     SetCurrentBasicBlock(bPostSync);
@@ -2309,43 +2329,46 @@ FunctionEmitContext::SyncInst() {
 
 /** When we're gathering from or scattering to a varying atomic type, we need
-    to add an appropraite toffset to the final address for each lane right
+    to add an appropriate offset to the final address for each lane right
     before we use it.  Given a varying pointer we're about to use and its
     type, this function determines whether these offsets are needed and
     returns an updated pointer that incorporates these offsets if needed.
  */
 llvm::Value *
-FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *type) {
-    // We should only have varying pointers here, which are represented as
-    // arrays of pointers in ispc.
-    LLVM_TYPE_CONST llvm::ArrayType *at =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(ptr->getType());
-    assert(at != NULL);
-    LLVM_TYPE_CONST llvm::PointerType *pt =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(at->getElementType());
-    assert(pt != NULL);
+FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
+                                               const Type *ptrType) {
+    // This should only be called for varying pointers
+    const PointerType *pt = dynamic_cast<const PointerType *>(ptrType);
+    assert(pt && pt->IsVaryingType());
 
-    // If we have pointers to vector types, e.g. [8 x <8 x float> *], then
-    // the data we're gathering from/scattering to is varying in memory.
-    // If we have pointers to scalar types, e.g. [8 x float *], then the
-    // data is uniform in memory and doesn't need any additional offsets.
-    if (pt->getElementType()->isIntegerTy() ||
-        pt->getElementType()->isFloatingPointTy() ||
-        pt->getElementType()->isPointerTy())
+    const Type *baseType = ptrType->GetBaseType();
+    assert(dynamic_cast<const AtomicType *>(baseType) != NULL ||
+           dynamic_cast<const EnumType *>(baseType) != NULL ||
+           dynamic_cast<const PointerType *>(baseType));
+    if (baseType->IsUniformType())
         return ptr;
+
+    // Find the size of a uniform element of the varying type
+    LLVM_TYPE_CONST llvm::Type *llvmBaseUniformType =
+        baseType->GetAsUniformType()->LLVMType(g->ctx);
+    llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType);
+    unifSize = SmearUniform(unifSize);
 
-    llvm::Value *varyingOffsets = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
-    for (int i = 0; i < g->target.vectorWidth; ++i)
-        varyingOffsets = InsertInst(varyingOffsets, LLVMInt32(i), i,
-                                    "varying_delta");
+    // Compute offset = <0, 1, .. > * unifSize
+    llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType());
+    for (int i = 0; i < g->target.vectorWidth; ++i) {
+        llvm::Value *iValue = (g->target.is32Bit || g->opt.force32BitAddressing) ?
+            LLVMInt32(i) : LLVMInt64(i);
+        varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
+    }
+    llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
+                                         varyingOffsets);
+
+    if (g->opt.force32BitAddressing == true && g->target.is32Bit == false)
+        // On 64-bit targets where we're doing 32-bit addressing
+        // calculations, we need to convert to an i64 vector before adding
+        // to the pointer
+        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");
 
-    // Cast the pointer type to the corresponding uniform type--e.g. cast
-    // <8 x float> * to float *s.
-    LLVM_TYPE_CONST llvm::Type *unifType = type->GetAsUniformType()->LLVMType(g->ctx);
-    LLVM_TYPE_CONST llvm::PointerType *ptrCastType =
-        llvm::PointerType::get(llvm::ArrayType::get(unifType, 0), 0);
-    ptr = BitCastInst(ptr, ptrCastType, "ptr2unif");
-
-    // And now we can do the per-lane offsets...
-    return GetElementPtrInst(ptr, LLVMInt32(0), varyingOffsets);
+    return BinaryOperator(llvm::Instruction::Add, ptr, offset);
 }
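
A scalar model of the offsets addVaryingOffsetsIfNeeded() adds (illustrative; the names are invented): when each lane's pointer addresses a whole varying object in memory, lane i really wants the i'th scalar element of it, so the pointers get per-lane deltas of i * sizeof(uniform element):

    #include <cstddef>
    #include <cstdint>
    static const int WIDTH = 8;   // gang size stand-in
    static void addVaryingOffsets(uint64_t ptr[WIDTH], size_t uniformEltSize) {
        // offset = <0, 1, ...> * sizeof(element), as in the code above
        for (int lane = 0; lane < WIDTH; ++lane)
            ptr[lane] += lane * uniformEltSize;
    }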
diff --git a/ctx.h b/ctx.h
index b4677acb..b4472f1c 100644
--- a/ctx.h
+++ b/ctx.h
@@ -311,20 +311,13 @@ public:
     /** Given a scalar value, return a vector of the same type (or an
         array, for pointer types). */
-    llvm::Value *SmearScalar(llvm::Value *value, const char *name = NULL);
+    llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL);
 
     llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                              const char *name = NULL);
-    llvm::Value *PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
-                              const char *name = NULL);
+    llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL);
     llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                               const char *name = NULL);
-    /** Given a value of some array type, return the corresponding value of
-        vector type. */
-    llvm::Value *ArrayToVectorInst(llvm::Value *value);
-    /** Given a value of some vector type, return the corresponding value of
-        array type. */
-    llvm::Value *VectorToArrayInst(llvm::Value *value);
 
     llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                                  const char *name = NULL);
@@ -337,26 +330,37 @@ public:
     llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                                 const char *name = NULL);
 
-    /** This GEP method is a generalization of the standard one in LLVM; it
-        supports both uniform and varying basePtr values (an array of
-        pointers) as well as uniform and varying index values (arrays of
-        indices). */
+    /** These GEP methods are generalizations of the standard ones in LLVM;
+        they support both uniform and varying basePtr values as well as
+        uniform and varying index values (arrays of indices).  Varying base
+        pointers are expected to come in as vectors of i32/i64 (depending
+        on the target), since LLVM doesn't currently support vectors of
+        pointers.  The underlying type of the base pointer must be provided
+        via the ptrType parameter. */
+    llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
+                                   const Type *ptrType, const char *name = NULL);
     llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
-                                   llvm::Value *index1, const char *name = NULL);
-
-    /** This is a convenience method to generate a GEP instruction with
-        indices with values with known constant values as the ispc program
-        is being compiled. */
-    llvm::Value *GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
+                                   llvm::Value *index1, const Type *ptrType,
                                    const char *name = NULL);
 
+    /** This method returns a new pointer that represents offsetting the
+        given base pointer to point at the given element number of the
+        structure type that the base pointer points to.  (The provided
+        pointer must be a pointer to a structure type.  The ptrType gives
+        the type of the pointer, though it may be NULL if the base pointer
+        is uniform.) */
+    llvm::Value *AddElementOffset(llvm::Value *basePtr, int elementNum,
+                                  const Type *ptrType, const char *name = NULL);
+
     /** Load from the memory location(s) given by lvalue, using the given
         mask.  The lvalue may be varying, in which case this corresponds to
         a gather from the multiple memory locations given by the array of
         pointer values given by the lvalue.  If the lvalue is not varying,
         then both the mask pointer and the type pointer may be NULL. */
-    llvm::Value *LoadInst(llvm::Value *lvalue, llvm::Value *mask,
-                          const Type *type, const char *name = NULL);
+    llvm::Value *LoadInst(llvm::Value *ptr, llvm::Value *mask,
+                          const Type *ptrType, const char *name = NULL);
+
+    llvm::Value *LoadInst(llvm::Value *ptr, const char *name = NULL);
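
The masked LoadInst() overload just declared may lower to a gather, and the varying StoreInst() declared below to a scatter; their per-lane semantics can be sketched in scalar terms like this (illustrative only; the names are invented, and the real work happens in the __pseudo_gather*/__pseudo_scatter* builtins):

    #include <cstdint>
    static const int WIDTH = 8;   // gang size stand-in
    static void gather32(const uint32_t *ptrs[WIDTH], const bool mask[WIDTH],
                         uint32_t result[WIDTH]) {
        for (int lane = 0; lane < WIDTH; ++lane)
            if (mask[lane])               // inactive lanes load nothing
                result[lane] = *ptrs[lane];
    }
    static void scatter32(uint32_t *ptrs[WIDTH], const bool mask[WIDTH],
                          const uint32_t value[WIDTH]) {
        for (int lane = 0; lane < WIDTH; ++lane)
            if (mask[lane])               // inactive lanes store nothing
                *ptrs[lane] = value[lane];
    }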
     /** Emits an alloca instruction to allocate stack storage for the given
         type.  If a non-zero alignment is specified, the object is also
@@ -370,16 +374,14 @@ public:
 
     /** Standard store instruction; for this variant, the lvalue must be
         a single pointer, not a varying lvalue. */
-    void StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
-                   const char *name = NULL);
+    void StoreInst(llvm::Value *value, llvm::Value *ptr);
 
     /** In this variant of StoreInst(), the lvalue may be varying.  If so,
         this corresponds to a scatter.  Whether the lvalue is uniform or
         varying, the given storeMask is used to mask the stores so that
         they only execute for the active program instances. */
-    void StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
-                   llvm::Value *storeMask, const Type *rvalueType,
-                   const char *name = NULL);
+    void StoreInst(llvm::Value *value, llvm::Value *ptr,
+                   llvm::Value *storeMask, const Type *ptrType);
 
     void BranchInst(llvm::BasicBlock *block);
     void BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock,
@@ -401,20 +403,22 @@ public:
     llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
                                   llvm::Value *val1, const char *name = NULL);
 
-    /** Emits IR to do a function call with the given arguments.  The
-        function return type must be provided in returnType. */
-    llvm::Value *CallInst(llvm::Value *func, const Type *returnType,
+    /** Emits IR to do a function call with the given arguments.  If the
+        function type is a varying function pointer type, its full type
+        must be provided in funcType.  funcType can be NULL if func is a
+        uniform function pointer. */
+    llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType,
                           const std::vector<llvm::Value *> &args,
                           const char *name = NULL);
 
     /** This is a convenience method that issues a call instruction to a
         function that takes just a single argument. */
-    llvm::Value *CallInst(llvm::Value *func, const Type *returnType,
+    llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType,
                           llvm::Value *arg, const char *name = NULL);
 
     /** This is a convenience method that issues a call instruction to a
         function that takes two arguments. */
-    llvm::Value *CallInst(llvm::Value *func, const Type *returnType,
+    llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType,
                           llvm::Value *arg0, llvm::Value *arg1,
                           const char *name = NULL);
 
@@ -530,15 +534,18 @@ private:
     void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target);
     llvm::Value *emitGatherCallback(llvm::Value *lvalue, llvm::Value *retPtr);
 
+    llvm::Value *applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
+                                 const Type *ptrType);
+
     void restoreMaskGivenReturns(llvm::Value *oldMask);
 
-    void scatter(llvm::Value *rvalue, llvm::Value *lvalue,
-                 llvm::Value *maskPtr, const Type *rvalueType);
-    llvm::Value *gather(llvm::Value *lvalue, llvm::Value *mask,
-                        const Type *type, const char *name);
-    void maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
-                     const Type *rvalueType, llvm::Value *maskPtr);
-    llvm::Value *addVaryingOffsetsIfNeeded(llvm::Value *value, const Type *type);
+    void scatter(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
+                 llvm::Value *mask);
+    void maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
+                     llvm::Value *mask);
+    llvm::Value *gather(llvm::Value *ptr, const Type *ptrType, llvm::Value *mask,
+                        const char *name);
+    llvm::Value *addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType);
 };
 
 #endif // ISPC_CTX_H
diff --git a/decl.cpp b/decl.cpp
index 1dad7571..e17ed88c 100644
--- a/decl.cpp
+++ b/decl.cpp
@@ -46,12 +46,14 @@
 #include
 #include
 
+/** Given a Type and a set of type qualifiers, apply the type qualifiers to
+    the type, returning the type that is the result.
+*/
 static const Type *
 lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
     if (type == NULL)
         return NULL;
 
-    // Account for 'unsigned' and 'const' qualifiers in the type
     if ((typeQualifiers & TYPEQUAL_UNSIGNED) != 0) {
         const Type *unsignedType = type->GetAsUnsignedType();
         if (unsignedType != NULL)
@@ -60,23 +62,21 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
             Error(pos, "\"unsigned\" qualifier is illegal with \"%s\" type.",
                   type->GetString().c_str());
     }
+
     if ((typeQualifiers & TYPEQUAL_CONST) != 0)
         type = type->GetAsConstType();
 
-    // if uniform/varying is specified explicitly, then go with that
-    if (dynamic_cast<const ReferenceType *>(type) == NULL) {
-        if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0)
+    if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0)
+        type = type->GetAsUniformType();
+    else if ((typeQualifiers & TYPEQUAL_VARYING) != 0)
+        type = type->GetAsVaryingType();
+    else {
+        // otherwise, structs are uniform by default and everything
+        // else is varying by default
+        if (dynamic_cast<const StructType *>(type->GetBaseType()) != NULL)
             type = type->GetAsUniformType();
-        else if ((typeQualifiers & TYPEQUAL_VARYING) != 0)
+        else
             type = type->GetAsVaryingType();
-        else {
-            // otherwise, structs are uniform by default and everything
-            // else is varying by default
-            if (dynamic_cast<const StructType *>(type->GetBaseType()) != NULL)
-                type = type->GetAsUniformType();
-            else
-                type = type->GetAsVaryingType();
-        }
     }
 
     return type;
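
For instance, under the defaulting rules implemented above (illustrative declarations; Foo is an invented name):

    // float x;          -> varying float   (non-structs default to varying)
    // struct Foo f;     -> uniform Foo     (structs default to uniform)
    // uniform float y;  -> uniform float   (explicit qualifiers always win)
    // varying Foo g;    -> varying Foo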
@@ -127,7 +127,6 @@ DeclSpecs::Print() const {
     if (typeQualifiers & TYPEQUAL_UNIFORM) printf("uniform ");
     if (typeQualifiers & TYPEQUAL_VARYING) printf("varying ");
     if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
-    if (typeQualifiers & TYPEQUAL_REFERENCE) printf("reference ");
     if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
 
     printf("%s", baseType->GetString().c_str());
@@ -161,8 +160,10 @@ Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
 
 Symbol *
-Declarator::GetSymbol() {
-    Declarator *d = this;
+Declarator::GetSymbol() const {
+    // The symbol lives at the last child in the chain, so walk down there
+    // and return the one there.
+    const Declarator *d = this;
     while (d->child != NULL)
         d = d->child;
     return d->sym;
@@ -171,7 +172,12 @@ Declarator::GetSymbol() {
 
 void
 Declarator::Print() const {
-    printf("%s", sym->name.c_str());
+    Symbol *sym = GetSymbol();
+    if (sym != NULL)
+        printf("%s", sym->name.c_str());
+    else
+        printf("(null symbol)");
+
     if (initExpr != NULL) {
         printf(" = (");
         initExpr->Print();
@@ -181,28 +187,39 @@ Declarator::Print() const {
 }
 
 
-void
-Declarator::GetFunctionInfo(DeclSpecs *ds, Symbol **funSym,
-                            std::vector<Symbol *> *funArgs) {
-    // Get the symbol for the function from the symbol table.  (It should
-    // already have been added to the symbol table by AddGlobal() by the
-    // time we get here.)
+Symbol *
+Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
     const FunctionType *type =
         dynamic_cast<const FunctionType *>(GetType(ds));
     if (type == NULL)
-        return;
+        return NULL;
 
+    Symbol *declSym = GetSymbol();
     assert(declSym != NULL);
-    *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
-    if (*funSym != NULL)
-        // May be NULL due to error earlier in compilation
-        (*funSym)->pos = pos;
 
-    for (unsigned int i = 0; i < functionArgs.size(); ++i) {
-        Declaration *pdecl = functionArgs[i];
+    // Get the symbol for the function from the symbol table.  (It should
+    // already have been added to the symbol table by AddGlobal() by the
+    // time we get here.)
+    Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
+    if (funSym != NULL)
+        // May be NULL due to error earlier in compilation
+        funSym->pos = pos;
+
+    // Walk down to the declarator for the function.  (We have to get past
+    // the stuff that specifies the function's return type before we get to
+    // the function's declarator.)
+    Declarator *d = this;
+    while (d != NULL && d->kind != DK_FUNCTION)
+        d = d->child;
+    assert(d != NULL);
+
+    for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
+        Declaration *pdecl = d->functionParams[i];
         assert(pdecl->declarators.size() == 1);
         funArgs->push_back(pdecl->declarators[0]->GetSymbol());
     }
+
+    return funSym;
 }
 
 
@@ -211,7 +228,6 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
     bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0);
     bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0);
     bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0);
-    bool isReference = ((typeQualifiers & TYPEQUAL_REFERENCE) != 0);
     bool isConst = ((typeQualifiers & TYPEQUAL_CONST) != 0);
 
     if (hasUniformQual && hasVaryingQual) {
@@ -224,13 +240,36 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
     const Type *type = base;
     switch (kind) {
     case DK_BASE:
+        // All of the type qualifiers should be in the DeclSpecs for the
+        // base declarator
         assert(typeQualifiers == 0);
         assert(child == NULL);
         return type;
 
     case DK_POINTER:
         type = new PointerType(type, hasUniformQual, isConst);
-        if (child)
+        if (child != NULL)
+            return child->GetType(type, ds);
+        else
+            return type;
+        break;
+
+    case DK_REFERENCE:
+        if (hasUniformQual)
+            Error(pos, "\"uniform\" qualifier is illegal to apply to references.");
+        if (hasVaryingQual)
+            Error(pos, "\"varying\" qualifier is illegal to apply to references.");
+        if (isConst)
+            Error(pos, "\"const\" qualifier is illegal to apply to references.");
+
+        // The parser should disallow this already, but double check.
+        if (dynamic_cast<const ReferenceType *>(type) != NULL) {
+            Error(pos, "References to references are illegal.");
+            return NULL;
+        }
+
+        type = new ReferenceType(type);
+        if (child != NULL)
            return child->GetType(type, ds);
        else
            return type;
@@ -250,10 +289,12 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
         std::vector<ConstExpr *> argDefaults;
         std::vector<SourcePos> argPos;
 
-        // Loop over the function arguments and get names and types for
-        // each one in the args and argNames arrays
-        for (unsigned int i = 0; i < functionArgs.size(); ++i) {
-            Declaration *d = functionArgs[i];
+        // Loop over the function arguments and store the names, types,
+        // default values (if any), and source file positions of each one
+        // in the corresponding vector.
+        for (unsigned int i = 0; i < functionParams.size(); ++i) {
+            Declaration *d = functionParams[i];
+
             char buf[32];
             Symbol *sym;
             if (d->declarators.size() == 0) {
@@ -266,6 +307,8 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
             else {
                 sym = d->declarators[0]->GetSymbol();
                 if (sym == NULL) {
+                    // Handle more complex anonymous declarations like
+                    // float (float **).
                     sprintf(buf, "__anon_parameter_%d", i);
                     sym = new Symbol(buf, pos);
                     sym->type = d->declarators[0]->GetType(d->declSpecs);
@@ -274,9 +317,15 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
 
             const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
             if (at != NULL) {
-                // Arrays are passed by reference, so convert array
-                // parameters to be references here.
- sym->type = new ReferenceType(sym->type, sym->type->IsConstType()); + // As in C, arrays are passed to functions as pointers to + // their element type. We'll just immediately make this + // change now. (One shortcoming of losing the fact that + // the it was originally an array is that any warnings or + // errors later issued that print the function type will + // report this differently than it was originally declared + // in the function, but it's not clear that this is a + // significant problem.) + sym->type = PointerType::GetUniform(at->GetElementType()); // Make sure there are no unsized arrays (other than the // first dimension) in function parameter lists. @@ -296,6 +345,8 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { ConstExpr *init = NULL; if (d->declarators.size()) { + // Try to find an initializer expression; if there is one, + // it lives down to the base declarator. Declarator *decl = d->declarators[0]; while (decl->child != NULL) { assert(decl->initExpr == NULL); @@ -314,11 +365,6 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { argDefaults.push_back(init); } - if (isReference) { - Error(pos, "Function return types can't be reference types."); - return NULL; - } - const Type *returnType = type; if (returnType == NULL) { Error(pos, "No return type provided in function declaration."); @@ -328,6 +374,23 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const { bool isExported = ds && (ds->storageClass == SC_EXPORT); bool isExternC = ds && (ds->storageClass == SC_EXTERN_C); bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0); + + if (isExported && isTask) { + Error(pos, "Function can't have both \"task\" and \"export\" " + "qualifiers"); + return NULL; + } + if (isExternC && isTask) { + Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" " + "qualifiers"); + return NULL; + } + if (isExternC && isExported) { + Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" " + "qualifiers"); + return NULL; + } + Type *functionType = new FunctionType(returnType, args, pos, argNames, argDefaults, argPos, isTask, isExported, isExternC); @@ -367,12 +430,6 @@ const Type * Declarator::GetType(DeclSpecs *ds) const { const Type *baseType = ds->GetBaseType(pos); const Type *type = GetType(baseType, ds); - - if ((ds->typeQualifiers & TYPEQUAL_REFERENCE) != 0) { - bool hasConstQual = ((ds->typeQualifiers & TYPEQUAL_CONST) != 0); - type = new ReferenceType(type, hasConstQual); - } - return type; } @@ -392,7 +449,7 @@ Declaration::Declaration(DeclSpecs *ds, std::vector *dlist) { Declaration::Declaration(DeclSpecs *ds, Declarator *d) { declSpecs = ds; - if (d) { + if (d != NULL) { d->InitFromDeclSpecs(ds); declarators.push_back(d); } @@ -409,6 +466,8 @@ Declaration::GetVariableDeclarations() const { continue; Declarator *decl = declarators[i]; if (decl == NULL || decl->kind == DK_FUNCTION) + // Ignore earlier errors or external function declarations + // inside other functions. continue; Symbol *sym = decl->GetSymbol(); @@ -452,14 +511,18 @@ GetStructTypesNamesPositions(const std::vector &sd, Declarator *d = (*sd[i]->declarators)[j]; d->InitFromDeclSpecs(&ds); - // if it's an unsized array, make it a reference to an unsized - // array, so the caller can pass a pointer... 
             Symbol *sym = d->GetSymbol();
-            const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
-            if (at && at->GetElementCount() == 0)
-                sym->type = new ReferenceType(sym->type, type->IsConstType());
-            elementTypes->push_back(sym->type);
+
+            const ArrayType *arrayType =
+                dynamic_cast<const ArrayType *>(sym->type);
+            if (arrayType != NULL && arrayType->GetElementCount() == 0) {
+                Error(d->pos, "Unsized arrays aren't allowed in struct "
+                      "definitions.");
+                elementTypes->push_back(NULL);
+            }
+            else
+                elementTypes->push_back(sym->type);
+
             elementNames->push_back(sym->name);
             elementPositions->push_back(sym->pos);
         }
diff --git a/decl.h b/decl.h
index 019b251e..de966f41 100644
--- a/decl.h
+++ b/decl.h
@@ -79,9 +79,8 @@ enum StorageClass {
 #define TYPEQUAL_UNIFORM (1<<1)
 #define TYPEQUAL_VARYING (1<<2)
 #define TYPEQUAL_TASK (1<<3)
-#define TYPEQUAL_REFERENCE (1<<4)
-#define TYPEQUAL_UNSIGNED (1<<5)
-#define TYPEQUAL_INLINE (1<<6)
+#define TYPEQUAL_UNSIGNED (1<<4)
+#define TYPEQUAL_INLINE (1<<5)
 
 /** @brief Representation of the declaration specifiers in a declaration.
 
@@ -100,7 +99,7 @@ public:
     int typeQualifiers;
 
     /** The basic type provided in the declaration; this should be an
-        AtomicType, a StructType, or a VectorType; other types (like
+        AtomicType, EnumType, StructType, or VectorType; other types (like
         ArrayTypes) will end up being created if a particular declaration
         has an array size, etc. */
 
@@ -123,6 +122,7 @@ public:
 enum DeclaratorKind {
     DK_BASE,
     DK_POINTER,
+    DK_REFERENCE,
     DK_ARRAY,
     DK_FUNCTION
 };
 
@@ -142,33 +142,51 @@ public:
     void InitFromDeclSpecs(DeclSpecs *ds);
 
     /** Get the actual type of the combination of Declarator and the given
-        DeclSpecs */
+        DeclSpecs.  If an explicit base type is provided, the declarator is
+        applied to that type; otherwise the base type from the DeclSpecs is
+        used. */
     const Type *GetType(DeclSpecs *ds) const;
     const Type *GetType(const Type *base, DeclSpecs *ds) const;
 
-    void GetFunctionInfo(DeclSpecs *ds, Symbol **sym,
-                         std::vector<Symbol *> *args);
+    /** Returns the symbol corresponding to the function declared by this
+        declarator and symbols for its arguments in *args. */
+    Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *args);
 
-    Symbol *GetSymbol();
+    /** Returns the symbol associated with the declarator. */
+    Symbol *GetSymbol() const;
 
     void Print() const;
 
+    /** Position of the declarator in the source program. */
     const SourcePos pos;
 
+    /** The kind of this declarator; complex declarations are assembled as
+        a hierarchy of Declarators.  (For example, a pointer to an int
+        would have a root declarator with kind DK_POINTER and with the
+        Declarator::child member pointing to a DK_BASE declarator for the
+        int). */
     const DeclaratorKind kind;
 
+    /** Child pointer if needed; this can only be non-NULL if the
+        declarator's kind isn't DK_BASE. */
     Declarator *child;
 
+    /** Type qualifiers provided with the declarator. */
     int typeQualifiers;
 
+    /** For array declarators, this gives the declared size of the array.
+        Unsized arrays have arraySize == 0. */
     int arraySize;
 
+    /** Symbol associated with the declarator. */
     Symbol *sym;
 
     /** Initialization expression for the variable.  May be NULL. */
     Expr *initExpr;
 
-    std::vector<Declaration *> functionArgs;
+    /** For function declarations, this holds the Declaration *s for the
+        function's parameters. */
+    std::vector<Declaration *> functionParams;
 };
 
 
@@ -182,6 +200,11 @@ public:
 
     void Print() const;
 
+    /** This method walks through all of the Declarators in a declaration
+        and returns a fully-initialized Symbol and (possibly) an
+        initialization expression for each one.
(This allows the rest of + the system to not have to worry about the mess of the general + Declarator representation.) */ std::vector GetVariableDeclarations() const; DeclSpecs *declSpecs; diff --git a/examples/aobench/ao.ispc b/examples/aobench/ao.ispc index e48a544e..3deaa340 100644 --- a/examples/aobench/ao.ispc +++ b/examples/aobench/ao.ispc @@ -75,7 +75,7 @@ static inline vec vcross(vec v0, vec v1) { return ret; } -static inline void vnormalize(reference vec v) { +static inline void vnormalize(vec &v) { float len2 = dot(v, v); float invlen = rsqrt(len2); v *= invlen; @@ -83,8 +83,7 @@ static inline void vnormalize(reference vec v) { static inline void -ray_plane_intersect(reference Isect isect, reference Ray ray, - reference Plane plane) { +ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) { float d = -dot(plane.p, plane.n); float v = dot(ray.dir, plane.n); @@ -104,8 +103,7 @@ ray_plane_intersect(reference Isect isect, reference Ray ray, static inline void -ray_sphere_intersect(reference Isect isect, reference Ray ray, - reference Sphere sphere) { +ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) { vec rs = ray.org - sphere.center; float B = dot(rs, ray.dir); @@ -127,7 +125,7 @@ ray_sphere_intersect(reference Isect isect, reference Ray ray, static inline void -orthoBasis(reference vec basis[3], vec n) { +orthoBasis(vec basis[3], vec n) { basis[2] = n; basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0; @@ -150,8 +148,8 @@ orthoBasis(reference vec basis[3], vec n) { static inline float -ambient_occlusion(reference Isect isect, reference Plane plane, - reference Sphere spheres[3], reference RNGState rngstate) { +ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3], + RNGState &rngstate) { float eps = 0.0001f; vec p, n; vec basis[3]; @@ -168,8 +166,8 @@ ambient_occlusion(reference Isect isect, reference Plane plane, Ray ray; Isect occIsect; - float theta = sqrt(frandom(rngstate)); - float phi = 2.0f * M_PI * frandom(rngstate); + float theta = sqrt(frandom(&rngstate)); + float phi = 2.0f * M_PI * frandom(&rngstate); float x = cos(phi) * theta; float y = sin(phi) * theta; float z = sqrt(1.0 - theta * theta); @@ -205,7 +203,7 @@ ambient_occlusion(reference Isect isect, reference Plane plane, */ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, uniform int nsubsamples, - reference uniform float image[]) { + uniform float image[]) { static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } }; static Sphere spheres[3] = { { { -2.0f, 0.0f, -3.5f }, 0.5f }, @@ -213,7 +211,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, { { 1.0f, 0.0f, -2.2f }, 0.5f } }; RNGState rngstate; - seed_rng(rngstate, y0); + seed_rng(&rngstate, y0); // Compute the mapping between the 'programCount'-wide program // instances running in parallel and samples in the image. 
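
The edits in this and the remaining example files track the language change earlier in this patch: the removed "reference" qualifier is replaced by C++-style reference syntax on parameters, for example (illustrative, mirroring the diffs above):

    // before:  static inline void vnormalize(reference vec v)
    // after:   static inline void vnormalize(vec &v)

and RNG state is now passed to seed_rng()/frandom() by pointer rather than by reference.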
diff --git a/examples/aobench_instrumented/ao.ispc b/examples/aobench_instrumented/ao.ispc index 192e0666..3deaa340 100644 --- a/examples/aobench_instrumented/ao.ispc +++ b/examples/aobench_instrumented/ao.ispc @@ -75,7 +75,7 @@ static inline vec vcross(vec v0, vec v1) { return ret; } -static inline void vnormalize(reference vec v) { +static inline void vnormalize(vec &v) { float len2 = dot(v, v); float invlen = rsqrt(len2); v *= invlen; @@ -83,8 +83,7 @@ static inline void vnormalize(reference vec v) { static inline void -ray_plane_intersect(reference Isect isect, reference Ray ray, - reference Plane plane) { +ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) { float d = -dot(plane.p, plane.n); float v = dot(ray.dir, plane.n); @@ -104,8 +103,7 @@ ray_plane_intersect(reference Isect isect, reference Ray ray, static inline void -ray_sphere_intersect(reference Isect isect, reference Ray ray, - reference Sphere sphere) { +ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) { vec rs = ray.org - sphere.center; float B = dot(rs, ray.dir); @@ -127,7 +125,7 @@ ray_sphere_intersect(reference Isect isect, reference Ray ray, static inline void -orthoBasis(reference vec basis[3], vec n) { +orthoBasis(vec basis[3], vec n) { basis[2] = n; basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0; @@ -150,8 +148,8 @@ orthoBasis(reference vec basis[3], vec n) { static inline float -ambient_occlusion(reference Isect isect, reference Plane plane, - reference Sphere spheres[3], reference RNGState rngstate) { +ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3], + RNGState &rngstate) { float eps = 0.0001f; vec p, n; vec basis[3]; @@ -168,8 +166,8 @@ ambient_occlusion(reference Isect isect, reference Plane plane, Ray ray; Isect occIsect; - float theta = sqrt(frandom(rngstate)); - float phi = 2.0f * M_PI * frandom(rngstate); + float theta = sqrt(frandom(&rngstate)); + float phi = 2.0f * M_PI * frandom(&rngstate); float x = cos(phi) * theta; float y = sin(phi) * theta; float z = sqrt(1.0 - theta * theta); @@ -203,8 +201,9 @@ ambient_occlusion(reference Isect isect, reference Plane plane, /* Compute the image for the scanlines from [y0,y1), for an overall image of width w and height h. */ -void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, - uniform int nsubsamples, reference uniform float image[]) { +static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, + uniform int h, uniform int nsubsamples, + uniform float image[]) { static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } }; static Sphere spheres[3] = { { { -2.0f, 0.0f, -3.5f }, 0.5f }, @@ -212,7 +211,7 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, { { 1.0f, 0.0f, -2.2f }, 0.5f } }; RNGState rngstate; - seed_rng(rngstate, y0); + seed_rng(&rngstate, y0); // Compute the mapping between the 'programCount'-wide program // instances running in parallel and samples in the image. @@ -231,6 +230,9 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, // direction we do per iteration and ny the number in y. uniform int nx = 1, ny = 1; + // FIXME: We actually need ny to be 1 regardless of the decomposition, + // since the task decomposition is one scanline high. 
+ if (programCount == 8) { // Do two pixels at once in the x direction nx = 2; @@ -239,19 +241,21 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, ++du; } else if (programCount == 16) { - // Two at once in both x and y - nx = ny = 2; - if ((programIndex >= 4 && programIndex < 8) || programIndex >= 12) + nx = 4; + ny = 1; + if (programIndex >= 4 && programIndex < 8) ++du; - if (programIndex >= 8) - ++dv; + if (programIndex >= 8 && programIndex < 12) + du += 2; + if (programIndex >= 12) + du += 3; } // Now loop over all of the pixels, stepping in x and y as calculated // above. (Assumes that ny divides y and nx divides x...) for (uniform int y = y0; y < y1; y += ny) { for (uniform int x = 0; x < w; x += nx) { - // Figur out x,y pixel in NDC + // Figure out x,y pixel in NDC float px = (x + du - (w / 2.0f)) / (w / 2.0f); float py = -(y + dv - (h / 2.0f)) / (h / 2.0f); float ret = 0.f; @@ -293,7 +297,7 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, // offset to the first pixel in the image uniform int offset = 3 * (y * w + x); - for (uniform int p = 0; p < programCount; p += 4, ++offset) { + for (uniform int p = 0; p < programCount; p += 4, offset += 3) { // Get the four sample values for this pixel uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] + retArray[p+3]; @@ -315,3 +319,15 @@ export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples, uniform float image[]) { ao_scanlines(0, h, w, h, nsubsamples, image); } + + +static void task ao_task(uniform int width, uniform int height, + uniform int nsubsamples, uniform float image[]) { + ao_scanlines(taskIndex, taskIndex+1, width, height, nsubsamples, image); +} + + +export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples, + uniform float image[]) { + launch[h] < ao_task(w, h, nsubsamples, image) >; +} diff --git a/examples/deferred/kernels.ispc b/examples/deferred/kernels.ispc index 65fa1547..6ade1d82 100644 --- a/examples/deferred/kernels.ispc +++ b/examples/deferred/kernels.ispc @@ -35,22 +35,22 @@ struct InputDataArrays { - uniform float zBuffer[]; - uniform unsigned int16 normalEncoded_x[]; // half float - uniform unsigned int16 normalEncoded_y[]; // half float - uniform unsigned int16 specularAmount[]; // half float - uniform unsigned int16 specularPower[]; // half float - uniform unsigned int8 albedo_x[]; // unorm8 - uniform unsigned int8 albedo_y[]; // unorm8 - uniform unsigned int8 albedo_z[]; // unorm8 - uniform float lightPositionView_x[]; - uniform float lightPositionView_y[]; - uniform float lightPositionView_z[]; - uniform float lightAttenuationBegin[]; - uniform float lightColor_x[]; - uniform float lightColor_y[]; - uniform float lightColor_z[]; - uniform float lightAttenuationEnd[]; + uniform float * uniform zBuffer; + uniform unsigned int16 * uniform normalEncoded_x; // half float + uniform unsigned int16 * uniform normalEncoded_y; // half float + uniform unsigned int16 * uniform specularAmount; // half float + uniform unsigned int16 * uniform specularPower; // half float + uniform unsigned int8 * uniform albedo_x; // unorm8 + uniform unsigned int8 * uniform albedo_y; // unorm8 + uniform unsigned int8 * uniform albedo_z; // unorm8 + uniform float * uniform lightPositionView_x; + uniform float * uniform lightPositionView_y; + uniform float * uniform lightPositionView_z; + uniform float * uniform lightAttenuationBegin; + uniform float * uniform lightColor_x; + uniform float * uniform lightColor_y; + 
uniform float * uniform lightColor_z; + uniform float * uniform lightAttenuationEnd; }; struct InputHeader @@ -77,8 +77,7 @@ dot3(float x, float y, float z, float a, float b, float c) { static inline void -normalize3(float x, float y, float z, reference float ox, - reference float oy, reference float oz) { +normalize3(float x, float y, float z, float &ox, float &oy, float &oz) { float n = rsqrt(x*x + y*y + z*z); ox = x * n; oy = y * n; @@ -110,8 +109,8 @@ ComputeZBounds( uniform float cameraProj_33, uniform float cameraProj_43, uniform float cameraNear, uniform float cameraFar, // Output - reference uniform float minZ, - reference uniform float maxZ + uniform float &minZ, + uniform float &maxZ ) { // Find Z bounds @@ -156,7 +155,7 @@ IntersectLightsWithTileMinMax( uniform float light_positionView_z_array[], uniform float light_attenuationEnd_array[], // Output - reference uniform int32 tileLightIndices[] + uniform int32 tileLightIndices[] ) { uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; @@ -268,7 +267,7 @@ IntersectLightsWithTile( uniform float light_positionView_z_array[], uniform float light_attenuationEnd_array[], // Output - reference uniform int32 tileLightIndices[] + uniform int32 tileLightIndices[] ) { uniform float minZ, maxZ; @@ -293,19 +292,19 @@ ShadeTile( uniform int32 tileStartX, uniform int32 tileEndX, uniform int32 tileStartY, uniform int32 tileEndY, uniform int32 gBufferWidth, uniform int32 gBufferHeight, - reference uniform InputDataArrays inputData, + uniform InputDataArrays &inputData, // Camera data uniform float cameraProj_11, uniform float cameraProj_22, uniform float cameraProj_33, uniform float cameraProj_43, // Light list - reference uniform int32 tileLightIndices[], + uniform int32 tileLightIndices[], uniform int32 tileNumLights, // UI uniform bool visualizeLightCount, // Output - reference uniform unsigned int8 framebuffer_r[], - reference uniform unsigned int8 framebuffer_g[], - reference uniform unsigned int8 framebuffer_b[] + uniform unsigned int8 framebuffer_r[], + uniform unsigned int8 framebuffer_g[], + uniform unsigned int8 framebuffer_b[] ) { if (tileNumLights == 0 || visualizeLightCount) { @@ -478,13 +477,13 @@ ShadeTile( task void RenderTile(uniform int num_groups_x, uniform int num_groups_y, - reference uniform InputHeader inputHeader, - reference uniform InputDataArrays inputData, + uniform InputHeader &inputHeader, + uniform InputDataArrays &inputData, uniform int visualizeLightCount, // Output - reference uniform unsigned int8 framebuffer_r[], - reference uniform unsigned int8 framebuffer_g[], - reference uniform unsigned int8 framebuffer_b[]) { + uniform unsigned int8 framebuffer_r[], + uniform unsigned int8 framebuffer_g[], + uniform unsigned int8 framebuffer_b[]) { uniform int32 group_y = taskIndex / num_groups_x; uniform int32 group_x = taskIndex % num_groups_x; uniform int32 tile_start_x = group_x * MIN_TILE_WIDTH; @@ -526,13 +525,13 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y, export void -RenderStatic(reference uniform InputHeader inputHeader, - reference uniform InputDataArrays inputData, +RenderStatic(uniform InputHeader &inputHeader, + uniform InputDataArrays &inputData, uniform int visualizeLightCount, // Output - reference uniform unsigned int8 framebuffer_r[], - reference uniform unsigned int8 framebuffer_g[], - reference uniform unsigned int8 framebuffer_b[]) { + uniform unsigned int8 framebuffer_r[], + uniform unsigned int8 framebuffer_g[], + uniform unsigned int8 framebuffer_b[]) { uniform int 
num_groups_x = (inputHeader.framebufferWidth + MIN_TILE_WIDTH - 1) / MIN_TILE_WIDTH; uniform int num_groups_y = (inputHeader.framebufferHeight + @@ -564,8 +563,8 @@ ComputeZBoundsRow( uniform float cameraProj_33, uniform float cameraProj_43, uniform float cameraNear, uniform float cameraFar, // Output - reference uniform float minZArray[], - reference uniform float maxZArray[] + uniform float minZArray[], + uniform float maxZArray[] ) { for (uniform int32 tileX = 0; tileX < numTilesX; ++tileX) { @@ -596,7 +595,7 @@ SplitTileMinMax( // Camera data uniform float cameraProj_11, uniform float cameraProj_22, // Light Data - reference uniform int32 lightIndices[], + uniform int32 lightIndices[], uniform int32 numLights, uniform float light_positionView_x_array[], uniform float light_positionView_y_array[], @@ -605,9 +604,9 @@ SplitTileMinMax( // Outputs // TODO: ISPC doesn't currently like multidimensionsal arrays so we'll do the // indexing math ourselves - reference uniform int32 subtileIndices[], + uniform int32 subtileIndices[], uniform int32 subtileIndicesPitch, - reference uniform int32 subtileNumLights[] + uniform int32 subtileNumLights[] ) { uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; diff --git a/examples/mandelbrot/mandelbrot.ispc b/examples/mandelbrot/mandelbrot.ispc index ecbb4fc1..9243b52a 100644 --- a/examples/mandelbrot/mandelbrot.ispc +++ b/examples/mandelbrot/mandelbrot.ispc @@ -51,7 +51,7 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0, uniform float x1, uniform float y1, uniform int width, uniform int height, uniform int maxIterations, - reference uniform int output[]) + uniform int output[]) { float dx = (x1 - x0) / width; float dy = (y1 - y0) / height; diff --git a/examples/mandelbrot_tasks/mandelbrot.ispc b/examples/mandelbrot_tasks/mandelbrot.ispc index e52725df..d4ffeff5 100644 --- a/examples/mandelbrot_tasks/mandelbrot.ispc +++ b/examples/mandelbrot_tasks/mandelbrot.ispc @@ -57,7 +57,7 @@ mandelbrot_scanlines(uniform int ybase, uniform int span, uniform float x0, uniform float dx, uniform float y0, uniform float dy, uniform int width, uniform int maxIterations, - reference uniform int output[]) { + uniform int output[]) { uniform int ystart = ybase + taskIndex * span; uniform int yend = ystart + span; @@ -77,7 +77,7 @@ task void mandelbrot_chunk(uniform float x0, uniform float dx, uniform float y0, uniform float dy, uniform int width, uniform int height, - uniform int maxIterations, reference uniform int output[]) { + uniform int maxIterations, uniform int output[]) { uniform int ystart = taskIndex * (height/taskCount); uniform int yend = (taskIndex+1) * (height/taskCount); uniform int span = 1; @@ -91,7 +91,7 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0, uniform float x1, uniform float y1, uniform int width, uniform int height, - uniform int maxIterations, reference uniform int output[]) { + uniform int maxIterations, uniform int output[]) { uniform float dx = (x1 - x0) / width; uniform float dy = (y1 - y0) / height; diff --git a/examples/rt/rt.ispc b/examples/rt/rt.ispc index 88a4a7f6..47abee80 100644 --- a/examples/rt/rt.ispc +++ b/examples/rt/rt.ispc @@ -73,7 +73,7 @@ static inline float Dot(const float3 a, const float3 b) { static void generateRay(uniform const float raster2camera[4][4], uniform const float camera2world[4][4], - float x, float y, reference Ray ray) { + float x, float y, Ray &ray) { ray.mint = 0.f; ray.maxt = 1e30f; @@ -105,7 +105,7 @@ static void generateRay(uniform const float 
raster2camera[4][4], static inline bool BBoxIntersect(const uniform float bounds[2][3], - const reference Ray ray) { + const Ray &ray) { uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] }; uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] }; float t0 = ray.mint, t1 = ray.maxt; @@ -143,7 +143,7 @@ static inline bool BBoxIntersect(const uniform float bounds[2][3], -static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) { +static inline bool TriIntersect(const Triangle &tri, Ray &ray) { uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] }; uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] }; uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] }; @@ -184,7 +184,7 @@ static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[], - reference Ray r) { + Ray &r) { Ray ray = r; bool hit = false; // Follow ray through BVH nodes to find primitive intersections diff --git a/examples/volume_rendering/Makefile b/examples/volume_rendering/Makefile index fa8ff753..0f3f83b2 100644 --- a/examples/volume_rendering/Makefile +++ b/examples/volume_rendering/Makefile @@ -8,7 +8,7 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) CXX=g++ CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64 --opt=32-bit-addressing OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \ objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o diff --git a/examples/volume_rendering/volume.ispc b/examples/volume_rendering/volume.ispc index 39a5a734..c4bc0c1a 100644 --- a/examples/volume_rendering/volume.ispc +++ b/examples/volume_rendering/volume.ispc @@ -41,7 +41,7 @@ struct Ray { static void generateRay(const uniform float raster2camera[4][4], const uniform float camera2world[4][4], - float x, float y, reference Ray ray) { + float x, float y, Ray &ray) { // transform raster coordinate (x, y, 0) to camera space float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3]; float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3]; @@ -70,7 +70,7 @@ Inside(float3 p, float3 pMin, float3 pMax) { static bool -IntersectP(Ray ray, float3 pMin, float3 pMax, reference float hit0, reference float hit1) { +IntersectP(Ray ray, float3 pMin, float3 pMax, float &hit0, float &hit1) { float t0 = -1e30, t1 = 1e30; float3 tNear = (pMin - ray.origin) / ray.dir; @@ -141,7 +141,7 @@ static inline float3 Offset(float3 p, float3 pMin, float3 pMax) { static inline float Density(float3 Pobj, float3 pMin, float3 pMax, uniform float density[], uniform int nVoxels[3], - reference uniform bool checkForSameVoxel) { + uniform bool &checkForSameVoxel) { if (!Inside(Pobj, pMin, pMax)) return 0; // Compute voxel coordinates and offsets for _Pobj_ @@ -155,8 +155,8 @@ static inline float Density(float3 Pobj, float3 pMin, float3 pMax, // Trilinearly interpolate density values to compute local density float d00, d10, d01, d11; uniform int uvx, uvy, uvz; - if (checkForSameVoxel && reduce_equal(vx, uvx) && reduce_equal(vy, uvy) && - reduce_equal(vz, uvz)) { + if (checkForSameVoxel && reduce_equal(vx, &uvx) && reduce_equal(vy, &uvy) && + reduce_equal(vz, &uvz)) { // If all of the program instances are inside the same voxel, then // we'll call the 'uniform' variant of the voxel density lookup // function, thus doing a single load for 
each value rather than a diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index 12298017..423c26fc 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -1,4 +1,4 @@ - + @@ -158,13 +158,13 @@ Document ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 --opt=32-bit-addressing $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 --opt=32-bit-addressing $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/expr.cpp b/expr.cpp index efa202f7..1ddabb44 100644 --- a/expr.cpp +++ b/expr.cpp @@ -68,6 +68,14 @@ Expr::GetLValue(FunctionEmitContext *ctx) const { } +const Type * +Expr::GetLValueType() const { + // This also only needs to be overridden by Exprs that implement the + // GetLValue() method. + return NULL; +} + + llvm::Constant * Expr::GetConstant(const Type *type) const { // The default is failure; just return NULL @@ -78,7 +86,7 @@ Expr::GetConstant(const Type *type) const { Symbol * Expr::GetBaseSymbol() const { // Not all expressions can do this, so provide a generally-useful - // default + // default implementation.
return NULL; } @@ -155,14 +163,6 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, return false; } - if (toType->IsUniformType() && fromType->IsVaryingType()) { - if (!failureOk) - Error(pos, "Can't convert from varying type \"%s\" to uniform " - "type \"%s\" for %s.", fromType->GetString().c_str(), - toType->GetString().c_str(), errorMsgBase); - return false; - } - const ArrayType *toArrayType = dynamic_cast(toType); const ArrayType *fromArrayType = dynamic_cast(fromType); const VectorType *toVectorType = dynamic_cast(toType); @@ -173,34 +173,78 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, const EnumType *fromEnumType = dynamic_cast(fromType); const AtomicType *toAtomicType = dynamic_cast(toType); const AtomicType *fromAtomicType = dynamic_cast(fromType); - const PointerType *fromPointerType = dynamic_cast(fromType); const PointerType *toPointerType = dynamic_cast(toType); + + // Do this early, since for the case of a conversion like + // "float foo[10]" -> "float * uniform foo", we have what's seemingly + // a varying to uniform conversion (but not really) + if (fromArrayType != NULL && toPointerType != NULL) { + // array to pointer to array element type + const Type *eltType = fromArrayType->GetElementType(); + if (toPointerType->GetBaseType()->IsConstType()) + eltType = eltType->GetAsConstType(); + if (Type::Equal(toPointerType, + new PointerType(eltType, + toPointerType->IsUniformType(), + toPointerType->IsConstType()))) + goto typecast_ok; + else { + if (!failureOk) + Error(pos, "Can't convert from incompatible array type \"%s\" " + "to pointer type \"%s\" for %s.", + fromType->GetString().c_str(), + toType->GetString().c_str(), errorMsgBase); + return false; + } + } + + if (toType->IsUniformType() && fromType->IsVaryingType()) { + if (!failureOk) + Error(pos, "Can't convert from varying type \"%s\" to uniform " + "type \"%s\" for %s.", fromType->GetString().c_str(), + toType->GetString().c_str(), errorMsgBase); + return false; + } + if (fromPointerType != NULL) { if (dynamic_cast(toType) != NULL && toType->IsBoolType()) // Allow implicit conversion of pointers to bools goto typecast_ok; + if (toArrayType != NULL && + Type::Equal(fromType->GetBaseType(), toArrayType->GetElementType())) { + // Can convert pointers to arrays of the same type + goto typecast_ok; + } if (toPointerType == NULL) { if (!failureOk) Error(pos, "Can't convert between from pointer type " - "\"%s\" to non-pointer type \"%s\".", + "\"%s\" to non-pointer type \"%s\" for %s.", fromType->GetString().c_str(), - toType->GetString().c_str()); + toType->GetString().c_str(), errorMsgBase); return false; } - else if (Type::Equal(fromPointerType->GetAsUniformType()->GetAsConstType(), - PointerType::Void)) { - // void *s can be converted to any other pointer type + else if (PointerType::IsVoidPointer(toPointerType)) { + // any pointer type can be converted to a void * + goto typecast_ok; + } + else if (PointerType::IsVoidPointer(fromPointerType) && + expr != NULL && + dynamic_cast(*expr) != NULL) { + // and a NULL convert to any other pointer type goto typecast_ok; } else if (!Type::Equal(fromPointerType->GetBaseType(), + toPointerType->GetBaseType()) && + !Type::Equal(fromPointerType->GetBaseType()->GetAsConstType(), toPointerType->GetBaseType())) { if (!failureOk) Error(pos, "Can't convert between incompatible pointer types " - "\"%s\" and \"%s\".", fromPointerType->GetString().c_str(), - toPointerType->GetString().c_str()); + "\"%s\" and \"%s\" for %s.", + 
fromPointerType->GetString().c_str(), + toPointerType->GetString().c_str(), errorMsgBase); return false; } @@ -269,8 +313,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, return false; } else - return lDoTypeConv(new ReferenceType(fromType, toType->IsConstType()), - toType, NULL, failureOk, errorMsgBase, pos); + return lDoTypeConv(new ReferenceType(fromType), toType, NULL, + failureOk, errorMsgBase, pos); } else if (Type::Equal(toType, fromType->GetAsNonConstType())) // convert: const T -> T (as long as T isn't a reference) @@ -278,25 +322,18 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, fromType = fromType->GetReferenceTarget(); toType = toType->GetReferenceTarget(); - if (toArrayType && fromArrayType) { if (Type::Equal(toArrayType->GetElementType(), fromArrayType->GetElementType())) { // the case of different element counts should have returned - // out earlier, yes?? + // successfully earlier, yes?? assert(toArrayType->GetElementCount() != fromArrayType->GetElementCount()); - if (expr != NULL) - *expr = new TypeCastExpr(new ReferenceType(toType, false), - *expr, false, pos); - return true; + goto typecast_ok; } else if (Type::Equal(toArrayType->GetElementType(), fromArrayType->GetElementType()->GetAsConstType())) { // T[x] -> const T[x] - if (expr != NULL) - *expr = new TypeCastExpr(new ReferenceType(toType, false), - *expr, false, pos); - return true; + goto typecast_ok; } else { if (!failureOk) @@ -324,8 +361,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, fromStructType->GetAsUniformType()->GetAsConstType())) { if (!failureOk) Error(pos, "Can't convert between different struct types " - "\"%s\" -> \"%s\".", fromStructType->GetString().c_str(), - toStructType->GetString().c_str()); + "\"%s\" and \"%s\" for %s.", fromStructType->GetString().c_str(), + toStructType->GetString().c_str(), errorMsgBase); return false; } goto typecast_ok; @@ -333,12 +370,12 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, if (toEnumType != NULL && fromEnumType != NULL) { // No implicit conversions between different enum types - if (!Type::Equal(toEnumType->GetAsUniformType()->GetAsConstType(), - fromEnumType->GetAsUniformType()->GetAsConstType())) { + if (!Type::EqualIgnoringConst(toEnumType->GetAsUniformType(), + fromEnumType->GetAsUniformType())) { if (!failureOk) Error(pos, "Can't convert between different enum types " - "\"%s\" -> \"%s\".", fromEnumType->GetString().c_str(), - toEnumType->GetString().c_str()); + "\"%s\" and \"%s\" for %s", fromEnumType->GetString().c_str(), + toEnumType->GetString().c_str(), errorMsgBase); return false; } goto typecast_ok; @@ -382,8 +419,10 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, bool -CanConvertTypes(const Type *fromType, const Type *toType) { - return lDoTypeConv(fromType, toType, NULL, true, NULL, SourcePos()); +CanConvertTypes(const Type *fromType, const Type *toType, + const char *errorMsgBase, SourcePos pos) { + return lDoTypeConv(fromType, toType, NULL, errorMsgBase == NULL, + errorMsgBase, pos); } @@ -431,10 +470,12 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { const AtomicType *atomicType = dynamic_cast(type); const EnumType *enumType = dynamic_cast(type); const VectorType *vectorType = dynamic_cast(type); + const PointerType *pointerType = dynamic_cast(type); // This function is only called with, and only works for atomic, enum, // and vector types. 
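The conversion rules added to lDoTypeConv() above are easiest to see from the ispc side: an array name now decays to a pointer to its element type, any pointer converts implicitly to void *, and a NULL literal converts to any pointer type. A sketch under those rules (pointerConversions is an illustrative name, not part of the patch):

static void pointerConversions() {
    uniform float a[16];
    uniform float * uniform p = a;     // array decays to pointer-to-element
    void * uniform v = p;              // any pointer converts to void *
    uniform float * uniform q = NULL;  // NULL converts to any pointer type
}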
- assert(atomicType != NULL || enumType != NULL || vectorType != NULL); + assert(atomicType != NULL || enumType != NULL || vectorType != NULL || + pointerType != NULL); if (atomicType != NULL || enumType != NULL) { // If it's an atomic or enuemrator type, then figure out which of @@ -500,42 +541,56 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { return NULL; } } - - // For vector types, first get the LLVM constant for the basetype with - // a recursive call to lLLVMConstantValue(). - const Type *baseType = vectorType->GetBaseType(); - llvm::Constant *constElement = lLLVMConstantValue(baseType, ctx, value); - LLVM_TYPE_CONST llvm::Type *llvmVectorType = vectorType->LLVMType(ctx); - - // Now create a constant version of the corresponding LLVM type that we - // use to represent the VectorType. - // FIXME: this is a little ugly in that the fact that ispc represents - // uniform VectorTypes as LLVM VectorTypes and varying VectorTypes as - // LLVM ArrayTypes leaks into the code here; it feels like this detail - // should be better encapsulated? - if (baseType->IsUniformType()) { - LLVM_TYPE_CONST llvm::VectorType *lvt = - llvm::dyn_cast(llvmVectorType); - assert(lvt != NULL); - std::vector vals; - for (unsigned int i = 0; i < lvt->getNumElements(); ++i) - vals.push_back(constElement); - return llvm::ConstantVector::get(vals); + else if (pointerType != NULL) { + assert(value == 0); + if (pointerType->IsUniformType()) + return llvm::Constant::getNullValue(LLVMTypes::VoidPointerType); + else + return llvm::Constant::getNullValue(LLVMTypes::VoidPointerVectorType); } else { - LLVM_TYPE_CONST llvm::ArrayType *lat = - llvm::dyn_cast(llvmVectorType); - assert(lat != NULL); - std::vector vals; - for (unsigned int i = 0; i < lat->getNumElements(); ++i) - vals.push_back(constElement); - return llvm::ConstantArray::get(lat, vals); + // For vector types, first get the LLVM constant for the basetype with + // a recursive call to lLLVMConstantValue(). + const Type *baseType = vectorType->GetBaseType(); + llvm::Constant *constElement = lLLVMConstantValue(baseType, ctx, value); + LLVM_TYPE_CONST llvm::Type *llvmVectorType = vectorType->LLVMType(ctx); + + // Now create a constant version of the corresponding LLVM type that we + // use to represent the VectorType. + // FIXME: this is a little ugly in that the fact that ispc represents + // uniform VectorTypes as LLVM VectorTypes and varying VectorTypes as + // LLVM ArrayTypes leaks into the code here; it feels like this detail + // should be better encapsulated? + if (baseType->IsUniformType()) { + LLVM_TYPE_CONST llvm::VectorType *lvt = + llvm::dyn_cast(llvmVectorType); + assert(lvt != NULL); + std::vector vals; + for (unsigned int i = 0; i < lvt->getNumElements(); ++i) + vals.push_back(constElement); + return llvm::ConstantVector::get(vals); + } + else { + LLVM_TYPE_CONST llvm::ArrayType *lat = + llvm::dyn_cast(llvmVectorType); + assert(lat != NULL); + std::vector vals; + for (unsigned int i = 0; i < lat->getNumElements(); ++i) + vals.push_back(constElement); + return llvm::ConstantArray::get(lat, vals); + } } } static llvm::Value * lMaskForSymbol(Symbol *baseSym, FunctionEmitContext *ctx) { + if (dynamic_cast(baseSym->type) != NULL) + // FIXME: we really only want to do this for dereferencing the + // pointer, not for things like pointer arithmetic, when we may be + // able to use the internal mask, depending on context... 
+ return ctx->GetFullMask(); + llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() && baseSym->storageClass != SC_STATIC) ? ctx->GetInternalMask() : ctx->GetFullMask(); @@ -546,14 +601,15 @@ lMaskForSymbol(Symbol *baseSym, FunctionEmitContext *ctx) { /** Store the result of an assignment to the given location. */ static void -lStoreAssignResult(llvm::Value *rv, llvm::Value *lv, const Type *type, +lStoreAssignResult(llvm::Value *value, llvm::Value *ptr, const Type *ptrType, FunctionEmitContext *ctx, Symbol *baseSym) { assert(baseSym != NULL && baseSym->varyingCFDepth <= ctx->VaryingCFDepth()); if (!g->opt.disableMaskedStoreToStore && baseSym->varyingCFDepth == ctx->VaryingCFDepth() && baseSym->storageClass != SC_STATIC && - dynamic_cast(baseSym->type) == NULL) { + dynamic_cast(baseSym->type) == NULL && + dynamic_cast(baseSym->type) == NULL) { // If the variable is declared at the same varying control flow // depth as where it's being assigned, then we don't need to do any // masking but can just do the assignment as if all the lanes were @@ -562,10 +618,10 @@ lStoreAssignResult(llvm::Value *rv, llvm::Value *lv, const Type *type, // never be accessed, since those lanes aren't executing, and won't // be executing at this scope or any other one before the variable // goes out of scope. - ctx->StoreInst(rv, lv, LLVMMaskAllOn, type); + ctx->StoreInst(value, ptr, LLVMMaskAllOn, ptrType); } else { - ctx->StoreInst(rv, lv, lMaskForSymbol(baseSym, ctx), type); + ctx->StoreInst(value, ptr, lMaskForSymbol(baseSym, ctx), ptrType); } } @@ -577,10 +633,14 @@ static llvm::Value * lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos, FunctionEmitContext *ctx) { const Type *type = expr->GetType(); + if (type == NULL) + return NULL; // Get both the lvalue and the rvalue of the given expression llvm::Value *lvalue = NULL, *rvalue = NULL; + const Type *lvalueType = NULL; if (dynamic_cast(type) != NULL) { + lvalueType = type; type = type->GetReferenceTarget(); lvalue = expr->GetValue(ctx); @@ -589,14 +649,17 @@ lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos, } else { lvalue = expr->GetLValue(ctx); + lvalueType = expr->GetLValueType(); rvalue = expr->GetValue(ctx); } if (lvalue == NULL) { // If we can't get a lvalue, then we have an error here - Error(pos, "Can't %s-%s non-lvalues.", - (op == UnaryExpr::PreInc || op == UnaryExpr::PreDec) ? "pre" : "post", - (op == UnaryExpr::PreInc || op == UnaryExpr::PostInc) ? "increment" : "decrement"); + const char *prepost = (op == UnaryExpr::PreInc || + op == UnaryExpr::PreDec) ? "pre" : "post"; + const char *incdec = (op == UnaryExpr::PreInc || + op == UnaryExpr::PostInc) ? "increment" : "decrement"; + Error(pos, "Can't %s-%s non-lvalues.", prepost, incdec); return NULL; } @@ -605,25 +668,35 @@ lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos, ctx->SetDebugPos(pos); llvm::Value *binop = NULL; int delta = (op == UnaryExpr::PreInc || op == UnaryExpr::PostInc) ? 1 : -1; - llvm::Constant *dval = lLLVMConstantValue(type, g->ctx, delta); - if (!type->IsFloatType()) - binop = ctx->BinaryOperator(llvm::Instruction::Add, rvalue, - dval, "val_inc_or_dec"); - else - binop = ctx->BinaryOperator(llvm::Instruction::FAdd, rvalue, - dval, "val_inc_or_dec"); + + if (dynamic_cast(type) != NULL) { + const Type *incType = type->IsUniformType() ? 
AtomicType::UniformInt32 : + AtomicType::VaryingInt32; + llvm::Constant *dval = lLLVMConstantValue(incType, g->ctx, delta); + binop = ctx->GetElementPtrInst(rvalue, dval, type, "ptr_inc_or_dec"); + } + else { + llvm::Constant *dval = lLLVMConstantValue(type, g->ctx, delta); + if (type->IsFloatType()) + binop = ctx->BinaryOperator(llvm::Instruction::FAdd, rvalue, + dval, "val_inc_or_dec"); + else + binop = ctx->BinaryOperator(llvm::Instruction::Add, rvalue, + dval, "val_inc_or_dec"); + } #if 0 if (type->IsUniformType()) { if (ctx->VaryingCFDepth() > 0) Warning(expr->pos, - "Modifying \"uniform\" value under \"varying\" control flow. Beware."); + "Modifying \"uniform\" value under \"varying\" control " + "flow."); } #endif // And store the result out to the lvalue Symbol *baseSym = expr->GetBaseSymbol(); - lStoreAssignResult(binop, lvalue, type, ctx, baseSym); + lStoreAssignResult(binop, lvalue, lvalueType, ctx, baseSym); // And then if it's a pre increment/decrement, return the final // computed result; otherwise return the previously-grabbed expression @@ -646,10 +719,12 @@ lEmitNegate(Expr *arg, SourcePos pos, FunctionEmitContext *ctx) { llvm::Value *zero = lLLVMConstantValue(type, g->ctx, 0.); ctx->SetDebugPos(pos); if (type->IsFloatType()) - return ctx->BinaryOperator(llvm::Instruction::FSub, zero, argVal, "fnegate"); + return ctx->BinaryOperator(llvm::Instruction::FSub, zero, argVal, + "fnegate"); else { assert(type->IsIntType()); - return ctx->BinaryOperator(llvm::Instruction::Sub, zero, argVal, "inegate"); + return ctx->BinaryOperator(llvm::Instruction::Sub, zero, argVal, + "inegate"); } } @@ -755,10 +830,10 @@ UnaryExpr::Optimize() { // An error will be issued elsewhere... return this; case Negate: { - // Since we currently only handle int32 and floats here, it's safe - // to stuff whatever we have into a double, do the negate as a - // double, and then return a ConstExpr with the same type as the - // original... + // Since we currently only handle int32, floats, and doubles here, + // it's safe to stuff whatever we have into a double, do the negate + // as a double, and then return a ConstExpr with the same type as + // the original... double v[ISPC_MAX_NVEC]; int count = constExpr->AsDouble(v); for (int i = 0; i < count; ++i) @@ -821,11 +896,27 @@ UnaryExpr::TypeCheck() { return NULL; if (op == PreInc || op == PreDec || op == PostInc || op == PostDec) { - if (!type->IsNumericType()) { - Error(expr->pos, "Can only pre/post increment float and integer " - "types, not \"%s\".", type->GetString().c_str()); + if (type->IsConstType()) { + Error(pos, "Can't assign to type \"%s\" on left-hand side of " + "expression.", type->GetString().c_str()); return NULL; } + + if (type->IsNumericType()) + return this; + + if (dynamic_cast(type) == NULL) { + Error(expr->pos, "Can only pre/post increment numeric and " + "pointer types, not \"%s\".", type->GetString().c_str()); + return NULL; + } + + if (PointerType::IsVoidPointer(type)) { + Error(expr->pos, "Illegal to pre/post increment \"%s\" type.", + type->GetString().c_str()); + return NULL; + } + return this; } @@ -950,42 +1041,110 @@ lEmitBinaryBitOp(BinaryExpr::Op op, llvm::Value *arg0Val, BinaryExpr::Op. 
*/ static llvm::Value * -lEmitBinaryArith(BinaryExpr::Op op, llvm::Value *e0Val, llvm::Value *e1Val, - const Type *type, FunctionEmitContext *ctx, SourcePos pos) { - llvm::Instruction::BinaryOps inst; - bool isFloatOp = type->IsFloatType(); - bool isUnsignedOp = type->IsUnsignedType(); +lEmitBinaryArith(BinaryExpr::Op op, llvm::Value *value0, llvm::Value *value1, + const Type *type0, const Type *type1, + FunctionEmitContext *ctx, SourcePos pos) { + const PointerType *ptrType = dynamic_cast(type0); - switch (op) { - case BinaryExpr::Add: - inst = isFloatOp ? llvm::Instruction::FAdd : llvm::Instruction::Add; - break; - case BinaryExpr::Sub: - inst = isFloatOp ? llvm::Instruction::FSub : llvm::Instruction::Sub; - break; - case BinaryExpr::Mul: - inst = isFloatOp ? llvm::Instruction::FMul : llvm::Instruction::Mul; - break; - case BinaryExpr::Div: - if (type->IsVaryingType() && !isFloatOp) - PerformanceWarning(pos, "Division with varying integer types is " - "very inefficient."); - inst = isFloatOp ? llvm::Instruction::FDiv : - (isUnsignedOp ? llvm::Instruction::UDiv : llvm::Instruction::SDiv); - break; - case BinaryExpr::Mod: - if (type->IsVaryingType() && !isFloatOp) - PerformanceWarning(pos, "Modulus operator with varying types is " - "very inefficient."); - inst = isFloatOp ? llvm::Instruction::FRem : - (isUnsignedOp ? llvm::Instruction::URem : llvm::Instruction::SRem); - break; - default: - FATAL("Invalid op type passed to lEmitBinaryArith()"); - return NULL; + if (ptrType != NULL) { + switch (op) { + case BinaryExpr::Add: + // ptr + integer + return ctx->GetElementPtrInst(value0, value1, ptrType, "ptrmath"); + break; + case BinaryExpr::Sub: { + if (dynamic_cast(type1) != NULL) { + // ptr - ptr + if (ptrType->IsUniformType()) { + value0 = ctx->PtrToIntInst(value0); + value1 = ctx->PtrToIntInst(value1); + } + + // Compute the difference in bytes + llvm::Value *delta = + ctx->BinaryOperator(llvm::Instruction::Sub, value0, value1, + "ptr_diff"); + + // Now divide by the size of the type that the pointer + // points to in order to return the difference in elements. + LLVM_TYPE_CONST llvm::Type *llvmElementType = + ptrType->GetBaseType()->LLVMType(g->ctx); + llvm::Value *size = g->target.SizeOf(llvmElementType); + if (ptrType->IsVaryingType()) + size = ctx->SmearUniform(size); + + if (g->target.is32Bit == false && + g->opt.force32BitAddressing == true) { + // If we're doing 32-bit addressing math on a 64-bit + // target, then trunc the delta down to a 32-bit value. + // (Thus also matching what will be a 32-bit value + // returned from SizeOf above.) 
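At the language level, the subtraction lowered here behaves like C pointer subtraction: the byte difference is divided by the size of the pointed-to type, so the result counts elements. A sketch (elementsBetween is an illustrative name); with --opt=32-bit-addressing on a 64-bit target, the division happens on the truncated 32-bit delta as described above:

static uniform int elementsBetween(uniform float * uniform begin,
                                   uniform float * uniform end) {
    // Computed internally as (byte delta) / sizeof(float).
    return (uniform int)(end - begin);
}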
+ if (ptrType->IsUniformType()) + delta = ctx->TruncInst(delta, LLVMTypes::Int32Type, + "trunc_ptr_delta"); + else + delta = ctx->TruncInst(delta, LLVMTypes::Int32VectorType, + "trunc_ptr_delta"); + } + + // And now do the actual division + return ctx->BinaryOperator(llvm::Instruction::SDiv, delta, size, + "element_diff"); + } + else { + // ptr - integer + llvm::Value *zero = lLLVMConstantValue(type1, g->ctx, 0.); + llvm::Value *negOffset = + ctx->BinaryOperator(llvm::Instruction::Sub, zero, value1, + "negate"); + // Do a GEP as ptr + -integer + return ctx->GetElementPtrInst(value0, negOffset, ptrType, + "ptrmath"); + } + } + default: + FATAL("Logic error in lEmitBinaryArith() for pointer type case"); + return NULL; + } } + else { + assert(Type::EqualIgnoringConst(type0, type1)); - return ctx->BinaryOperator(inst, e0Val, e1Val, "binop"); + llvm::Instruction::BinaryOps inst; + bool isFloatOp = type0->IsFloatType(); + bool isUnsignedOp = type0->IsUnsignedType(); + + switch (op) { + case BinaryExpr::Add: + inst = isFloatOp ? llvm::Instruction::FAdd : llvm::Instruction::Add; + break; + case BinaryExpr::Sub: + inst = isFloatOp ? llvm::Instruction::FSub : llvm::Instruction::Sub; + break; + case BinaryExpr::Mul: + inst = isFloatOp ? llvm::Instruction::FMul : llvm::Instruction::Mul; + break; + case BinaryExpr::Div: + if (type0->IsVaryingType() && !isFloatOp) + PerformanceWarning(pos, "Division with varying integer types is " + "very inefficient."); + inst = isFloatOp ? llvm::Instruction::FDiv : + (isUnsignedOp ? llvm::Instruction::UDiv : llvm::Instruction::SDiv); + break; + case BinaryExpr::Mod: + if (type0->IsVaryingType() && !isFloatOp) + PerformanceWarning(pos, "Modulus operator with varying types is " + "very inefficient."); + inst = isFloatOp ? llvm::Instruction::FRem : + (isUnsignedOp ? 
llvm::Instruction::URem : llvm::Instruction::SRem); + break; + default: + FATAL("Invalid op type passed to lEmitBinaryArith()"); + return NULL; + } + + return ctx->BinaryOperator(inst, value0, value1, "binop"); + } } @@ -1052,8 +1211,8 @@ BinaryExpr::GetValue(FunctionEmitContext *ctx) const { if (!arg0 || !arg1) return NULL; - llvm::Value *e0Val = arg0->GetValue(ctx); - llvm::Value *e1Val = arg1->GetValue(ctx); + llvm::Value *value0 = arg0->GetValue(ctx); + llvm::Value *value1 = arg1->GetValue(ctx); ctx->SetDebugPos(pos); switch (op) { @@ -1062,14 +1221,15 @@ BinaryExpr::GetValue(FunctionEmitContext *ctx) const { case Mul: case Div: case Mod: - return lEmitBinaryArith(op, e0Val, e1Val, arg0->GetType(), ctx, pos); + return lEmitBinaryArith(op, value0, value1, arg0->GetType(), arg1->GetType(), + ctx, pos); case Lt: case Gt: case Le: case Ge: case Equal: case NotEqual: - return lEmitBinaryCmp(op, e0Val, e1Val, arg0->GetType(), ctx, pos); + return lEmitBinaryCmp(op, value0, value1, arg0->GetType(), ctx, pos); case Shl: case Shr: case BitAnd: @@ -1079,17 +1239,17 @@ BinaryExpr::GetValue(FunctionEmitContext *ctx) const { dynamic_cast(arg1) == NULL) PerformanceWarning(pos, "Shift right is extremely inefficient for " "varying shift amounts."); - return lEmitBinaryBitOp(op, e0Val, e1Val, + return lEmitBinaryBitOp(op, value0, value1, arg0->GetType()->IsUnsignedType(), ctx); } case LogicalAnd: - return ctx->BinaryOperator(llvm::Instruction::And, e0Val, e1Val, + return ctx->BinaryOperator(llvm::Instruction::And, value0, value1, "logical_and"); case LogicalOr: - return ctx->BinaryOperator(llvm::Instruction::Or, e0Val, e1Val, + return ctx->BinaryOperator(llvm::Instruction::Or, value0, value1, "logical_or"); case Comma: - return e1Val; + return value1; default: FATAL("logic error"); return NULL; @@ -1106,28 +1266,42 @@ BinaryExpr::GetType() const { if (type0 == NULL || type1 == NULL) return NULL; -#if 0 - // FIXME: I think these are redundant given the checks in - // BinaryExpr::TypeCheck(). They should either be removed or updated - // to handle the cases where pointer == and != tests are ok. - if (!type0->IsBoolType() && !type0->IsNumericType()) { - Error(arg0->pos, "First operand to binary operator \"%s\" is of invalid " - "type \"%s\".", lOpString(op), type0->GetString().c_str()); - return NULL; - } - if (!type1->IsBoolType() && !type1->IsNumericType()) { - Error(arg1->pos, - "Second operand to binary operator \"%s\" is of invalid " - "type \"%s\".", lOpString(op), type1->GetString().c_str()); - return NULL; - } -#endif + // If this hits, it means that our TypeCheck() method hasn't been + // called before GetType() was called; adding two pointers is illegal + // and will fail type checking and (int + ptr) should be canonicalized + // into (ptr + int) by type checking. + if (op == Add) + assert(dynamic_cast(type1) == NULL); - const Type *promotedType = Type::MoreGeneralType(type0, type1, pos, - lOpString(op)); + if (op == Comma) + return arg1->GetType(); + + if (dynamic_cast(type0) != NULL) { + if (op == Add) + // ptr + int -> ptr + return type0; + else if (op == Sub) { + if (dynamic_cast(type1) != NULL) { + // ptr - ptr -> ~ptrdiff_t + const Type *diffType = (g->target.is32Bit || + g->opt.force32BitAddressing) ? + AtomicType::UniformInt32 : AtomicType::UniformInt64; + if (type0->IsVaryingType() || type1->IsVaryingType()) + diffType = diffType->GetAsVaryingType(); + return diffType; + } + else + // ptr - int -> ptr + return type0; + } + // otherwise fall through for these two... 
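The fall-through noted above is for == and !=, the only other binary operators that accept pointer operands; they compare addresses and yield a bool of matching variability. An illustrative sketch (samePlace is not part of the patch):

static inline uniform bool samePlace(uniform float * uniform a,
                                     uniform float * uniform b) {
    return a == b;   // uniform operands give a uniform bool result
}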
+ assert(op == Equal || op == NotEqual); + } + + const Type *exprType = Type::MoreGeneralType(type0, type1, pos, lOpString(op)); // I don't think that MoreGeneralType should be able to fail after the - // type checks above. - assert(promotedType != NULL); + // checks done in BinaryExpr::TypeCheck(). + assert(exprType != NULL); switch (op) { case Add: @@ -1135,7 +1309,7 @@ BinaryExpr::GetType() const { case Mul: case Div: case Mod: - return promotedType; + return exprType; case Lt: case Gt: case Le: @@ -1144,16 +1318,16 @@ BinaryExpr::GetType() const { case NotEqual: case LogicalAnd: case LogicalOr: - return lMatchingBoolType(promotedType); + return lMatchingBoolType(exprType); case Shl: case Shr: return type1->IsVaryingType() ? type0->GetAsVaryingType() : type0; case BitAnd: case BitXor: case BitOr: - return promotedType; + return exprType; case Comma: - return arg1->GetType(); + // handled above, so fall through here just in case default: FATAL("logic error in BinaryExpr::GetType()"); return NULL; @@ -1341,8 +1515,7 @@ BinaryExpr::Optimize() { if (constArg0 == NULL || constArg1 == NULL) return this; - assert(Type::Equal(arg0->GetType()->GetAsNonConstType(), - arg1->GetType()->GetAsNonConstType())); + assert(Type::EqualIgnoringConst(arg0->GetType(), arg1->GetType())); const Type *type = arg0->GetType()->GetAsNonConstType(); if (type == AtomicType::UniformFloat || type == AtomicType::VaryingFloat) { float v0[ISPC_MAX_NVEC], v1[ISPC_MAX_NVEC]; @@ -1439,6 +1612,70 @@ BinaryExpr::TypeCheck() { assert(type1 != NULL); } + const PointerType *pt0 = dynamic_cast(type0); + const PointerType *pt1 = dynamic_cast(type1); + if (pt0 != NULL && pt1 != NULL && op == Sub) { + if (PointerType::IsVoidPointer(type0)) { + Error(pos, "Illegal to perform pointer arithmetic " + "on \"%s\" type.", type0->GetString().c_str()); + return NULL; + } + if (PointerType::IsVoidPointer(type1)) { + Error(pos, "Illegal to perform pointer arithmetic " + "on \"%s\" type.", type1->GetString().c_str()); + return NULL; + } + + const Type *t = Type::MoreGeneralType(type0, type1, pos, "-"); + if (t == NULL) + return NULL; + arg0 = TypeConvertExpr(arg0, t, "pointer subtraction"); + arg1 = TypeConvertExpr(arg1, t, "pointer subtraction"); + if (arg0 == NULL || arg1 == NULL) + return NULL; + + return this; + } + else if (((pt0 != NULL || pt1 != NULL) && op == Add) || + (pt0 != NULL && op == Sub)) { + // Handle ptr + int, int + ptr, ptr - int + if (pt0 != NULL && pt1 != NULL) { + Error(pos, "Illegal to add two pointer types \"%s\" and \"%s\".", + pt0->GetString().c_str(), pt1->GetString().c_str()); + return NULL; + } + else if (pt1 != NULL) { + // put in canonical order with the pointer as the first operand + // for GetValue() + std::swap(arg0, arg1); + std::swap(pt0, pt1); + } + + assert(pt0 != NULL); + + if (PointerType::IsVoidPointer(pt0)) { + Error(pos, "Illegal to perform pointer arithmetic " + "on \"%s\" type.", pt0->GetString().c_str()); + return NULL; + } + + const Type *offsetType = g->target.is32Bit ? 
+ AtomicType::UniformInt32 : AtomicType::UniformInt64; + if (pt0->IsVaryingType()) + offsetType = offsetType->GetAsVaryingType(); + if (type1->IsVaryingType()) { + arg0 = TypeConvertExpr(arg0, type0->GetAsVaryingType(), + "pointer addition"); + assert(arg0 != NULL); + } + + arg1 = TypeConvertExpr(arg1, offsetType, lOpString(op)); + if (arg1 == NULL) + return NULL; + + return this; + } + switch (op) { case Shl: case Shr: @@ -1618,6 +1855,26 @@ BinaryExpr::Print() const { /////////////////////////////////////////////////////////////////////////// // AssignExpr +static const char * +lOpString(AssignExpr::Op op) { + switch (op) { + case AssignExpr::Assign: return "="; + case AssignExpr::MulAssign: return "*="; + case AssignExpr::DivAssign: return "/="; + case AssignExpr::ModAssign: return "%%="; + case AssignExpr::AddAssign: return "+="; + case AssignExpr::SubAssign: return "-="; + case AssignExpr::ShlAssign: return "<<="; + case AssignExpr::ShrAssign: return ">>="; + case AssignExpr::AndAssign: return "&="; + case AssignExpr::XorAssign: return "^="; + case AssignExpr::OrAssign: return "|="; + default: + FATAL("Missing op in lOpstring"); + return ""; + } +} + /** Emit code to do an "assignment + operation" operator, e.g. "+=". */ static llvm::Value * @@ -1630,13 +1887,16 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type, Error(pos, "Can't assign to left-hand side of expression."); return NULL; } + const Type *lvalueType = arg0->GetLValueType(); + if (lvalueType == NULL) + return NULL; // Get the value on the right-hand side of the assignment+operation // operator and load the current value on the left-hand side. llvm::Value *rvalue = arg1->GetValue(ctx); ctx->SetDebugPos(pos); llvm::Value *mask = lMaskForSymbol(baseSym, ctx); - llvm::Value *oldLHS = ctx->LoadInst(lv, mask, type, "opassign_load"); + llvm::Value *oldLHS = ctx->LoadInst(lv, mask, lvalueType, "opassign_load"); // Map the operator to the corresponding BinaryExpr::Op operator BinaryExpr::Op basicop; @@ -1664,7 +1924,8 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type, case AssignExpr::ModAssign: case AssignExpr::AddAssign: case AssignExpr::SubAssign: - newValue = lEmitBinaryArith(basicop, oldLHS, rvalue, type, ctx, pos); + newValue = lEmitBinaryArith(basicop, oldLHS, rvalue, type, + arg1->GetType(), ctx, pos); break; case AssignExpr::ShlAssign: case AssignExpr::ShrAssign: @@ -1680,7 +1941,7 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type, } // And store the result back to the lvalue. - lStoreAssignResult(newValue, lv, type, ctx, baseSym); + lStoreAssignResult(newValue, lv, lvalueType, ctx, baseSym); return newValue; } @@ -1703,7 +1964,7 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const { #if 0 if (ctx->VaryingCFDepth() > 0 && type->IsUniformType()) - Warning(pos, "Modifying \"uniform\" value under \"varying\" control flow. Beware."); + Warning(pos, "Modifying \"uniform\" value under \"varying\" control flow."); #endif Symbol *baseSym = lvalue->GetBaseSymbol(); @@ -1717,15 +1978,21 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const { switch (op) { case Assign: { llvm::Value *lv = lvalue->GetLValue(ctx); - if (!lv) { - // FIXME: another, I believe, now unnecessary test? 
- Error(lvalue->pos, "Can't assign to left-hand side of expression."); + if (lv == NULL) { + assert(m->errorCount > 0); + return NULL; + } + const Type *lvalueType = lvalue->GetLValueType(); + if (lvalueType == NULL) { + assert(m->errorCount > 0); return NULL; } llvm::Value *rv = rvalue->GetValue(ctx); - if (rv == NULL) + if (rv == NULL) { + assert(m->errorCount > 0); return NULL; + } ctx->SetDebugPos(pos); @@ -1750,7 +2017,7 @@ AssignExpr::GetValue(FunctionEmitContext *ctx) const { } #endif - lStoreAssignResult(rv, lv, type, ctx, baseSym); + lStoreAssignResult(rv, lv, lvalueType, ctx, baseSym); return rv; } @@ -1795,6 +2062,37 @@ AssignExpr::GetType() const { } +/** Recursively checks a structure type to see if it (or any struct type + that it holds) has a const-qualified member. */ +static bool +lCheckForConstStructMember(SourcePos pos, const StructType *structType, + const StructType *initialType) { + for (int i = 0; i < structType->GetElementCount(); ++i) { + const Type *t = structType->GetElementType(i); + if (t->IsConstType()) { + if (structType == initialType) + Error(pos, "Illegal to assign to type \"%s\" due to element " + "\"%s\" with type \"%s\".", structType->GetString().c_str(), + structType->GetElementName(i).c_str(), + t->GetString().c_str()); + else + Error(pos, "Illegal to assign to type \"%s\" in type \"%s\" " + "due to element \"%s\" with type \"%s\".", + structType->GetString().c_str(), + initialType->GetString().c_str(), + structType->GetElementName(i).c_str(), + t->GetString().c_str()); + return true; + } + + const StructType *st = dynamic_cast(t); + if (st != NULL && lCheckForConstStructMember(pos, st, initialType)) + return true; + } + return false; +} + + Expr * AssignExpr::TypeCheck() { if (lvalue != NULL) @@ -1806,13 +2104,7 @@ AssignExpr::TypeCheck() { bool lvalueIsReference = dynamic_cast(lvalue->GetType()) != NULL; - bool rvalueIsReference = - dynamic_cast(rvalue->GetType()) != NULL; - - // hack to allow asigning array references e.g. in a struct... - if (lvalueIsReference && - !(rvalueIsReference && - dynamic_cast(rvalue->GetType()->GetReferenceTarget()))) + if (lvalueIsReference) lvalue = new DereferenceExpr(lvalue, lvalue->pos); FunctionSymbolExpr *fse; @@ -1828,23 +2120,58 @@ AssignExpr::TypeCheck() { lvalue->GetType()->GetString().c_str()); return NULL; } - if (!fse->ResolveOverloads(ftype->GetArgumentTypes())) { + + std::vector paramTypes; + for (int i = 0; i < ftype->GetNumParameters(); ++i) + paramTypes.push_back(ftype->GetParameterType(i)); + + if (!fse->ResolveOverloads(paramTypes)) { Error(pos, "Unable to find overloaded function for function " "pointer assignment."); return NULL; } } - rvalue = TypeConvertExpr(rvalue, lvalue->GetType(), "assignment"); + const Type *lhsType = lvalue->GetType(); + if (dynamic_cast(lhsType) != NULL) { + if (op == AddAssign || op == SubAssign) { + if (PointerType::IsVoidPointer(lhsType)) { + Error(pos, "Illegal to perform pointer arithmetic on \"%s\" " + "type.", lhsType->GetString().c_str()); + return NULL; + } + + const Type *deltaType = g->target.is32Bit ? 
AtomicType::UniformInt32 : + AtomicType::UniformInt64; + if (lhsType->IsVaryingType()) + deltaType = deltaType->GetAsVaryingType(); + rvalue = TypeConvertExpr(rvalue, deltaType, lOpString(op)); + } + else if (op == Assign) + rvalue = TypeConvertExpr(rvalue, lhsType, "assignment"); + else { + Error(pos, "Assignment operator \"%s\" is illegal with pointer types.", + lOpString(op)); + return NULL; + } + } + else + rvalue = TypeConvertExpr(rvalue, lhsType, lOpString(op)); + if (rvalue == NULL) return NULL; - if (lvalue->GetType()->IsConstType()) { - Error(pos, "Can't assign to type \"%s\" on left-hand size of " - "expression.", lvalue->GetType()->GetString().c_str()); + if (lhsType->IsConstType()) { + Error(pos, "Can't assign to type \"%s\" on left-hand side of " + "expression.", lhsType->GetString().c_str()); return NULL; } + // Make sure we're not assigning to a struct that has a constant member + const StructType *st = dynamic_cast(lhsType); + if (st != NULL && lCheckForConstStructMember(pos, st, st)) + return NULL; + return this; } @@ -1870,19 +2197,7 @@ AssignExpr::Print() const { printf("[%s] assign (", GetType()->GetString().c_str()); lvalue->Print(); - printf(" "); - if (op == Assign) printf("="); - if (op == MulAssign) printf("*="); - if (op == DivAssign) printf("/="); - if (op == ModAssign) printf("%%="); - if (op == AddAssign) printf("+="); - if (op == SubAssign) printf("-="); - if (op == ShlAssign) printf("<<="); - if (op == ShrAssign) printf(">>="); - if (op == AndAssign) printf("&="); - if (op == XorAssign) printf("^="); - if (op == OrAssign) printf("|="); - printf(" "); + printf(" %s ", lOpString(op)); rvalue->Print(); printf(")"); pos.Print(); @@ -1911,8 +2226,10 @@ lEmitVaryingSelect(FunctionEmitContext *ctx, llvm::Value *test, // Don't need to worry about masking here ctx->StoreInst(expr2, resultPtr); // Use masking to conditionally store the expr1 values - ctx->StoreInst(expr1, resultPtr, test, type); - return ctx->LoadInst(resultPtr, LLVMMaskAllOn, type, "selectexpr_final"); + assert(resultPtr->getType() == + PointerType::GetUniform(type)->LLVMType(g->ctx)); + ctx->StoreInst(expr1, resultPtr, test, PointerType::GetUniform(type)); + return ctx->LoadInst(resultPtr, "selectexpr_final"); } @@ -1996,15 +2313,15 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const { // Do an element-wise select llvm::Value *result = llvm::UndefValue::get(type->LLVMType(g->ctx)); for (int i = 0; i < vt->GetElementCount(); ++i) { - llvm::Value *ti = ctx->ExtractInst(testVal, i, ""); - llvm::Value *e1i = ctx->ExtractInst(expr1Val, i, ""); - llvm::Value *e2i = ctx->ExtractInst(expr2Val, i, ""); + llvm::Value *ti = ctx->ExtractInst(testVal, i); + llvm::Value *e1i = ctx->ExtractInst(expr1Val, i); + llvm::Value *e2i = ctx->ExtractInst(expr2Val, i); llvm::Value *sel = NULL; if (testType->IsUniformType()) sel = ctx->SelectInst(ti, e1i, e2i); else sel = lEmitVaryingSelect(ctx, ti, e1i, e2i, vt->GetElementType()); - result = ctx->InsertInst(result, sel, i, ""); + result = ctx->InsertInst(result, sel, i); } return result; } @@ -2181,41 +2498,27 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { // GetLValue call below needs a FunctionEmitContext, which is // problematic...) std::vector callargs = args->exprs; - const std::vector &argTypes = ft->GetArgumentTypes(); bool err = false; + + // Specifically, this can happen if there's an error earlier during + // overload resolution. 
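Rounding out the AssignExpr changes above: on a pointer left-hand side only =, +=, and -= are accepted, with the integer right-hand side converted to the 32- or 64-bit delta type chosen for the target; other compound assignments on pointers are rejected at type-check time. A sketch (advance is an illustrative name):

static uniform float * uniform advance(uniform float * uniform p,
                                       uniform int n) {
    p += n;    // pointer += integer: legal
    p -= 1;    // pointer -= integer: legal
    return p;  // by contrast, p *= n would be a type-check error
}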
+ if ((int)callargs.size() > ft->GetNumParameters()) { + assert(m->errorCount > 0); + return NULL; + } + for (unsigned int i = 0; i < callargs.size(); ++i) { Expr *argExpr = callargs[i]; if (argExpr == NULL) continue; - // All arrays should already have been converted to reference types - assert(dynamic_cast(argTypes[i]) == NULL); - - if (dynamic_cast(argTypes[i])) { - if (!dynamic_cast(argExpr->GetType())) { - // The function wants a reference type but the argument - // being passed isn't already a reference. - if (argExpr->GetLValue(ctx) == NULL) { - // If it doesn't have an lvalue, then we can't make it - // a reference, so issue an error. - // FIXME: for const reference parameters, we could - // store the expr's value to alloca'ed memory and then - // pass a reference to that... - Error(pos, "Can't pass non-lvalue as \"reference\" parameter \"%s\" " - "to function.", ft->GetArgumentName(i).c_str()); - err = true; - } - else - argExpr = new ReferenceExpr(argExpr, argExpr->pos); - } - } + const Type *paramType = ft->GetParameterType(i); // Do whatever type conversion is needed - argExpr = TypeConvertExpr(argExpr, argTypes[i], + argExpr = TypeConvertExpr(argExpr, paramType, "function call argument"); - // The function overload resolution code should have ensured that - // we can successfully do any type conversions needed here. - assert(argExpr != NULL); + if (argExpr == NULL) + return NULL; callargs[i] = argExpr; } if (err) @@ -2223,61 +2526,33 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { // Fill in any default argument values needed. // FIXME: should we do this during type checking? - const std::vector &argumentDefaults = ft->GetArgumentDefaults(); - for (unsigned int i = callargs.size(); i < argumentDefaults.size(); ++i) { - Expr * d = TypeConvertExpr(argumentDefaults[i], argTypes[i], - "function call default argument"); + for (int i = callargs.size(); i < ft->GetNumParameters(); ++i) { + Expr *paramDefault = ft->GetParameterDefault(i); + const Type *paramType = ft->GetParameterType(i); + // FIXME: this type conv should happen when we create the function + // type! + Expr *d = TypeConvertExpr(paramDefault, paramType, + "function call default argument"); if (d == NULL) return NULL; callargs.push_back(d); } - // Now evaluate the values of all of the parameters being passed. We - // need to evaluate these first here, since their GetValue() calls may - // change the current basic block (e.g. if one of these is itself a - // function call expr...); we need to basic blocks to stay consistent - // below when we emit the code that does the actual funciton call. + // Now evaluate the values of all of the parameters being passed. std::vector argVals; - std::vector storedArgValPtrs, argValLValues; for (unsigned int i = 0; i < callargs.size(); ++i) { Expr *argExpr = callargs[i]; - if (!argExpr) + if (argExpr == NULL) // give up; we hit an error earlier return NULL; llvm::Value *argValue = argExpr->GetValue(ctx); - if (!argValue) + if (argValue == NULL) // something went wrong in evaluating the argument's // expression, so give up on this return NULL; - if (dynamic_cast(argTypes[i]) && - !llvm::isa(argValue->getType())) { - assert(llvm::isa(argValue->getType())); - // if the parameter is a reference and the lvalue needs a - // gather to pull it together, then do the gather here and - // store the result to local memory, so that we can pass the - // single pointer to the local memory that is needed for the - // reference. 
Below, we'll copy the result back to the varying - lvalue pointer we have here. (== pass by value/result) - const ReferenceType *rt = - dynamic_cast(argExpr->GetType()); - assert(rt != NULL); - const Type *type = rt->GetReferenceTarget(); - - llvm::Value *ptr = ctx->AllocaInst(type->LLVMType(g->ctx), "arg"); - llvm::Value *mask = lMaskForSymbol(argExpr->GetBaseSymbol(), ctx); - llvm::Value *val = ctx->LoadInst(argValue, mask, type); - ctx->StoreInst(val, ptr); - storedArgValPtrs.push_back(ptr); - argValLValues.push_back(argValue); - argVals.push_back(ptr); - } - else { - argVals.push_back(argValue); - storedArgValPtrs.push_back(NULL); - argValLValues.push_back(NULL); - } + argVals.push_back(argValue); } @@ -2290,25 +2565,9 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { ctx->LaunchInst(callee, argVals, launchCount); } else - retVal = ctx->CallInst(callee, ft->GetReturnType(), argVals, + retVal = ctx->CallInst(callee, ft, argVals, isVoidFunc ? "" : "calltmp"); - // For anything we had to do as pass by value/result, copy the - // corresponding reference values back out - for (unsigned int i = 0; i < storedArgValPtrs.size(); ++i) { - llvm::Value *ptr = storedArgValPtrs[i]; - if (ptr != NULL) { - const ReferenceType *rt = - dynamic_cast(callargs[i]->GetType()); - assert(rt != NULL); - llvm::Value *load = ctx->LoadInst(ptr, NULL, rt->GetReferenceTarget(), - "load_ref"); - Symbol *baseSym = callargs[i]->GetBaseSymbol(); - lStoreAssignResult(load, argValLValues[i], rt->GetReferenceTarget(), - ctx, baseSym); - } - } - if (isVoidFunc) return NULL; else @@ -2583,39 +2842,14 @@ ExprList::Print() const { IndexExpr::IndexExpr(Expr *a, Expr *i, SourcePos p) : Expr(p) { - arrayOrVector = a; + baseExpr = a; index = i; } -// FIXME: This is an ugly hack--if we're indexing into a uniform ispc -// VectorType, then this bitcasts the corresponding llvm::VectorType value -// to be a pointer to the vector's element type, so that a GEP to index -// from the pointer indices elements of the llvm::VectorType and doesn't -// incorrectly try to index into an array of llvm::VectorType instances. - -static llvm::Value * -lCastUniformVectorBasePtr(llvm::Value *ptr, FunctionEmitContext *ctx) { - LLVM_TYPE_CONST llvm::PointerType *baseType = - llvm::dyn_cast(ptr->getType()); - if (!baseType) - return ptr; - - LLVM_TYPE_CONST llvm::VectorType *baseEltVecType = - llvm::dyn_cast(baseType->getElementType()); - if (!baseEltVecType) - return ptr; - - LLVM_TYPE_CONST llvm::Type *vecEltType = baseEltVecType->getElementType(); - int numElts = baseEltVecType->getNumElements(); - LLVM_TYPE_CONST llvm::Type *castType = - llvm::PointerType::get(llvm::ArrayType::get(vecEltType, numElts), 0); - return ctx->BitCastInst(ptr, castType); -} - - /** When computing pointer values, we need to apply a per-lane offset when - we're indexing into varying data. Consdier the following ispc code: + we have a varying pointer that is itself indexing into varying data. + Consider the following ispc code: uniform float u[] = ...; float v[] = ...; @@ -2635,38 +2869,23 @@ lCastUniformVectorBasePtr(llvm::Value *ptr, FunctionEmitContext *ctx) { */ static llvm::Value * lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr, - const Type *returnType, const Type *indexedType) { - // If the result of the indexing isn't a varying atomic type, then - nothing to do here.
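// (Hedged illustration of the offsetting, assuming an 8-wide target: a varying pointer into "float v[]" carries one address per program instance, each pointing at the start of a whole <8 x float> block, so lane i must additionally step forward by i uniform elements -- conceptually finalPtr[i] = basePtr[i] + i, in units of sizeof(uniform float) -- which is what the varyingOffsets GEP below computes.)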
- if (returnType->IsVaryingType() == false || - (dynamic_cast(returnType) == NULL && - dynamic_cast(returnType) == NULL)) + const Type *ptrType) { + if (dynamic_cast(ptrType) != NULL) + // References are uniform pointers, so no offsetting is needed return ptr; - // We should now have an array of pointer values, represing in a - // varying pointer. - LLVM_TYPE_CONST llvm::ArrayType *at = - llvm::dyn_cast(ptr->getType()); - if (at == NULL) - return ptr; - LLVM_TYPE_CONST llvm::PointerType *pt = - llvm::dyn_cast(at->getElementType()); - assert(pt != NULL); - - // If the pointers are to uniform types (e.g. ptr->getType() == - // [8 x float *]), then we have the u[index] situation from the comment - // above, and no additional offset is needed. Otherwise we have - // pointers to varying atomic types--e.g. ptr->getType() == - // [8 x <8 x float> *] - if (pt->getElementType()->isIntegerTy() || - pt->getElementType()->isFloatingPointTy() || - pt->getElementType()->isPointerTy()) + assert(dynamic_cast(ptrType) != NULL); + if (ptrType->IsUniformType()) return ptr; - // But not so fast: if the reason we have a vector of pointers is that - // we're indexing into an array of uniform short-vector types, then we - // don't need the offsets. - if (dynamic_cast(indexedType) != NULL) + const Type *baseType = ptrType->GetBaseType(); + if (baseType->IsUniformType()) + return ptr; + + // must be indexing into varying atomic, enum, or pointer types + if (dynamic_cast(baseType) == NULL && + dynamic_cast(baseType) == NULL && + dynamic_cast(baseType) == NULL) return ptr; // Onward: compute the per lane offsets. @@ -2676,46 +2895,52 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr, varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i, "varying_delta"); - // Cast the pointer to the corresponding uniform pointer - // type--e.g. from [8 x <8 x float> *] to [8 x float *]. - LLVM_TYPE_CONST llvm::Type *unifType = - returnType->GetAsUniformType()->LLVMType(g->ctx); - LLVM_TYPE_CONST llvm::PointerType *ptrCastType = - llvm::PointerType::get(llvm::ArrayType::get(unifType, 0), 0); - ptr = ctx->BitCastInst(ptr, ptrCastType, "ptr2unif"); - - // And finally add the per-lane offsets. - return ctx->GetElementPtrInst(ptr, LLVMInt32(0), varyingOffsets); + // And finally add the per-lane offsets. Note that we lie to the GEP + // call and tell it that the pointers are to uniform elements and not + // varying elements, so that the offsets in terms of (0,1,2,...) will + // end up turning into the correct step in bytes... + const Type *uniformElementType = baseType->GetAsUniformType(); + const Type *ptrUnifType = PointerType::GetVarying(uniformElementType); + return ctx->GetElementPtrInst(ptr, varyingOffsets, ptrUnifType); } llvm::Value * IndexExpr::GetValue(FunctionEmitContext *ctx) const { - const Type *arrayOrVectorType; - if (arrayOrVector == NULL || index == NULL || - ((arrayOrVectorType = arrayOrVector->GetType()) == NULL)) + const Type *baseExprType; + if (baseExpr == NULL || index == NULL || + ((baseExprType = baseExpr->GetType()) == NULL)) return NULL; ctx->SetDebugPos(pos); + llvm::Value *lvalue = GetLValue(ctx); llvm::Value *mask = NULL; + const Type *lvalueType = GetLValueType(); if (lvalue == NULL) { // We may be indexing into a temporary that hasn't hit memory, so // get the full value and stuff it into temporary alloca'd space so // that we can index from there... 
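// (Hedged example of when this no-lvalue path triggers: the indexed expression is a temporary with no storage of its own, e.g. the short-vector result of an arithmetic expression, so its value is first spilled to a stack slot that the GEP below can address.)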
- llvm::Value *val = arrayOrVector->GetValue(ctx); + llvm::Value *val = baseExpr->GetValue(ctx); if (val == NULL) { assert(m->errorCount > 0); return NULL; } ctx->SetDebugPos(pos); - llvm::Value *ptr = ctx->AllocaInst(arrayOrVectorType->LLVMType(g->ctx), + llvm::Value *ptr = ctx->AllocaInst(baseExprType->LLVMType(g->ctx), "array_tmp"); ctx->StoreInst(val, ptr); - ptr = lCastUniformVectorBasePtr(ptr, ctx); - lvalue = ctx->GetElementPtrInst(ptr, LLVMInt32(0), index->GetValue(ctx)); - lvalue = lAddVaryingOffsetsIfNeeded(ctx, lvalue, GetType(), - arrayOrVectorType); + + lvalue = ctx->GetElementPtrInst(ptr, LLVMInt32(0), index->GetValue(ctx), + PointerType::GetUniform(baseExprType)); + + const SequentialType *st = + dynamic_cast(baseExprType); + assert(st != NULL); + lvalueType = PointerType::GetUniform(st->GetElementType()); + + lvalue = lAddVaryingOffsetsIfNeeded(ctx, lvalue, lvalueType); + mask = LLVMMaskAllOn; } else { @@ -2725,58 +2950,82 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { } ctx->SetDebugPos(pos); - return ctx->LoadInst(lvalue, mask, GetType(), "index"); + return ctx->LoadInst(lvalue, mask, lvalueType, "index"); } const Type * IndexExpr::GetType() const { - const Type *arrayOrVectorType, *indexType; - if (!arrayOrVector || !index || - ((arrayOrVectorType = arrayOrVector->GetType()) == NULL) || + const Type *baseExprType, *indexType; + if (!baseExpr || !index || + ((baseExprType = baseExpr->GetType()) == NULL) || ((indexType = index->GetType()) == NULL)) return NULL; - const SequentialType *sequentialType = - dynamic_cast(arrayOrVectorType->GetReferenceTarget()); - // Typechecking should have caught this... - assert(sequentialType != NULL); + const Type *elementType = NULL; + const PointerType *pointerType = + dynamic_cast(baseExprType); + if (pointerType != NULL) + // ptr[index] -> type that the pointer points to + elementType = pointerType->GetBaseType(); + else { + // sequential type[index] -> element type of the sequential type + const SequentialType *sequentialType = + dynamic_cast(baseExprType->GetReferenceTarget()); + // Typechecking should have caught this... + assert(sequentialType != NULL); + elementType = sequentialType->GetElementType(); + } - const Type *elementType = sequentialType->GetElementType(); if (indexType->IsUniformType()) // If the index is uniform, the resulting type is just whatever the // element type is return elementType; else - // A varying index into uniform array/vector -> varying type (and - // same for varying array of course...) + // A varying index into even a uniform base type -> varying type return elementType->GetAsVaryingType(); } Symbol * IndexExpr::GetBaseSymbol() const { - return arrayOrVector ? arrayOrVector->GetBaseSymbol() : NULL; + return baseExpr ? 
baseExpr->GetBaseSymbol() : NULL; } llvm::Value * IndexExpr::GetLValue(FunctionEmitContext *ctx) const { - const Type *arrayOrVectorType; - if (arrayOrVector == NULL || index == NULL || - ((arrayOrVectorType = arrayOrVector->GetType()) == NULL)) + const Type *baseExprType; + if (baseExpr == NULL || index == NULL || + ((baseExprType = baseExpr->GetType()) == NULL)) return NULL; ctx->SetDebugPos(pos); + if (dynamic_cast(baseExprType) != NULL) { + // We're indexing off of a base pointer + llvm::Value *baseValue = baseExpr->GetValue(ctx); + llvm::Value *indexValue = index->GetValue(ctx); + if (baseValue == NULL || indexValue == NULL) + return NULL; + ctx->SetDebugPos(pos); + return ctx->GetElementPtrInst(baseValue, indexValue, + baseExprType, "ptr_offset"); + } + + // Otherwise it's an array or vector llvm::Value *basePtr = NULL; - if (dynamic_cast(arrayOrVectorType) || - dynamic_cast(arrayOrVectorType)) - basePtr = arrayOrVector->GetLValue(ctx); + const Type *basePtrType = NULL; + if (dynamic_cast(baseExprType) || + dynamic_cast(baseExprType)) { + basePtr = baseExpr->GetLValue(ctx); + basePtrType = baseExpr->GetLValueType(); + } else { - arrayOrVectorType = arrayOrVectorType->GetReferenceTarget(); - assert(dynamic_cast(arrayOrVectorType) || - dynamic_cast(arrayOrVectorType)); - basePtr = arrayOrVector->GetValue(ctx); + baseExprType = baseExprType->GetReferenceTarget(); + assert(dynamic_cast(baseExprType) || + dynamic_cast(baseExprType)); + basePtr = baseExpr->GetValue(ctx); + basePtrType = baseExpr->GetType(); } if (!basePtr) return NULL; @@ -2785,37 +3034,71 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const { // may lead to an out-of-bounds access. ConstExpr *ce = dynamic_cast(index); const SequentialType *seqType = - dynamic_cast(arrayOrVectorType); - assert(seqType != NULL); - int nElements = seqType->GetElementCount(); - if (ce != NULL && nElements > 0) { - int32_t indices[ISPC_MAX_NVEC]; - int count = ce->AsInt32(indices); - for (int i = 0; i < count; ++i) { - if (indices[i] < 0 || indices[i] >= nElements) - Warning(index->pos, "Array index \"%d\" may be out of bounds for " - "\"%d\" element array.", indices[i], nElements); + dynamic_cast(baseExprType); + if (seqType != NULL) { + int nElements = seqType->GetElementCount(); + if (ce != NULL && nElements > 0) { + int32_t indices[ISPC_MAX_NVEC]; + int count = ce->AsInt32(indices); + for (int i = 0; i < count; ++i) { + if (indices[i] < 0 || indices[i] >= nElements) + Warning(index->pos, "Array index \"%d\" may be out of bounds for " + "\"%d\" element array.", indices[i], nElements); + } } } - basePtr = lCastUniformVectorBasePtr(basePtr, ctx); - ctx->SetDebugPos(pos); - llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, LLVMInt32(0), - index->GetValue(ctx)); - ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetType(), arrayOrVectorType); - + llvm::Value *ptr = + ctx->GetElementPtrInst(basePtr, LLVMInt32(0), index->GetValue(ctx), + basePtrType); + ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); return ptr; } +const Type * +IndexExpr::GetLValueType() const { + const Type *baseExprLValueType, *indexType; + if (baseExpr == NULL || index == NULL || + ((baseExprLValueType = baseExpr->GetLValueType()) == NULL) || + ((indexType = index->GetType()) == NULL)) + return NULL; + + if (dynamic_cast(baseExprLValueType) != NULL) + baseExprLValueType = PointerType::GetUniform(baseExprLValueType->GetReferenceTarget()); + assert(dynamic_cast(baseExprLValueType) != NULL); + + // FIXME: can we do something in the type system that unifies the + // 
concept of a sequential type's element type and a pointer type's + // base type? The code below is identical but for handling that + // difference. IndexableType? + const SequentialType *st = + dynamic_cast(baseExprLValueType->GetBaseType()); + if (st != NULL) { + if (baseExprLValueType->IsUniformType() && indexType->IsUniformType()) + return PointerType::GetUniform(st->GetElementType()); + else + return PointerType::GetVarying(st->GetElementType()); + } + + const PointerType *pt = + dynamic_cast(baseExprLValueType->GetBaseType()); + assert(pt != NULL); + if (baseExprLValueType->IsUniformType() && indexType->IsUniformType()) + return PointerType::GetUniform(pt->GetBaseType()); + else + return PointerType::GetVarying(pt->GetBaseType()); +} + + Expr * IndexExpr::Optimize() { - if (arrayOrVector) - arrayOrVector = arrayOrVector->Optimize(); + if (baseExpr) + baseExpr = baseExpr->Optimize(); if (index) index = index->Optimize(); - if (arrayOrVector == NULL || index == NULL) + if (baseExpr == NULL || index == NULL) return NULL; return this; @@ -2824,21 +3107,22 @@ IndexExpr::Optimize() { Expr * IndexExpr::TypeCheck() { - if (arrayOrVector) - arrayOrVector = arrayOrVector->TypeCheck(); + if (baseExpr) + baseExpr = baseExpr->TypeCheck(); if (index) index = index->TypeCheck(); - if (!arrayOrVector || !index || !index->GetType()) + if (!baseExpr || !index || !index->GetType()) return NULL; - const Type *arrayOrVectorType = arrayOrVector->GetType(); - if (!arrayOrVectorType) + const Type *baseExprType = baseExpr->GetType(); + if (!baseExprType) return NULL; - if (dynamic_cast(arrayOrVectorType->GetReferenceTarget()) == NULL) { - Error(pos, "Trying to index into non-array or vector type \"%s\".", - arrayOrVectorType->GetString().c_str()); + if (!dynamic_cast(baseExprType->GetReferenceTarget()) && + !dynamic_cast(baseExprType)) { + Error(pos, "Trying to index into non-array, vector, or pointer " + "type \"%s\".", baseExprType->GetString().c_str()); return NULL; } @@ -2856,8 +3140,17 @@ IndexExpr::TypeCheck() { int IndexExpr::EstimateCost() const { - // be pessimistic - if (index && index->GetType()->IsVaryingType()) + if (index == NULL || baseExpr == NULL) + return 0; + + const Type *indexType = index->GetType(); + const Type *baseExprType = baseExpr->GetType(); + + if ((indexType != NULL && indexType->IsVaryingType()) || + (dynamic_cast(baseExprType) != NULL && + baseExprType->IsVaryingType())) + // be pessimistic; some of these will later turn out to be vector + // loads/stores, but it's too early for us to know that here. 
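// (For example, a sketch: "uniform float a[16]; float x = a[programIndex];" indexes with a varying index and is costed as a gather here, even though later optimization may recognize the consecutive indices and emit a single vector load instead.)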
return COST_GATHER; else return COST_LOAD; @@ -2866,11 +3159,11 @@ IndexExpr::EstimateCost() const { void IndexExpr::Print() const { - if (!arrayOrVector || !index || !GetType()) + if (!baseExpr || !index || !GetType()) return; printf("[%s] index ", GetType()->GetString().c_str()); - arrayOrVector->Print(); + baseExpr->Print(); printf("["); index->Print(); printf("]"); @@ -2913,49 +3206,58 @@ class StructMemberExpr : public MemberExpr { public: StructMemberExpr(Expr *e, const char *id, SourcePos p, - SourcePos idpos, const StructType *structType); + SourcePos idpos, bool derefLValue); const Type *GetType() const; int getElementNumber() const; const Type *getElementType() const; private: - const StructType *exprStructType; + const StructType *getStructType() const; }; StructMemberExpr::StructMemberExpr(Expr *e, const char *id, SourcePos p, - SourcePos idpos, - const StructType *structType) - : MemberExpr(e, id, p, idpos), exprStructType(structType) { + SourcePos idpos, bool derefLValue) + : MemberExpr(e, id, p, idpos, derefLValue) { } const Type * StructMemberExpr::GetType() const { - // It's a struct, and the result type is the element - // type, possibly promoted to varying if the struct type / lvalue - // is varying. - const Type *elementType = exprStructType->GetElementType(identifier); - if (!elementType) + // It's a struct, and the result type is the element type, possibly + // promoted to varying if the struct type / lvalue is varying. + const StructType *structType = getStructType(); + if (structType == NULL) + return NULL; + + const Type *elementType = structType->GetElementType(identifier); + if (elementType == NULL) Error(identifierPos, "Element name \"%s\" not present in struct type \"%s\".%s", - identifier.c_str(), exprStructType->GetString().c_str(), + identifier.c_str(), structType->GetString().c_str(), getCandidateNearMatches().c_str()); - if (exprStructType->IsVaryingType()) + const PointerType *pt = dynamic_cast(expr->GetType()); + if (structType->IsVaryingType() || + (pt != NULL && pt->IsVaryingType())) return elementType->GetAsVaryingType(); else return elementType; } + int StructMemberExpr::getElementNumber() const { - int elementNumber = exprStructType->GetElementNumber(identifier); + const StructType *structType = getStructType(); + if (structType == NULL) + return -1; + + int elementNumber = structType->GetElementNumber(identifier); if (elementNumber == -1) Error(identifierPos, "Element name \"%s\" not present in struct type \"%s\".%s", - identifier.c_str(), exprStructType->GetString().c_str(), + identifier.c_str(), structType->GetString().c_str(), getCandidateNearMatches().c_str()); return elementNumber; } @@ -2963,7 +3265,34 @@ StructMemberExpr::getElementNumber() const { const Type * StructMemberExpr::getElementType() const { - return exprStructType->GetAsUniformType()->GetElementType(identifier); + const StructType *structType = getStructType(); + if (structType == NULL) + return NULL; + + return structType->GetAsUniformType()->GetElementType(identifier); +} + + +const StructType * +StructMemberExpr::getStructType() const { + const Type *exprType = expr->GetType(); + if (exprType == NULL) + return NULL; + + const StructType *structType = dynamic_cast(exprType); + if (structType == NULL) { + const PointerType *pt = dynamic_cast(exprType); + if (pt != NULL) + structType = dynamic_cast(pt->GetBaseType()); + else { + const ReferenceType *rt = + dynamic_cast(exprType); + assert(rt != NULL); + structType = dynamic_cast(rt->GetReferenceTarget()); + } + assert(structType 
!= NULL); + } + return structType; } @@ -2974,11 +3303,12 @@ class VectorMemberExpr : public MemberExpr { public: VectorMemberExpr(Expr *e, const char *id, SourcePos p, - SourcePos idpos, const VectorType* vectorType); + SourcePos idpos, bool derefLValue); - const Type *GetType() const; - llvm::Value *GetLValue(FunctionEmitContext* ctx) const; llvm::Value *GetValue(FunctionEmitContext* ctx) const; + llvm::Value *GetLValue(FunctionEmitContext* ctx) const; + const Type *GetType() const; + const Type *GetLValueType() const; int getElementNumber() const; const Type *getElementType() const; @@ -2990,9 +3320,21 @@ private: VectorMemberExpr::VectorMemberExpr(Expr *e, const char *id, SourcePos p, - SourcePos idpos, - const VectorType* vectorType) - : MemberExpr(e, id, p, idpos), exprVectorType(vectorType) { + SourcePos idpos, bool derefLValue) + : MemberExpr(e, id, p, idpos, derefLValue) { + const Type *exprType = e->GetType(); + exprVectorType = dynamic_cast(exprType); + if (exprVectorType == NULL) { + const PointerType *pt = dynamic_cast(exprType); + if (pt != NULL) + exprVectorType = dynamic_cast(pt->GetBaseType()); + else { + assert(dynamic_cast(exprType) != NULL); + exprVectorType = + dynamic_cast(exprType->GetReferenceTarget()); + } + assert(exprVectorType != NULL); + } memberType = new VectorType(exprVectorType->GetElementType(), identifier.length()); } @@ -3004,11 +3346,14 @@ VectorMemberExpr::GetType() const { // type. For n-element expressions, we have a shortvec type // with n > 1 elements. This can be changed when we get // type<1> -> type conversions. - if (identifier.length() == 1) { - return exprVectorType->GetElementType(); - } else { - return memberType; - } + const Type *type = (identifier.length() == 1) ? + (const Type *)exprVectorType->GetElementType() : + (const Type *)memberType; + + const Type *lvalueType = GetLValueType(); + if (lvalueType != NULL && lvalueType->IsVaryingType()) + type = type->GetAsVaryingType(); + return type; } @@ -3022,8 +3367,41 @@ VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const { } +const Type * +VectorMemberExpr::GetLValueType() const { + if (identifier.length() == 1) { + if (expr == NULL) + return NULL; + + const Type *exprLValueType = dereferenceExpr ? expr->GetType() : + expr->GetLValueType(); + if (exprLValueType == NULL) + return NULL; + + const VectorType *vt = NULL; + if (dynamic_cast(exprLValueType) != NULL) + vt = dynamic_cast(exprLValueType->GetReferenceTarget()); + else + vt = dynamic_cast(exprLValueType->GetBaseType()); + assert(vt != NULL); + + // we don't want to report that it's e.g. a pointer to a float<1>, + // but ta pointer to a float, etc. + const Type *elementType = vt->GetElementType(); + if (dynamic_cast(exprLValueType) != NULL) + return new ReferenceType(elementType); + else + return exprLValueType->IsUniformType() ? 
+ PointerType::GetUniform(elementType) : + PointerType::GetVarying(elementType); + } + else + return NULL; +} + + llvm::Value * -VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const { +VectorMemberExpr::GetValue(FunctionEmitContext *ctx) const { if (identifier.length() == 1) { return MemberExpr::GetValue(ctx); } @@ -3040,27 +3418,47 @@ VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const { indices.push_back(idx); } - llvm::Value *basePtr = expr->GetLValue(ctx); - if (basePtr == NULL) { + llvm::Value *basePtr = NULL; + const Type *basePtrType = NULL; + if (dereferenceExpr) { + basePtr = expr->GetValue(ctx); + basePtrType = expr->GetType(); + } + else { + basePtr = expr->GetLValue(ctx); + basePtrType = expr->GetLValueType(); + } + + if (basePtr == NULL || basePtrType == NULL) { assert(m->errorCount > 0); return NULL; } - llvm::Value *ltmp = ctx->AllocaInst(memberType->LLVMType(g->ctx), + + // Allocate temporary memory to store the result + llvm::Value *resultPtr = ctx->AllocaInst(memberType->LLVMType(g->ctx), "vector_tmp"); + // FIXME: we should be able to use the internal mask here according + // to the same logic where it's used elsewhere + llvm::Value *elementMask = ctx->GetFullMask(); + + const Type *elementPtrType = basePtrType->IsUniformType() ? + PointerType::GetUniform(exprVectorType->GetElementType()) : + PointerType::GetVarying(exprVectorType->GetElementType()); + ctx->SetDebugPos(pos); for (size_t i = 0; i < identifier.size(); ++i) { - llvm::Value *ptmp = - ctx->GetElementPtrInst(ltmp, 0, i, "new_offset"); - llvm::Value *initLValue = - ctx->GetElementPtrInst(basePtr, 0, indices[i], "orig_offset"); - llvm::Value *initValue = - ctx->LoadInst(initLValue, NULL, memberType->GetElementType(), + llvm::Value *elementPtr = ctx->AddElementOffset(basePtr, indices[i], + basePtrType); + llvm::Value *elementValue = + ctx->LoadInst(elementPtr, elementMask, elementPtrType, "vec_element"); - ctx->StoreInst(initValue, ptmp); + + llvm::Value *ptmp = ctx->AddElementOffset(resultPtr, i, NULL); + ctx->StoreInst(elementValue, ptmp); } - return ctx->LoadInst(ltmp, NULL, memberType, "swizzle_vec"); + return ctx->LoadInst(resultPtr, "swizzle_vec"); } } @@ -3081,130 +3479,63 @@ VectorMemberExpr::getElementType() const { } -/////////////////////////////////////////////////////////////////////////// -// ReferenceMemberExpr - -class ReferenceMemberExpr : public MemberExpr -{ -public: - ReferenceMemberExpr(Expr *e, const char *id, SourcePos p, - SourcePos idpos, const ReferenceType* referenceType); - - const Type *GetType() const; - llvm::Value *GetLValue(FunctionEmitContext* ctx) const; - - int getElementNumber() const; - const Type *getElementType() const; - -private: - const ReferenceType *exprReferenceType; - MemberExpr *dereferencedExpr; -}; - -ReferenceMemberExpr::ReferenceMemberExpr(Expr *e, const char *id, SourcePos p, - SourcePos idpos, - const ReferenceType *referenceType) - : MemberExpr(e, id, p, idpos), exprReferenceType(referenceType) { - const Type *refTarget = exprReferenceType->GetReferenceTarget(); - const StructType *structType = dynamic_cast(refTarget); - const VectorType *vectorType = dynamic_cast(refTarget); - - if (structType != NULL) - dereferencedExpr = new StructMemberExpr(e, id, p, idpos, structType); - else if (vectorType != NULL) - dereferencedExpr = new VectorMemberExpr(e, id, p, idpos, vectorType); - else - dereferencedExpr = NULL; -} - - -const Type * -ReferenceMemberExpr::GetType() const { - if (dereferencedExpr == NULL) { - Error(pos, "Can't access member of
non-struct/vector type \"%s\".", - exprReferenceType->GetString().c_str()); - return NULL; - } else { - return dereferencedExpr->GetType(); - } -} - - -int -ReferenceMemberExpr::getElementNumber() const { - if (dereferencedExpr == NULL) { - // FIXME: I think we shouldn't ever get here and that - // typechecking should have caught this case - return -1; - } else { - return dereferencedExpr->getElementNumber(); - } -} - - -const Type * -ReferenceMemberExpr::getElementType() const { - assert(dereferencedExpr != NULL); - return dereferencedExpr->getElementType(); -} - - -llvm::Value * -ReferenceMemberExpr::GetLValue(FunctionEmitContext* ctx) const { - if (dereferencedExpr == NULL) { - // FIXME: again I think typechecking should have caught this - Error(pos, "Can't access member of non-struct/vector type \"%s\".", - exprReferenceType->GetString().c_str()); - return NULL; - } - - //FIXME: Minor Code-dup...this is the same as the base, except - // llvm::Value *basePtr = expr->GetLValue instead of expr->getValue - llvm::Value *basePtr = expr->GetValue(ctx); - if (!basePtr) - return NULL; - - int elementNumber = getElementNumber(); - if (elementNumber == -1) - return NULL; - - ctx->SetDebugPos(pos); - llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, 0, elementNumber); - - const Type *elementType = getElementType(); - ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetType(), elementType); - - return ptr; -} - MemberExpr * -MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos) { +MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos, + bool derefLValue) { const Type *exprType; if (e == NULL || (exprType = e->GetType()) == NULL) return NULL; - const StructType *structType = dynamic_cast(exprType); - if (structType != NULL) - return new StructMemberExpr(e, id, p, idpos, structType); + const ReferenceType *referenceType = + dynamic_cast(exprType); + if (referenceType != NULL) { + e = new DereferenceExpr(e, e->pos); + exprType = e->GetType(); + assert(exprType != NULL); + } - const VectorType *vectorType = dynamic_cast(exprType); - if (vectorType != NULL) - return new VectorMemberExpr(e, id, p, idpos, vectorType); + const PointerType *pointerType = dynamic_cast(exprType); + if (pointerType != NULL) + exprType = pointerType->GetBaseType(); - const ReferenceType *referenceType = dynamic_cast(exprType); - if (referenceType != NULL) - return new ReferenceMemberExpr(e, id, p, idpos, referenceType); + if (derefLValue == true && pointerType == NULL) { + if (dynamic_cast(exprType->GetReferenceTarget()) != NULL) + Error(p, "Dereference operator \"->\" can't be applied to non-pointer " + "type \"%s\". Did you mean to use \".\"?", + exprType->GetString().c_str()); + else + Error(p, "Dereference operator \"->\" can't be applied to non-struct " + "pointer type \"%s\".", exprType->GetString().c_str()); + return NULL; + } + if (derefLValue == false && pointerType != NULL && + dynamic_cast(pointerType->GetBaseType()) != NULL) { + Error(p, "Member operator \".\" can't be applied to pointer " + "type \"%s\". Did you mean to use \"->\"?", + exprType->GetString().c_str()); + return NULL; + } - FATAL("Unexpected case in MemberExpr::create()"); - return NULL; + if (dynamic_cast(exprType) != NULL) + return new StructMemberExpr(e, id, p, idpos, derefLValue); + else if (dynamic_cast(exprType) != NULL) + return new VectorMemberExpr(e, id, p, idpos, derefLValue); + else { + Error(p, "Member operator \"%s\" can't be used with expression of " + "\"%s\" type.", derefLValue ? 
"->" : ".", + exprType->GetString().c_str()); + return NULL; + } } -MemberExpr::MemberExpr(Expr *e, const char *id, SourcePos p, SourcePos idpos) +MemberExpr::MemberExpr(Expr *e, const char *id, SourcePos p, SourcePos idpos, + bool derefLValue) : Expr(p), identifierPos(idpos) { expr = e; identifier = id; + dereferenceExpr = derefLValue; } @@ -3214,6 +3545,8 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; llvm::Value *lvalue = GetLValue(ctx); + const Type *lvalueType = GetLValueType(); + llvm::Value *mask = NULL; if (lvalue == NULL) { // As in the array case, this may be a temporary that hasn't hit @@ -3233,9 +3566,10 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { int elementNumber = getElementNumber(); if (elementNumber == -1) return NULL; - lvalue = ctx->GetElementPtrInst(ptr, 0, elementNumber); - lvalue = lAddVaryingOffsetsIfNeeded(ctx, lvalue, GetType(), getElementType()); + lvalue = ctx->AddElementOffset(ptr, elementNumber, + PointerType::GetUniform(exprType)); + lvalueType = PointerType::GetUniform(GetType()); mask = LLVMMaskAllOn; } else { @@ -3245,7 +3579,7 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { } ctx->SetDebugPos(pos); - return ctx->LoadInst(lvalue, mask, GetType(), "structelement"); + return ctx->LoadInst(lvalue, mask, lvalueType, "structelement"); } @@ -3269,15 +3603,13 @@ MemberExpr::getElementNumber() const { llvm::Value * MemberExpr::GetLValue(FunctionEmitContext *ctx) const { - //This kindof feels like magic, but this functionality - // will have to be overridden in VectorMemberExpr when - // we support multi-swizzle. const Type *exprType; if (!expr || ((exprType = expr->GetType()) == NULL)) return NULL; ctx->SetDebugPos(pos); - llvm::Value *basePtr = expr->GetLValue(ctx); + llvm::Value *basePtr = dereferenceExpr ? expr->GetValue(ctx) : + expr->GetLValue(ctx); if (!basePtr) return NULL; @@ -3285,14 +3617,34 @@ MemberExpr::GetLValue(FunctionEmitContext *ctx) const { if (elementNumber == -1) return NULL; + const Type *exprLValueType = dereferenceExpr ? expr->GetType() : + expr->GetLValueType(); ctx->SetDebugPos(pos); - llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, 0, elementNumber); - ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetType(), getElementType()); + llvm::Value *ptr = ctx->AddElementOffset(basePtr, elementNumber, + exprLValueType); + + ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); return ptr; } +const Type * +MemberExpr::GetLValueType() const { + if (expr == NULL) + return NULL; + + const Type *exprLValueType = dereferenceExpr ? expr->GetType() : + expr->GetLValueType(); + if (exprLValueType == NULL) + return NULL; + + return exprLValueType->IsUniformType() ? + PointerType::GetUniform(getElementType()) : + PointerType::GetVarying(getElementType()); +} + + Expr * MemberExpr::TypeCheck() { if (expr) @@ -3311,9 +3663,11 @@ MemberExpr::Optimize() { int MemberExpr::EstimateCost() const { - // FIXME: return gather cost when we can tell a gather is going to be - // needed - return COST_SIMPLE_ARITH_LOGIC_OP; + const Type *lvalueType = GetLValueType(); + if (lvalueType != NULL && lvalueType->IsVaryingType()) + return COST_GATHER + COST_SIMPLE_ARITH_LOGIC_OP; + else + return COST_SIMPLE_ARITH_LOGIC_OP; } @@ -3718,10 +4072,6 @@ lConvertElement(From from, To *to) { /** When converting from bool types to numeric types, make sure the result is one or zero. - FIXME: this is a different rule than we use elsewhere, where we sign extend - the bool. 
We should fix the other case to just zero extend and then - patch up places in the stdlib that depend on sign extension to call a - routine to make that happen. */ template static inline void lConvertElement(bool from, To *to) { @@ -4689,12 +5039,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, // If we also want to go from uniform to varying, replicate out the // value across the vector elements.. if (toType->IsVaryingType() && fromType->IsUniformType()) - return ctx->SmearScalar(cast); + return ctx->SmearUniform(cast); else return cast; } +// FIXME: fold this into the FunctionEmitContext::SmearUniform() method? + /** Converts the given value of the given type to be the varying equivalent, returning the resulting value. */ @@ -4705,14 +5057,15 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value, if (type->IsVaryingType()) return value; - LLVM_TYPE_CONST llvm::Type *llvmType = type->GetAsVaryingType()->LLVMType(g->ctx); - llvm::Value *retValue = llvm::UndefValue::get(llvmType); - // for structs/arrays/vectors, just recursively make their elements // varying (if needed) and populate the return value. const CollectionType *collectionType = dynamic_cast(type); if (collectionType != NULL) { + LLVM_TYPE_CONST llvm::Type *llvmType = + type->GetAsVaryingType()->LLVMType(g->ctx); + llvm::Value *retValue = llvm::UndefValue::get(llvmType); + for (int i = 0; i < collectionType->GetElementCount(); ++i) { llvm::Value *v = ctx->ExtractInst(value, i, "get_element"); v = lUniformValueToVarying(ctx, v, collectionType->GetElementType(i)); @@ -4724,11 +5077,10 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value, // Otherwise we must have a uniform AtomicType, so smear its value // across the vector lanes. assert(dynamic_cast(type) != NULL); - return ctx->SmearScalar(value); + return ctx->SmearUniform(value); } - llvm::Value * TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { if (!expr) @@ -4743,28 +5095,103 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { const PointerType *fromPointerType = dynamic_cast(fromType); const PointerType *toPointerType = dynamic_cast(toType); - if (fromPointerType != NULL && toPointerType != NULL) { - llvm::Value *value = expr->GetValue(ctx); - if (value == NULL) - return NULL; + const ArrayType *toArrayType = dynamic_cast(toType); + const ArrayType *fromArrayType = dynamic_cast(fromType); + if (fromPointerType != NULL) { + if (toArrayType != NULL) { + return expr->GetValue(ctx); + } + else if (toPointerType != NULL) { + llvm::Value *value = expr->GetValue(ctx); + if (value == NULL) + return NULL; - // bitcast from NULL to actual pointer type... 
- value = ctx->BitCastInst(value, toType->GetAsUniformType()->LLVMType(g->ctx)); + if (fromType->IsUniformType() && toType->IsUniformType()) + // bitcast to the actual pointer type + return ctx->BitCastInst(value, toType->LLVMType(g->ctx)); + else if (fromType->IsVaryingType() && toType->IsVaryingType()) { + // both are vectors of ints already, nothing to do at the IR + // level + return value; + } + else { + assert(fromType->IsUniformType() && toType->IsVaryingType()); + value = ctx->PtrToIntInst(value); + return ctx->SmearUniform(value); + } + } + else { + // convert pointer to bool + assert(dynamic_cast(toType) && + toType->IsBoolType()); + LLVM_TYPE_CONST llvm::Type *lfu = + fromType->GetAsUniformType()->LLVMType(g->ctx); + LLVM_TYPE_CONST llvm::PointerType *llvmFromUnifType = + llvm::dyn_cast(lfu); - if (fromType->IsUniformType() && toType->IsVaryingType()) - return ctx->SmearScalar(value); - else - return value; + llvm::Value *nullPtrValue = llvm::ConstantPointerNull::get(llvmFromUnifType); + if (fromType->IsVaryingType()) + nullPtrValue = ctx->SmearUniform(nullPtrValue); + + llvm::Value *exprVal = expr->GetValue(ctx); + llvm::Value *cmp = ctx->CmpInst(llvm::Instruction::ICmp, + llvm::CmpInst::ICMP_NE, + exprVal, nullPtrValue, "ptr_ne_NULL"); + + if (toType->IsVaryingType()) { + if (fromType->IsUniformType()) + cmp = ctx->SmearUniform(cmp); + cmp = ctx->I1VecToBoolVec(cmp); + } + + return cmp; + } } - if (Type::Equal(toType->GetAsConstType(), fromType->GetAsConstType())) + if (Type::EqualIgnoringConst(toType, fromType)) // There's nothing to do, just return the value. (LLVM's type // system doesn't worry about constiness.) return expr->GetValue(ctx); + if (fromArrayType != NULL && toPointerType != NULL) { + // implicit array to pointer to first element + Expr *zero = new ConstExpr(AtomicType::UniformInt32, 0, pos); + Expr *index = new IndexExpr(expr, zero, pos); + Expr *addr = new AddressOfExpr(index, pos); + addr = addr->TypeCheck(); + assert(addr != NULL); + addr = addr->Optimize(); + assert(addr != NULL); + if (Type::EqualIgnoringConst(addr->GetType(), toPointerType) == false) { + assert(Type::EqualIgnoringConst(addr->GetType()->GetAsVaryingType(), + toPointerType) == true); + addr = new TypeCastExpr(toPointerType, addr, false, pos); + addr = addr->TypeCheck(); + assert(addr != NULL); + addr = addr->Optimize(); + assert(addr != NULL); + } + assert(Type::EqualIgnoringConst(addr->GetType(), toPointerType)); + return addr->GetValue(ctx); + } + // This also should be caught during typechecking assert(!(toType->IsUniformType() && fromType->IsVaryingType())); + if (toArrayType != NULL && fromArrayType != NULL) { + // cast array pointer from [n x foo] to [0 x foo] if needed to be able + // to pass to a function that takes an unsized array as a parameter + if (toArrayType->GetElementCount() != 0 && + (toArrayType->GetElementCount() != fromArrayType->GetElementCount())) + Warning(pos, "Type-converting array of length %d to length %d", + fromArrayType->GetElementCount(), toArrayType->GetElementCount()); + assert(Type::EqualIgnoringConst(toArrayType->GetBaseType(), + fromArrayType->GetBaseType())); + llvm::Value *v = expr->GetValue(ctx); + LLVM_TYPE_CONST llvm::Type *ptype = toType->LLVMType(g->ctx); + return ctx->BitCastInst(v, ptype); //, "array_cast_0size"); + } + const ReferenceType *toReference = dynamic_cast(toType); const ReferenceType *fromReference = dynamic_cast(fromType); if (toReference && fromReference) { @@ -4780,8 +5207,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) 
const { (toArray->GetElementCount() != fromArray->GetElementCount())) Warning(pos, "Type-converting array of length %d to length %d", fromArray->GetElementCount(), toArray->GetElementCount()); - assert(Type::Equal(toArray->GetBaseType()->GetAsConstType(), - fromArray->GetBaseType()->GetAsConstType())); + assert(Type::EqualIgnoringConst(toArray->GetBaseType(), + fromArray->GetBaseType())); llvm::Value *v = expr->GetValue(ctx); LLVM_TYPE_CONST llvm::Type *ptype = toType->LLVMType(g->ctx); return ctx->BitCastInst(v, ptype); //, "array_cast_0size"); @@ -4850,32 +5277,6 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { toType = toEnum->IsUniformType() ? AtomicType::UniformUInt32 : AtomicType::VaryingUInt32; - if (fromPointerType != NULL) { - // convert pointer to bool - assert(dynamic_cast(toType) && - toType->IsBoolType()); - LLVM_TYPE_CONST llvm::Type *lfu = - fromType->GetAsUniformType()->LLVMType(g->ctx); - LLVM_TYPE_CONST llvm::PointerType *llvmFromUnifType = - llvm::dyn_cast(lfu); - - llvm::Value *nullPtrValue = llvm::ConstantPointerNull::get(llvmFromUnifType); - if (fromType->IsVaryingType()) - nullPtrValue = ctx->SmearScalar(nullPtrValue); - - llvm::Value *cmp = ctx->CmpInst(llvm::Instruction::ICmp, - llvm::CmpInst::ICMP_NE, - exprVal, nullPtrValue, "ptr_ne_NULL"); - - if (toType->IsVaryingType()) { - if (fromType->IsUniformType()) - cmp = ctx->SmearScalar(cmp); - cmp = ctx->I1VecToBoolVec(cmp); - } - - return cmp; - } - const AtomicType *fromAtomic = dynamic_cast(fromType); // at this point, coming from an atomic type is all that's left... assert(fromAtomic != NULL); @@ -4908,6 +5309,17 @@ TypeCastExpr::GetType() const { } +static const Type * +lDeconstifyType(const Type *t) { + const PointerType *pt = dynamic_cast(t); + if (pt != NULL) + return new PointerType(lDeconstifyType(pt->GetBaseType()), + pt->IsUniformType(), false); + else + return t->GetAsNonConstType(); +} + + Expr * TypeCastExpr::TypeCheck() { if (expr != NULL) @@ -4926,64 +5338,30 @@ TypeCastExpr::TypeCheck() { return tce->TypeCheck(); } - const char *toTypeString = toType->GetString().c_str(); - const char *fromTypeString = fromType->GetString().c_str(); + fromType = lDeconstifyType(fromType); + toType = lDeconstifyType(toType); - // It's an error to attempt to convert from varying to uniform - if (toType->IsUniformType() && !fromType->IsUniformType()) { - Error(pos, "Can't assign 'varying' value to 'uniform' type \"%s\".", - toTypeString); + // First some special cases that we allow only with an explicit type cast + const PointerType *ptFrom = dynamic_cast(fromType); + const PointerType *ptTo = dynamic_cast(toType); + if (ptFrom != NULL && ptTo != NULL) + // allow explicit typecasts between any two different pointer types + return this; + + const AtomicType *fromAtomic = dynamic_cast(fromType); + const AtomicType *toAtomic = dynamic_cast(toType); + const EnumType *fromEnum = dynamic_cast(fromType); + const EnumType *toEnum = dynamic_cast(toType); + if ((fromAtomic || fromEnum) && (toAtomic || toEnum)) + // Allow explicit casts between all of these + return this; + + // And otherwise see if it's one of the conversions allowed to happen + // implicitly. 
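// (Hedged sketch of the intent: a cast like "(uniform int32 *)floatPtr" is accepted by the pointer special case above only because it is written explicitly, and "(uniform int32)enumValue" by the atomic/enum case; an illegal conversion such as varying-to-uniform still fails below, with CanConvertTypes() now issuing the diagnostic itself since it is given an error message and source position.)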
+ if (CanConvertTypes(fromType, toType, "type cast expression", pos) == false) return NULL; - } - // And any kind of void type in a type cast doesn't make sense - if (toType == AtomicType::Void || fromType == AtomicType::Void) { - Error(pos, "Void type illegal in type cast from type \"%s\" to " - "type \"%s\".", fromTypeString, toTypeString); - return NULL; - } - - // FIXME: do we need to worry more about references here? - - if (dynamic_cast(fromType) != NULL) { - // Starting from a vector type; the result type must be a vector - // type as well - if (dynamic_cast(toType) == NULL) { - Error(pos, "Can't convert vector type \"%s\" to non-vector type \"%s\".", - fromTypeString, toTypeString); - return NULL; - } - - // And the two vectors must have the same number of elements - if (dynamic_cast(toType)->GetElementCount() != - dynamic_cast(fromType)->GetElementCount()) { - Error(pos, "Can't convert vector type \"%s\" to differently-sized " - "vector type \"%s\".", fromTypeString, toTypeString); - return NULL; - } - - // And we're ok; since vectors can only hold AtomicTypes, we know - // that type converting the elements will work. - return this; - } - else if (dynamic_cast(fromType) != NULL) { - FATAL("Shouldn't ever get here"); - return this; - } - else { - assert(dynamic_cast(fromType) != NULL || - dynamic_cast(fromType) != NULL); - // If we're going from an atomic or enum type, the only possible - // result is another atomic or enum type - if (dynamic_cast(toType) == NULL && - dynamic_cast(toType) == NULL) { - Error(pos, "Can't convert from type \"%s\" to \"%s\".", - fromTypeString, toTypeString); - return NULL; - } - - return this; - } + return this; } @@ -5113,13 +5491,13 @@ TypeCastExpr::GetConstant(const Type *constType) const { if (ec == NULL) return NULL; + ec = llvm::ConstantExpr::getPtrToInt(ec, LLVMTypes::PointerIntType); + + assert(type->IsVaryingType()); std::vector smear; for (int i = 0; i < g->target.vectorWidth; ++i) smear.push_back(ec); - LLVM_TYPE_CONST llvm::ArrayType *llvmVaryingType = - llvm::dyn_cast(type->LLVMType(g->ctx)); - assert(llvmVaryingType != NULL); - return llvm::ConstantArray::get(llvmVaryingType, smear); + return llvm::ConstantVector::get(smear); } @@ -5154,7 +5532,20 @@ ReferenceExpr::GetType() const { if (!type) return NULL; - return new ReferenceType(type, false); + return new ReferenceType(type); +} + + +const Type * +ReferenceExpr::GetLValueType() const { + if (!expr) + return NULL; + + const Type *type = expr->GetType(); + if (!type) + return NULL; + + return PointerType::GetUniform(type); } @@ -5213,12 +5604,16 @@ DereferenceExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *ptr = expr->GetValue(ctx); if (ptr == NULL) return NULL; - const Type *type = GetType(); + const Type *type = expr->GetType(); if (type == NULL) return NULL; + Symbol *baseSym = expr->GetBaseSymbol(); + assert(baseSym != NULL); + llvm::Value *mask = lMaskForSymbol(baseSym, ctx); + ctx->SetDebugPos(pos); - return ctx->LoadInst(ptr, NULL, type, "reference_load"); + return ctx->LoadInst(ptr, mask, type, "deref_load"); } @@ -5230,6 +5625,14 @@ DereferenceExpr::GetLValue(FunctionEmitContext *ctx) const { } +const Type * +DereferenceExpr::GetLValueType() const { + if (expr == NULL) + return NULL; + return expr->GetType(); +} + + Symbol * DereferenceExpr::GetBaseSymbol() const { return expr ? expr->GetBaseSymbol() : NULL; @@ -5238,8 +5641,20 @@ DereferenceExpr::GetBaseSymbol() const { const Type * DereferenceExpr::GetType() const { - return (expr && expr->GetType()) ? 
expr->GetType()->GetReferenceTarget() : - NULL; + if (expr == NULL) + return NULL; + const Type *exprType = expr->GetType(); + if (exprType == NULL) + return NULL; + if (dynamic_cast(exprType) != NULL) + return exprType->GetReferenceTarget(); + else { + assert(dynamic_cast(exprType) != NULL); + if (exprType->IsUniformType()) + return exprType->GetBaseType(); + else + return exprType->GetBaseType()->GetAsVaryingType(); + } } @@ -5265,7 +5680,17 @@ DereferenceExpr::Optimize() { int DereferenceExpr::EstimateCost() const { - return COST_DEREF; + if (expr == NULL) + return 0; + + const Type *exprType = expr->GetType(); + if (dynamic_cast(exprType) && + exprType->IsVaryingType()) + // Be pessimistic; some of these will later be optimized into + // vector loads/stores.. + return COST_GATHER + COST_DEREF; + else + return COST_DEREF; } @@ -5281,6 +5706,150 @@ DereferenceExpr::Print() const { } +/////////////////////////////////////////////////////////////////////////// +// AddressOfExpr + +AddressOfExpr::AddressOfExpr(Expr *e, SourcePos p) + : Expr(p), expr(e) { +} + + +llvm::Value * +AddressOfExpr::GetValue(FunctionEmitContext *ctx) const { + ctx->SetDebugPos(pos); + if (expr == NULL) + return NULL; + + const Type *exprType = expr->GetType(); + if (dynamic_cast(exprType) != NULL) + return expr->GetValue(ctx); + else + return expr->GetLValue(ctx); +} + + +const Type * +AddressOfExpr::GetType() const { + if (expr == NULL) + return NULL; + + const Type *exprType = expr->GetType(); + if (dynamic_cast(exprType) != NULL) + return PointerType::GetUniform(exprType->GetReferenceTarget()); + else + return expr->GetLValueType(); +} + + +Symbol * +AddressOfExpr::GetBaseSymbol() const { + return expr ? expr->GetBaseSymbol() : NULL; +} + + +void +AddressOfExpr::Print() const { + printf("&("); + if (expr) + expr->Print(); + else + printf("NULL expr"); + printf(")"); + pos.Print(); +} + + +Expr * +AddressOfExpr::TypeCheck() { + if (expr != NULL) + expr = expr->TypeCheck(); + return this; +} + + +Expr * +AddressOfExpr::Optimize() { + if (expr != NULL) + expr = expr->Optimize(); + return this; +} + + +int +AddressOfExpr::EstimateCost() const { + return 0; +} + + +/////////////////////////////////////////////////////////////////////////// +// SizeOfExpr + +SizeOfExpr::SizeOfExpr(Expr *e, SourcePos p) + : Expr(p), expr(e), type(NULL) { +} + + +SizeOfExpr::SizeOfExpr(const Type *t, SourcePos p) + : Expr(p), expr(NULL), type(t) { +} + + +llvm::Value * +SizeOfExpr::GetValue(FunctionEmitContext *ctx) const { + ctx->SetDebugPos(pos); + const Type *t = expr ? expr->GetType() : type; + if (t == NULL) + return NULL; + + LLVM_TYPE_CONST llvm::Type *llvmType = t->LLVMType(g->ctx); + if (llvmType == NULL) + return NULL; + + return g->target.SizeOf(llvmType); +} + + +const Type * +SizeOfExpr::GetType() const { + return (g->target.is32Bit || g->opt.force32BitAddressing) ? + AtomicType::UniformUInt32 : AtomicType::UniformUInt64; +} + + +void +SizeOfExpr::Print() const { + printf("Sizeof ("); + if (expr != NULL) + expr->Print(); + const Type *t = expr ? 
expr->GetType() : type; + if (t != NULL) + printf(" [type %s]", t->GetString().c_str()); + printf(")"); + pos.Print(); +} + + +Expr * +SizeOfExpr::TypeCheck() { + if (expr != NULL) + expr = expr->TypeCheck(); + return this; +} + + +Expr * +SizeOfExpr::Optimize() { + if (expr != NULL) + expr = expr->Optimize(); + return this; +} + + +int +SizeOfExpr::EstimateCost() const { + return 0; +} + /////////////////////////////////////////////////////////////////////////// // SymbolExpr @@ -5296,7 +5865,7 @@ SymbolExpr::GetValue(FunctionEmitContext *ctx) const { if (!symbol || !symbol->storagePtr) return NULL; ctx->SetDebugPos(pos); - return ctx->LoadInst(symbol->storagePtr, NULL, NULL, symbol->name.c_str()); + return ctx->LoadInst(symbol->storagePtr, symbol->name.c_str()); } @@ -5309,6 +5878,15 @@ SymbolExpr::GetLValue(FunctionEmitContext *ctx) const { } +const Type * +SymbolExpr::GetLValueType() const { + if (symbol == NULL) + return NULL; + + return PointerType::GetUniform(symbol->type); +} + + Symbol * SymbolExpr::GetBaseSymbol() const { return symbol; @@ -5432,8 +6010,7 @@ FunctionSymbolExpr::GetConstant(const Type *type) const { assert(type->IsUniformType()); assert(GetType()->IsUniformType()); - if (Type::Equal(type->GetAsConstType(), - GetType()->GetAsConstType()) == false) + if (Type::EqualIgnoringConst(type, GetType()) == false) return NULL; return matchingFunc ? matchingFunc->function : NULL; @@ -5448,38 +6025,32 @@ lGetFunctionDeclaration(const std::string &name, const FunctionType *type) { ret += name; ret += "("; - const std::vector &argTypes = type->GetArgumentTypes(); - const std::vector &argDefaults = type->GetArgumentDefaults(); + for (int i = 0; i < type->GetNumParameters(); ++i) { + const Type *paramType = type->GetParameterType(i); + ConstExpr *paramDefault = type->GetParameterDefault(i); - for (unsigned int i = 0; i < argTypes.size(); ++i) { - // If the parameter is a reference to an array, just print its type - // as the array type, since we always pass arrays by reference. - if (dynamic_cast(argTypes[i]) && - dynamic_cast(argTypes[i]->GetReferenceTarget())) - ret += argTypes[i]->GetReferenceTarget()->GetString(); - else - ret += argTypes[i]->GetString(); + ret += paramType->GetString(); ret += " "; - ret += type->GetArgumentName(i); + ret += type->GetParameterName(i); // Print the default value if present - if (argDefaults[i] != NULL) { + if (paramDefault != NULL) { char buf[32]; - if (argTypes[i]->IsFloatType()) { + if (paramType->IsFloatType()) { double val; - int count = argDefaults[i]->AsDouble(&val); + int count = paramDefault->AsDouble(&val); assert(count == 1); sprintf(buf, " = %g", val); } - else if (argTypes[i]->IsBoolType()) { + else if (paramType->IsBoolType()) { bool val; - int count = argDefaults[i]->AsBool(&val); + int count = paramDefault->AsBool(&val); assert(count == 1); sprintf(buf, " = %s", val ? 
"true" : "false"); } - else if (argTypes[i]->IsUnsignedType()) { + else if (paramType->IsUnsignedType()) { uint64_t val; - int count = argDefaults[i]->AsUInt64(&val); + int count = paramDefault->AsUInt64(&val); assert(count == 1); #ifdef ISPC_IS_LINUX sprintf(buf, " = %lu", val); @@ -5489,7 +6060,7 @@ lGetFunctionDeclaration(const std::string &name, const FunctionType *type) { } else { int64_t val; - int count = argDefaults[i]->AsInt64(&val); + int count = paramDefault->AsInt64(&val); assert(count == 1); #ifdef ISPC_IS_LINUX sprintf(buf, " = %ld", val); @@ -5499,7 +6070,7 @@ lGetFunctionDeclaration(const std::string &name, const FunctionType *type) { } ret += buf; } - if (i != argTypes.size() - 1) + if (i != type->GetNumParameters() - 1) ret += ", "; } ret += ")"; @@ -5564,7 +6135,7 @@ lExactMatch(const Type *callType, const Type *funcArgType) { callType = callType->GetAsNonConstType(); if (dynamic_cast(funcArgType) != NULL && dynamic_cast(callType) == NULL) - callType = new ReferenceType(callType, funcArgType->IsConstType()); + callType = new ReferenceType(callType); return Type::Equal(callType, funcArgType) ? 0 : -1; } @@ -5737,43 +6308,43 @@ FunctionSymbolExpr::tryResolve(int (*matchFunc)(const Type *, const Type *), const FunctionType *ft = dynamic_cast(candidateFunction->type); assert(ft != NULL); - const std::vector &funcArgTypes = ft->GetArgumentTypes(); - const std::vector &argumentDefaults = ft->GetArgumentDefaults(); // There's no way to match if the caller is passing more arguments // than this function instance takes. - if (callTypes.size() > funcArgTypes.size()) + if ((int)callTypes.size() > ft->GetNumParameters()) continue; - unsigned int i; + int i; // Note that we're looping over the caller arguments, not the // function arguments; it may be ok to have more arguments to the // function than are passed, if the function has default argument // values. This case is handled below. int cost = 0; - for (i = 0; i < callTypes.size(); ++i) { + for (i = 0; i < (int)callTypes.size(); ++i) { // This may happen if there's an error earlier in compilation. // It's kind of a silly to redundantly discover this for each // potential match versus detecting this earlier in the // matching process and just giving up. - if (callTypes[i] == NULL || funcArgTypes[i] == NULL || + const Type *paramType = ft->GetParameterType(i); + if (callTypes[i] == NULL || paramType == NULL || dynamic_cast(callTypes[i]) != NULL) return false; - int argCost = matchFunc(callTypes[i], funcArgTypes[i]); + int argCost = matchFunc(callTypes[i], paramType); if (argCost == -1) // If the predicate function returns -1, we have failed no // matter what else happens, so we stop trying break; cost += argCost; } - if (i == callTypes.size()) { + if (i == (int)callTypes.size()) { // All of the arguments matched! - if (i == funcArgTypes.size()) + if (i == ft->GetNumParameters()) // And we have exactly as many arguments as the function // wants, so we're done. matches.push_back(std::make_pair(cost, candidateFunction)); - else if (i < funcArgTypes.size() && argumentDefaults[i] != NULL) + else if (i < ft->GetNumParameters() && + ft->GetParameterDefault(i) != NULL) // Otherwise we can still make it if there are default // arguments for the rest of the arguments! Because in // Module::AddFunction() we have verified that once the diff --git a/expr.h b/expr.h index 17ec622b..a6720c03 100644 --- a/expr.h +++ b/expr.h @@ -65,6 +65,10 @@ public: /** Returns the Type of the expression. 
*/ virtual const Type *GetType() const = 0; + /** Returns the type of the value returned by GetLValue(); this + should be a pointer type of some sort (uniform or varying). */ + virtual const Type *GetLValueType() const; + /** For expressions that have values based on a symbol (e.g. regular symbol references, array indexing, etc.), this returns a pointer to that symbol. */ @@ -266,11 +270,12 @@ public: */ class IndexExpr : public Expr { public: - IndexExpr(Expr *arrayOrVector, Expr *index, SourcePos p); + IndexExpr(Expr *baseExpr, Expr *index, SourcePos p); llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const; const Type *GetType() const; + const Type *GetLValueType() const; Symbol *GetBaseSymbol() const; void Print() const; @@ -278,7 +283,7 @@ public: Expr *TypeCheck(); int EstimateCost() const; - Expr *arrayOrVector, *index; + Expr *baseExpr, *index; }; @@ -288,15 +293,13 @@ public: */ class MemberExpr : public Expr { public: - static MemberExpr* create(Expr *expr, const char *identifier, - SourcePos pos, SourcePos identifierPos); - - MemberExpr(Expr *expr, const char *identifier, SourcePos pos, - SourcePos identifierPos); - + static MemberExpr *create(Expr *expr, const char *identifier, + SourcePos pos, SourcePos identifierPos, + bool derefLValue); llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const; const Type *GetType() const; + const Type *GetLValueType() const; Symbol *GetBaseSymbol() const; void Print() const; Expr *Optimize(); @@ -310,6 +313,15 @@ public: Expr *expr; std::string identifier; const SourcePos identifierPos; + +protected: + MemberExpr(Expr *expr, const char *identifier, SourcePos pos, + SourcePos identifierPos, bool derefLValue); + + /** Indicates whether the expression should be dereferenced before the + member is found. (i.e. this is true if the MemberExpr was a '->' + operator, and is false if it was a '.' operator.) */ + bool dereferenceExpr; }; @@ -506,6 +518,7 @@ public: llvm::Value *GetValue(FunctionEmitContext *ctx) const; const Type *GetType() const; + const Type *GetLValueType() const; Symbol *GetBaseSymbol() const; void Print() const; Expr *TypeCheck(); @@ -525,6 +538,7 @@ public: llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const; const Type *GetType() const; + const Type *GetLValueType() const; Symbol *GetBaseSymbol() const; void Print() const; Expr *TypeCheck(); @@ -535,6 +549,44 @@ public: }; +/** Expression that represents taking the address of an expression. */ +class AddressOfExpr : public Expr { +public: + AddressOfExpr(Expr *e, SourcePos p); + + llvm::Value *GetValue(FunctionEmitContext *ctx) const; + const Type *GetType() const; + Symbol *GetBaseSymbol() const; + void Print() const; + Expr *TypeCheck(); + Expr *Optimize(); + int EstimateCost() const; + + Expr *expr; +}; + + +/** Expression that returns the size of the given expression or type in + bytes. */ +class SizeOfExpr : public Expr { +public: + SizeOfExpr(Expr *e, SourcePos p); + SizeOfExpr(const Type *t, SourcePos p); + + llvm::Value *GetValue(FunctionEmitContext *ctx) const; + const Type *GetType() const; + void Print() const; + Expr *TypeCheck(); + Expr *Optimize(); + int EstimateCost() const; + + /* One of expr or type should be non-NULL (but not both of them). The + SizeOfExpr returns the size of whichever one of them isn't NULL.
@@ -543,6 +595,7 @@ public:
     llvm::Value *GetValue(FunctionEmitContext *ctx) const;
     llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
     const Type *GetType() const;
+    const Type *GetLValueType() const;
     Symbol *GetBaseSymbol() const;
     Expr *TypeCheck();
     Expr *Optimize();
@@ -623,9 +676,13 @@ public:

 /** This function indicates whether it's legal to convert from fromType to
-    toType.
+    toType.  If the optional errorMsgBase and source position parameters
+    are provided, then an error message is issued if the type conversion
+    isn't possible.
  */
-bool CanConvertTypes(const Type *fromType, const Type *toType);
+bool CanConvertTypes(const Type *fromType, const Type *toType,
+                     const char *errorMsgBase = NULL,
+                     SourcePos pos = SourcePos());

 /** This function attempts to convert the given expression to the given
     type, returning a pointer to a new expression that is the result.  If
diff --git a/func.cpp b/func.cpp
index 5be26871..7c6895a2 100644
--- a/func.cpp
+++ b/func.cpp
@@ -74,10 +74,32 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
     maskSymbol = m->symbolTable->LookupVariable("__mask");
     assert(maskSymbol != NULL);

-    if (code) {
+    if (code != NULL) {
+        if (g->debugPrint) {
+            fprintf(stderr, "Creating function \"%s\".  Initial code:\n",
+                    sym->name.c_str());
+            code->Print(0);
+            fprintf(stderr, "---------------------\n");
+        }
+
         code = code->TypeCheck();
-        if (code)
+
+        if (code != NULL && g->debugPrint) {
+            fprintf(stderr, "After typechecking function \"%s\":\n",
+                    sym->name.c_str());
+            code->Print(0);
+            fprintf(stderr, "---------------------\n");
+        }
+
+        if (code != NULL) {
             code = code->Optimize();
+            if (g->debugPrint) {
+                fprintf(stderr, "After optimizing function \"%s\":\n",
+                        sym->name.c_str());
+                code->Print(0);
+                fprintf(stderr, "---------------------\n");
+            }
+        }
     }

     if (g->debugPrint) {
@@ -149,11 +171,11 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol *> &args,
     sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str());

     // get a pointer to the value in the struct
-    llvm::Value *ptr = ctx->GetElementPtrInst(structArgPtr, 0, i, sym->name.c_str());
+    llvm::Value *ptr = ctx->AddElementOffset(structArgPtr, i, NULL, sym->name.c_str());

     // and copy the value from the struct and into the local alloca'ed
     // memory
-    llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, sym->name.c_str());
+    llvm::Value *ptrval = ctx->LoadInst(ptr, sym->name.c_str());
     ctx->StoreInst(ptrval, sym->storagePtr);
     ctx->EmitFunctionParameterDebugInfo(sym);
 }
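The GetElementPtrInst() → AddElementOffset() change above is the heart of the new addressing scheme: instead of a {0, i} GEP, the element's byte offset is added to the base pointer as explicit integer math, so every address computation funnels through code that can honor force32BitAddressing. Roughly, in terms of the LLVM C++ API of this era (a hypothetical helper, not the actual FunctionEmitContext method):

    #include "llvm/Support/IRBuilder.h"    // LLVM 2.9-era header path
    #include "llvm/Target/TargetData.h"

    // Hypothetical helper: compute a pointer to struct element 'element'
    // by byte-offset arithmetic rather than a multi-index GEP.
    static llvm::Value *
    lAddElementOffset(llvm::IRBuilder<> &builder, const llvm::TargetData &td,
                      llvm::Value *basePtr, llvm::StructType *structType,
                      int element) {
        uint64_t offset =
            td.getStructLayout(structType)->getElementOffset(element);
        // Do the math on a pointer-sized integer...
        llvm::Value *addr =
            builder.CreatePtrToInt(basePtr, builder.getInt64Ty());
        addr = builder.CreateAdd(addr, builder.getInt64(offset));
        // ...then convert back to the original pointer type.
        return builder.CreateIntToPtr(addr, basePtr->getType());
    }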
@@ -200,9 +222,9 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
         // Copy in the mask as well.
         int nArgs = (int)args.size();
         // The mask is the last parameter in the argument structure
-        llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs,
+        llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL,
                                                   "task_struct_mask");
-        llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, "mask");
+        llvm::Value *ptrval = ctx->LoadInst(ptr, "mask");
         ctx->SetFunctionMask(ptrval);

         // Copy threadIndex and threadCount into stack-allocated storage so
@@ -236,7 +258,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
     }

     // If the number of actual function arguments is equal to the
-    // number of declared arguments in decl->functionArgs, then we
+    // number of declared arguments in decl->functionParams, then we
     // don't have a mask parameter, so set it to be all on.  This
     // happens for example with 'export'ed functions that the app
     // calls.
@@ -338,11 +360,8 @@ Function::GenerateIR() {
     if (m->errorCount == 0) {
         if (llvm::verifyFunction(*function,
                                  llvm::ReturnStatusAction) == true) {
-            if (g->debugPrint) {
-                llvm::PassManager ppm;
-                ppm.add(llvm::createPrintModulePass(&llvm::outs()));
-                ppm.run(*m->module);
-            }
+            if (g->debugPrint)
+                function->dump();
             FATAL("Function verification failed");
         }
@@ -376,11 +395,8 @@ Function::GenerateIR() {
             sym->exportedFunction = appFunction;
             if (llvm::verifyFunction(*appFunction,
                                      llvm::ReturnStatusAction) == true) {
-                if (g->debugPrint) {
-                    llvm::PassManager ppm;
-                    ppm.add(llvm::createPrintModulePass(&llvm::outs()));
-                    ppm.run(*m->module);
-                }
+                if (g->debugPrint)
+                    appFunction->dump();
                 FATAL("Function verification failed");
             }
         }
diff --git a/ispc.cpp b/ispc.cpp
index 65dc0b67..188b753e 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -171,7 +171,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
     if (!error) {
         llvm::TargetMachine *targetMachine = t->GetTargetMachine();
         const llvm::TargetData *targetData = targetMachine->getTargetData();
-        t->is32bit = (targetData->getPointerSize() == 4);
+        t->is32Bit = (targetData->getPointerSize() == 4);
     }

     return !error;
@@ -284,8 +284,11 @@ llvm::Value *
 Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type) {
     const llvm::TargetData *td = GetTargetMachine()->getTargetData();
     assert(td != NULL);
-    return is32bit ? LLVMInt32(td->getTypeSizeInBits(type) / 8) :
-        LLVMInt64(td->getTypeSizeInBits(type) / 8);
+    uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
+    if (is32Bit || g->opt.force32BitAddressing)
+        return LLVMInt32(byteSize);
+    else
+        return LLVMInt64(byteSize);
 }


@@ -298,7 +301,12 @@ Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element) {
     assert(structType != NULL);
     const llvm::StructLayout *sl = td->getStructLayout(structType);
     assert(sl != NULL);
-    return LLVMInt32(sl->getElementOffset(element));
+
+    uint64_t offset = sl->getElementOffset(element);
+    if (is32Bit || g->opt.force32BitAddressing)
+        return LLVMInt32(offset);
+    else
+        return LLVMInt64(offset);
 }


@@ -309,6 +317,7 @@ Opt::Opt() {
     level = 1;
     fastMath = false;
     fastMaskedVload = false;
+    force32BitAddressing = false;
     unrollLoops = true;
     disableAsserts = false;
     disableHandlePseudoMemoryOps = false;
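Target::SizeOf() and Target::StructOffset() now share a single sizing rule, which can be stated once as a hypothetical helper (using the document's g->target / g->opt globals; the function name is illustrative, not part of the patch):

    // Sketch of the shared rule: compile-time sizes and offsets are
    // emitted as i32 constants when the target is 32-bit or when
    // force32BitAddressing is enabled, and as i64 constants otherwise.
    static llvm::Constant *
    lSizeOrOffsetConstant(uint64_t value) {
        if (g->target.is32Bit || g->opt.force32BitAddressing)
            return LLVMInt32((int32_t)value);
        else
            return LLVMInt64((int64_t)value);
    }

This is also exactly where the documented >2GB limitation comes from: once offsets are truncated to i32, sizes and offsets beyond 2^31 bytes can no longer be represented.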
 */
     bool unrollLoops;

+    /** Indicates if addressing math will be done with 32-bit math, even
+        on 64-bit systems.  (This is generally noticeably more efficient,
+        though at the cost of not being able to address over 2GB of
+        memory.) */
+    bool force32BitAddressing;
+
     /** Indicates whether assert() statements should be ignored (for
         performance in the generated code). */
     bool disableAsserts;
diff --git a/lex.ll b/lex.ll
index b31315c4..c8ff9b1d 100644
--- a/lex.ll
+++ b/lex.ll
@@ -112,9 +112,12 @@ int64 { return TOKEN_INT64; }
 launch { return TOKEN_LAUNCH; }
 NULL { return TOKEN_NULL; }
 print { return TOKEN_PRINT; }
-reference { return TOKEN_REFERENCE; }
+reference { Error(*yylloc, "\"reference\" qualifier is no longer supported; "
+                 "please use C++-style '&' syntax for references "
+                 "instead."); }
 return { return TOKEN_RETURN; }
 soa { return TOKEN_SOA; }
+sizeof { return TOKEN_SIZEOF; }
 static { return TOKEN_STATIC; }
 struct { return TOKEN_STRUCT; }
 switch { return TOKEN_SWITCH; }
@@ -223,6 +226,7 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
 "&=" { return TOKEN_AND_ASSIGN; }
 "^=" { return TOKEN_XOR_ASSIGN; }
 "|=" { return TOKEN_OR_ASSIGN; }
+"->" { return TOKEN_PTR_OP; }
 ";" { return ';'; }
 ("{"|"<%") { return '{'; }
 ("}"|"%>") { return '}'; }
@@ -266,8 +270,6 @@
 %%

-/*sizeof { return TOKEN_SIZEOF; }*/
-/*"->" { return TOKEN_PTR_OP; }*/
 /*short { return TOKEN_SHORT; }*/
 /*long { return TOKEN_LONG; }*/
 /*signed { return TOKEN_SIGNED; }*/
diff --git a/llvmutil.cpp b/llvmutil.cpp
index f31738cc..34e830d5 100644
--- a/llvmutil.cpp
+++ b/llvmutil.cpp
@@ -40,6 +40,7 @@
 LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
 LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
+LLVM_TYPE_CONST llvm::Type *LLVMTypes::PointerIntType = NULL;
 LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;

 LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
@@ -74,7 +75,7 @@
 LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
 LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
 LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;

-LLVM_TYPE_CONST llvm::ArrayType *LLVMTypes::VoidPointerVectorType = NULL;
+LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;

 llvm::Constant *LLVMTrue = NULL;
 llvm::Constant *LLVMFalse = NULL;
@@ -86,6 +87,8 @@ void
 InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
     LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
     LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);
+    LLVMTypes::PointerIntType = target.is32Bit ? llvm::Type::getInt32Ty(*ctx) :
+        llvm::Type::getInt64Ty(*ctx);

     LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
     LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx);
@@ -130,8 +133,8 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
     LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
     LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0);

-    LLVMTypes::VoidPointerVectorType =
-        llvm::ArrayType::get(LLVMTypes::VoidPointerType, target.vectorWidth);
+    LLVMTypes::VoidPointerVectorType = g->target.is32Bit ?
+        LLVMTypes::Int32VectorType :
+        LLVMTypes::Int64VectorType;

     LLVMTrue = llvm::ConstantInt::getTrue(*ctx);
     LLVMFalse = llvm::ConstantInt::getFalse(*ctx);
@@ -451,11 +454,3 @@ LLVMBoolVector(const bool *bvec) {
     }
     return llvm::ConstantVector::get(vals);
 }
-
-
-LLVM_TYPE_CONST llvm::ArrayType *
-LLVMPointerVectorType(LLVM_TYPE_CONST llvm::Type *t) {
-    // NOTE: ArrayType, not VectorType
-    return llvm::ArrayType::get(llvm::PointerType::get(t, 0),
-                                g->target.vectorWidth);
-}
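With LLVMPointerVectorType() gone, the working assumption behind the new VoidPointerVectorType is that a varying pointer is stored as a vector of pointer-sized integers (LLVM of this vintage has no vectors of pointers), converted with ptrtoint/inttoptr at each point of use. A sketch of extracting one lane back into a usable scalar pointer (a hypothetical helper, not part of this patch):

    // Hypothetical helper: pull lane 'lane' out of a VoidPointerVectorType
    // value (a vector of pointer-sized integers) and turn it back into a
    // pointer of the given type.
    static llvm::Value *
    lExtractLanePointer(llvm::IRBuilder<> &builder, llvm::Value *ptrVec,
                        unsigned lane, LLVM_TYPE_CONST llvm::Type *ptrType) {
        llvm::Value *laneVal =
            builder.CreateExtractElement(ptrVec, builder.getInt32(lane));
        return builder.CreateIntToPtr(laneVal, ptrType);
    }

Representing the lanes as integers also lets the same force32BitAddressing policy apply to pointer arithmetic on varying pointers, since it is ordinary vector integer math.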
diff --git a/llvmutil.h b/llvmutil.h
index 68397b70..0322b49e 100644
--- a/llvmutil.h
+++ b/llvmutil.h
@@ -52,6 +52,7 @@ struct LLVMTypes {
     static LLVM_TYPE_CONST llvm::Type *VoidType;
     static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
+    static LLVM_TYPE_CONST llvm::Type *PointerIntType;
     static LLVM_TYPE_CONST llvm::Type *BoolType;

     static LLVM_TYPE_CONST llvm::Type *Int8Type;
@@ -86,7 +87,7 @@ struct LLVMTypes {
     static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
     static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;

-    static LLVM_TYPE_CONST llvm::ArrayType *VoidPointerVectorType;
+    static LLVM_TYPE_CONST llvm::VectorType *VoidPointerVectorType;
 };

 /** These variables hold the corresponding LLVM constant values as a
@@ -204,10 +205,4 @@
 extern llvm::Constant *LLVMMaskAllOn;
 /** LLVM constant value representing an 'all off' SIMD lane mask */
 extern llvm::Constant *LLVMMaskAllOff;

-/** Given an LLVM type, returns the corresponding type for a vector of
-    pointers to that type.  (In practice, an array of pointers, since LLVM
-    prohibits vectors of pointers.)
- */
-extern LLVM_TYPE_CONST llvm::ArrayType *LLVMPointerVectorType(LLVM_TYPE_CONST llvm::Type *t);
-
 #endif // ISPC_LLVMUTIL_H
diff --git a/main.cpp b/main.cpp
index 11d54213..40ee8550 100644
--- a/main.cpp
+++ b/main.cpp
@@ -83,6 +83,7 @@ static void usage(int ret) {
     printf("    [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
     printf("    [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
     printf("    [--opt=