Add support for pointers to the language.

Pointers can be either uniform or varying, and behave correspondingly.
For example, "uniform float * varying" is a varying pointer to uniform float
data in memory, and "float * uniform" is a uniform pointer to varying
data in memory.  Like other types, pointers are varying by default.

Pointer-based expressions, & and *, sizeof, ->, pointer arithmetic,
and the array/pointer duality all behave as in C.  Array arguments
to functions are converted to pointers, also like C.

There is a built-in NULL for a null pointer value; conversion from
compile-time constant 0 values to NULL still needs to be implemented.

Other changes:
- Syntax for references has been updated to be C++ style; a useful
  warning is now issued if the "reference" keyword is used.
- It is now illegal to pass a varying lvalue as a reference parameter
  to a function; references are essentially uniform pointers.
  This case had previously been handled via special-case call-by-value-return
  code.  That path has been removed, now that varying pointers
  are available to handle this use case (and much more).
- Some stdlib routines have been updated to take pointers as
  arguments where appropriate (e.g. prefetch and the atomics).
  A number of others still need attention.
- All of the examples have been updated
- Many new tests

TODO: documentation
This commit is contained in:
Matt Pharr
2011-11-21 09:16:29 -08:00
parent 15a7d353ab
commit 975db80ef6
191 changed files with 4746 additions and 3225 deletions

View File

@@ -114,61 +114,39 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
// pointers to uniform // pointers to uniform
else if (t == LLVMTypes::Int8PointerType) else if (t == LLVMTypes::Int8PointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt8 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt8 :
AtomicType::UniformInt8, false); AtomicType::UniformInt8);
else if (t == LLVMTypes::Int16PointerType) else if (t == LLVMTypes::Int16PointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt16 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt16 :
AtomicType::UniformInt16, false); AtomicType::UniformInt16);
else if (t == LLVMTypes::Int32PointerType) else if (t == LLVMTypes::Int32PointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt32 :
AtomicType::UniformInt32, false); AtomicType::UniformInt32);
else if (t == LLVMTypes::Int64PointerType) else if (t == LLVMTypes::Int64PointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt64 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::UniformUInt64 :
AtomicType::UniformInt64, false); AtomicType::UniformInt64);
else if (t == LLVMTypes::FloatPointerType) else if (t == LLVMTypes::FloatPointerType)
return new ReferenceType(AtomicType::UniformFloat, false); return PointerType::GetUniform(AtomicType::UniformFloat);
else if (t == LLVMTypes::DoublePointerType) else if (t == LLVMTypes::DoublePointerType)
return new ReferenceType(AtomicType::UniformDouble, false); return PointerType::GetUniform(AtomicType::UniformDouble);
// pointers to varying // pointers to varying
else if (t == LLVMTypes::Int8VectorPointerType) else if (t == LLVMTypes::Int8VectorPointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt8 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt8 :
AtomicType::VaryingInt8, false); AtomicType::VaryingInt8);
else if (t == LLVMTypes::Int16VectorPointerType) else if (t == LLVMTypes::Int16VectorPointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt16 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt16 :
AtomicType::VaryingInt16, false); AtomicType::VaryingInt16);
else if (t == LLVMTypes::Int32VectorPointerType) else if (t == LLVMTypes::Int32VectorPointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt32 :
AtomicType::VaryingInt32, false); AtomicType::VaryingInt32);
else if (t == LLVMTypes::Int64VectorPointerType) else if (t == LLVMTypes::Int64VectorPointerType)
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt64 : return PointerType::GetUniform(intAsUnsigned ? AtomicType::VaryingUInt64 :
AtomicType::VaryingInt64, false); AtomicType::VaryingInt64);
else if (t == LLVMTypes::FloatVectorPointerType) else if (t == LLVMTypes::FloatVectorPointerType)
return new ReferenceType(AtomicType::VaryingFloat, false); return PointerType::GetUniform(AtomicType::VaryingFloat);
else if (t == LLVMTypes::DoubleVectorPointerType) else if (t == LLVMTypes::DoubleVectorPointerType)
return new ReferenceType(AtomicType::VaryingDouble, false); return PointerType::GetUniform(AtomicType::VaryingDouble);
// arrays
else if (llvm::isa<const llvm::PointerType>(t)) {
const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(t);
// Is it a pointer to an unsized array of objects? If so, then
// create the equivalent ispc type. Note that it has to be a
// reference to an array, since ispc passes arrays to functions by
// reference.
const llvm::ArrayType *at =
llvm::dyn_cast<const llvm::ArrayType>(pt->getElementType());
if (at != NULL) {
const Type *eltType = lLLVMTypeToISPCType(at->getElementType(),
intAsUnsigned);
if (eltType == NULL)
return NULL;
// FIXME: this needs to be fixed when arrays can have
// over 4G elements...
return new ReferenceType(new ArrayType(eltType, (int)at->getNumElements()),
false);
}
}
return NULL; return NULL;
} }
@@ -184,6 +162,9 @@ lCreateSymbol(const std::string &name, const Type *returnType,
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos); FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
Debug(noPos, "Created builtin symbol \"%s\" [%s]\n", name.c_str(),
funcType->GetString().c_str());
Symbol *sym = new Symbol(name, noPos, funcType); Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func; sym->function = func;
symbolTable->AddFunction(sym); symbolTable->AddFunction(sym);
@@ -244,7 +225,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// Iterate over the arguments and try to find their equivalent ispc // Iterate over the arguments and try to find their equivalent ispc
// types. Track if any of the arguments has an integer type. // types. Track if any of the arguments has an integer type.
bool anyIntArgs = false, anyReferenceArgs = false; bool anyIntArgs = false;
std::vector<const Type *> argTypes; std::vector<const Type *> argTypes;
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) { for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
const llvm::Type *llvmArgType = ftype->getParamType(j); const llvm::Type *llvmArgType = ftype->getParamType(j);
@@ -256,7 +237,6 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
} }
anyIntArgs |= anyIntArgs |=
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false); (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
argTypes.push_back(type); argTypes.push_back(type);
} }
@@ -264,19 +244,6 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// so that we get symbols for things with no integer types! // so that we get symbols for things with no integer types!
if (i == 0 || anyIntArgs == true) if (i == 0 || anyIntArgs == true)
lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable); lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
// If there are any reference types, also make a variant of the
// symbol that has them as const references. This obviously
// doesn't make sense for many builtins, but we'll give the stdlib
// the option to call one if it needs one.
if (anyReferenceArgs == true) {
for (unsigned int j = 0; j < argTypes.size(); ++j) {
if (dynamic_cast<const ReferenceType *>(argTypes[j]) != NULL)
argTypes[j] = argTypes[j]->GetAsConstType();
lCreateSymbol(name + "_refsconst", returnType, argTypes,
ftype, func, symbolTable);
}
}
} }
return true; return true;
@@ -476,62 +443,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__packed_store_active", "__packed_store_active",
"__popcnt_int32", "__popcnt_int32",
"__popcnt_int64", "__popcnt_int64",
"__prefetch_read_1_uniform_bool", "__prefetch_read_uniform_1",
"__prefetch_read_1_uniform_double", "__prefetch_read_uniform_2",
"__prefetch_read_1_uniform_float", "__prefetch_read_uniform_3",
"__prefetch_read_1_uniform_int16", "__prefetch_read_uniform_nt",
"__prefetch_read_1_uniform_int32",
"__prefetch_read_1_uniform_int64",
"__prefetch_read_1_uniform_int8",
"__prefetch_read_1_varying_bool",
"__prefetch_read_1_varying_double",
"__prefetch_read_1_varying_float",
"__prefetch_read_1_varying_int16",
"__prefetch_read_1_varying_int32",
"__prefetch_read_1_varying_int64",
"__prefetch_read_1_varying_int8",
"__prefetch_read_2_uniform_bool",
"__prefetch_read_2_uniform_double",
"__prefetch_read_2_uniform_float",
"__prefetch_read_2_uniform_int16",
"__prefetch_read_2_uniform_int32",
"__prefetch_read_2_uniform_int64",
"__prefetch_read_2_uniform_int8",
"__prefetch_read_2_varying_bool",
"__prefetch_read_2_varying_double",
"__prefetch_read_2_varying_float",
"__prefetch_read_2_varying_int16",
"__prefetch_read_2_varying_int32",
"__prefetch_read_2_varying_int64",
"__prefetch_read_2_varying_int8",
"__prefetch_read_3_uniform_bool",
"__prefetch_read_3_uniform_double",
"__prefetch_read_3_uniform_float",
"__prefetch_read_3_uniform_int16",
"__prefetch_read_3_uniform_int32",
"__prefetch_read_3_uniform_int64",
"__prefetch_read_3_uniform_int8",
"__prefetch_read_3_varying_bool",
"__prefetch_read_3_varying_double",
"__prefetch_read_3_varying_float",
"__prefetch_read_3_varying_int16",
"__prefetch_read_3_varying_int32",
"__prefetch_read_3_varying_int64",
"__prefetch_read_3_varying_int8",
"__prefetch_read_nt_uniform_bool",
"__prefetch_read_nt_uniform_double",
"__prefetch_read_nt_uniform_float",
"__prefetch_read_nt_uniform_int16",
"__prefetch_read_nt_uniform_int32",
"__prefetch_read_nt_uniform_int64",
"__prefetch_read_nt_uniform_int8",
"__prefetch_read_nt_varying_bool",
"__prefetch_read_nt_varying_double",
"__prefetch_read_nt_varying_float",
"__prefetch_read_nt_varying_int16",
"__prefetch_read_nt_varying_int32",
"__prefetch_read_nt_varying_int64",
"__prefetch_read_nt_varying_int8",
"__rcp_uniform_float", "__rcp_uniform_float",
"__rcp_varying_float", "__rcp_varying_float",
"__reduce_add_double", "__reduce_add_double",
@@ -747,7 +662,7 @@ void
DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module, DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
bool includeStdlibISPC) { bool includeStdlibISPC) {
// Add the definitions from the compiled builtins-c.c file // Add the definitions from the compiled builtins-c.c file
if (g->target.is32bit) { if (g->target.is32Bit) {
extern unsigned char builtins_bitcode_c_32[]; extern unsigned char builtins_bitcode_c_32[];
extern int builtins_bitcode_c_32_length; extern int builtins_bitcode_c_32_length;
AddBitcodeToModule(builtins_bitcode_c_32, builtins_bitcode_c_32_length, AddBitcodeToModule(builtins_bitcode_c_32, builtins_bitcode_c_32_length,

View File

@@ -822,40 +822,6 @@ define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
} }
') ')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; prefetch definitions
; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
; and data caches--the declaration is now:
; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
; i32 %cachetype) (cachetype 1 == data cache)
; however, the version below seems to still work...
declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
define(`prefetch_read', `
define void @__prefetch_read_1_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
ret void
}
define void @__prefetch_read_2_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
ret void
}
define void @__prefetch_read_3_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
ret void
}
define void @__prefetch_read_nt_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
ret void
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
define(`stdlib_core', ` define(`stdlib_core', `
@@ -916,15 +882,25 @@ declare void @__pseudo_masked_store_64(<$1 x i64> * nocapture, <$1 x i64>, <$1 x
; converts them to native gather functions or converts them to vector ; converts them to native gather functions or converts them to vector
; loads, if equivalent. ; loads, if equivalent.
declare <$1 x i8> @__pseudo_gather_8([$1 x i8 *], <$1 x i32>) nounwind readonly declare <$1 x i8> @__pseudo_gather32_8(<$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i16> @__pseudo_gather_16([$1 x i8 *], <$1 x i32>) nounwind readonly declare <$1 x i16> @__pseudo_gather32_16(<$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i32> @__pseudo_gather_32([$1 x i8 *], <$1 x i32>) nounwind readonly declare <$1 x i32> @__pseudo_gather32_32(<$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i64> @__pseudo_gather_64([$1 x i8 *], <$1 x i32>) nounwind readonly declare <$1 x i64> @__pseudo_gather32_64(<$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i8> @__pseudo_gather_base_offsets_8(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly declare <$1 x i8> @__pseudo_gather64_8(<$1 x i64>, <$1 x i32>) nounwind readonly
declare <$1 x i16> @__pseudo_gather_base_offsets_16(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly declare <$1 x i16> @__pseudo_gather64_16(<$1 x i64>, <$1 x i32>) nounwind readonly
declare <$1 x i32> @__pseudo_gather_base_offsets_32(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly declare <$1 x i32> @__pseudo_gather64_32(<$1 x i64>, <$1 x i32>) nounwind readonly
declare <$1 x i64> @__pseudo_gather_base_offsets_64(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly declare <$1 x i64> @__pseudo_gather64_64(<$1 x i64>, <$1 x i32>) nounwind readonly
declare <$1 x i8> @__pseudo_gather_base_offsets32_8(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i16> @__pseudo_gather_base_offsets32_16(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i32> @__pseudo_gather_base_offsets32_32(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i64> @__pseudo_gather_base_offsets32_64(i8 *, <$1 x i32>, <$1 x i32>) nounwind readonly
declare <$1 x i8> @__pseudo_gather_base_offsets64_8(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly
declare <$1 x i16> @__pseudo_gather_base_offsets64_16(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly
declare <$1 x i32> @__pseudo_gather_base_offsets64_32(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly
declare <$1 x i64> @__pseudo_gather_base_offsets64_64(i8 *, <$1 x i64>, <$1 x i32>) nounwind readonly
; Similarly to the pseudo-gathers defined above, we also declare undefined ; Similarly to the pseudo-gathers defined above, we also declare undefined
; pseudo-scatter instructions with signatures: ; pseudo-scatter instructions with signatures:
@@ -949,18 +925,32 @@ declare <$1 x i64> @__pseudo_gather_base_offsets_64(i8 *, <$1 x i32>, <$1 x i32>
; And the GSImprovementsPass in turn converts these to actual native ; And the GSImprovementsPass in turn converts these to actual native
; scatters or masked stores. ; scatters or masked stores.
declare void @__pseudo_scatter_8([$1 x i8 *], <$1 x i8>, <$1 x i32>) nounwind declare void @__pseudo_scatter32_8(<$1 x i32>, <$1 x i8>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_16([$1 x i8 *], <$1 x i16>, <$1 x i32>) nounwind declare void @__pseudo_scatter32_16(<$1 x i32>, <$1 x i16>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_32([$1 x i8 *], <$1 x i32>, <$1 x i32>) nounwind declare void @__pseudo_scatter32_32(<$1 x i32>, <$1 x i32>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_64([$1 x i8 *], <$1 x i64>, <$1 x i32>) nounwind declare void @__pseudo_scatter32_64(<$1 x i32>, <$1 x i64>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets_8(i8 * nocapture, <$1 x i32>, declare void @__pseudo_scatter64_8(<$1 x i64>, <$1 x i8>, <$1 x i32>) nounwind
declare void @__pseudo_scatter64_16(<$1 x i64>, <$1 x i16>, <$1 x i32>) nounwind
declare void @__pseudo_scatter64_32(<$1 x i64>, <$1 x i32>, <$1 x i32>) nounwind
declare void @__pseudo_scatter64_64(<$1 x i64>, <$1 x i64>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets32_8(i8 * nocapture, <$1 x i32>,
<$1 x i8>, <$1 x i32>) nounwind <$1 x i8>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets_16(i8 * nocapture, <$1 x i32>, declare void @__pseudo_scatter_base_offsets32_16(i8 * nocapture, <$1 x i32>,
<$1 x i16>, <$1 x i32>) nounwind <$1 x i16>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets_32(i8 * nocapture, <$1 x i32>, declare void @__pseudo_scatter_base_offsets32_32(i8 * nocapture, <$1 x i32>,
<$1 x i32>, <$1 x i32>) nounwind <$1 x i32>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets_64(i8 * nocapture, <$1 x i32>, declare void @__pseudo_scatter_base_offsets32_64(i8 * nocapture, <$1 x i32>,
<$1 x i64>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets64_8(i8 * nocapture, <$1 x i64>,
<$1 x i8>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets64_16(i8 * nocapture, <$1 x i64>,
<$1 x i16>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets64_32(i8 * nocapture, <$1 x i64>,
<$1 x i32>, <$1 x i32>) nounwind
declare void @__pseudo_scatter_base_offsets64_64(i8 * nocapture, <$1 x i64>,
<$1 x i64>, <$1 x i32>) nounwind <$1 x i64>, <$1 x i32>) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1634,11 +1624,10 @@ define void
;; versions to be called from stdlib ;; versions to be called from stdlib
define void define void
@__aos_to_soa4_float([0 x float] * noalias %base, i32 %offset, @__aos_to_soa4_float(float * noalias %pf, i32 %offset,
<$1 x float> * noalias %out0, <$1 x float> * noalias %out1, <$1 x float> * noalias %out0, <$1 x float> * noalias %out1,
<$1 x float> * noalias %out2, <$1 x float> * noalias %out3) <$1 x float> * noalias %out2, <$1 x float> * noalias %out3)
nounwind alwaysinline { nounwind alwaysinline {
%pf = bitcast [0 x float] * %base to float *
%p = getelementptr float * %pf, i32 %offset %p = getelementptr float * %pf, i32 %offset
%p0 = bitcast float * %p to <$1 x float> * %p0 = bitcast float * %p to <$1 x float> *
%v0 = load <$1 x float> * %p0, align 4 %v0 = load <$1 x float> * %p0, align 4
@@ -1656,16 +1645,16 @@ define void
define void define void
@__aos_to_soa4_int32([0 x i32] * noalias %base, i32 %offset, @__aos_to_soa4_int32(i32 * noalias %base, i32 %offset,
<$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1, <$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
<$1 x i32> * noalias %out2, <$1 x i32> * noalias %out3) <$1 x i32> * noalias %out2, <$1 x i32> * noalias %out3)
nounwind alwaysinline { nounwind alwaysinline {
%fbase = bitcast [0 x i32] * %base to [0 x float] * %fbase = bitcast i32 * %base to float *
%fout0 = bitcast <$1 x i32> * %out0 to <$1 x float> * %fout0 = bitcast <$1 x i32> * %out0 to <$1 x float> *
%fout1 = bitcast <$1 x i32> * %out1 to <$1 x float> * %fout1 = bitcast <$1 x i32> * %out1 to <$1 x float> *
%fout2 = bitcast <$1 x i32> * %out2 to <$1 x float> * %fout2 = bitcast <$1 x i32> * %out2 to <$1 x float> *
%fout3 = bitcast <$1 x i32> * %out3 to <$1 x float> * %fout3 = bitcast <$1 x i32> * %out3 to <$1 x float> *
call void @__aos_to_soa4_float([0 x float] * %fbase, i32 %offset, call void @__aos_to_soa4_float(float * %fbase, i32 %offset,
<$1 x float> * %fout0, <$1 x float> * %fout1, <$1 x float> * %fout2, <$1 x float> * %fout0, <$1 x float> * %fout1, <$1 x float> * %fout2,
<$1 x float> * %fout3) <$1 x float> * %fout3)
ret void ret void
@@ -1674,9 +1663,8 @@ define void
define void define void
@__soa_to_aos4_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2, @__soa_to_aos4_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
<$1 x float> %v3, [0 x float] * noalias %base, <$1 x float> %v3, float * noalias %pf,
i32 %offset) nounwind alwaysinline { i32 %offset) nounwind alwaysinline {
%pf = bitcast [0 x float] * %base to float *
%p = getelementptr float * %pf, i32 %offset %p = getelementptr float * %pf, i32 %offset
%out0 = bitcast float * %p to <$1 x float> * %out0 = bitcast float * %p to <$1 x float> *
%out1 = getelementptr <$1 x float> * %out0, i32 1 %out1 = getelementptr <$1 x float> * %out0, i32 1
@@ -1691,25 +1679,24 @@ define void
define void define void
@__soa_to_aos4_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2, @__soa_to_aos4_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
<$1 x i32> %v3, [0 x i32] * noalias %base, <$1 x i32> %v3, i32 * noalias %base,
i32 %offset) nounwind alwaysinline { i32 %offset) nounwind alwaysinline {
%fv0 = bitcast <$1 x i32> %v0 to <$1 x float> %fv0 = bitcast <$1 x i32> %v0 to <$1 x float>
%fv1 = bitcast <$1 x i32> %v1 to <$1 x float> %fv1 = bitcast <$1 x i32> %v1 to <$1 x float>
%fv2 = bitcast <$1 x i32> %v2 to <$1 x float> %fv2 = bitcast <$1 x i32> %v2 to <$1 x float>
%fv3 = bitcast <$1 x i32> %v3 to <$1 x float> %fv3 = bitcast <$1 x i32> %v3 to <$1 x float>
%fbase = bitcast [0 x i32] * %base to [0 x float] * %fbase = bitcast i32 * %base to float *
call void @__soa_to_aos4_float(<$1 x float> %fv0, <$1 x float> %fv1, call void @__soa_to_aos4_float(<$1 x float> %fv0, <$1 x float> %fv1,
<$1 x float> %fv2, <$1 x float> %fv3, [0 x float] * %fbase, <$1 x float> %fv2, <$1 x float> %fv3, float * %fbase,
i32 %offset) i32 %offset)
ret void ret void
} }
define void define void
@__aos_to_soa3_float([0 x float] * noalias %base, i32 %offset, @__aos_to_soa3_float(float * noalias %pf, i32 %offset,
<$1 x float> * %out0, <$1 x float> * %out1, <$1 x float> * %out0, <$1 x float> * %out1,
<$1 x float> * %out2) nounwind alwaysinline { <$1 x float> * %out2) nounwind alwaysinline {
%pf = bitcast [0 x float] * %base to float *
%p = getelementptr float * %pf, i32 %offset %p = getelementptr float * %pf, i32 %offset
%p0 = bitcast float * %p to <$1 x float> * %p0 = bitcast float * %p to <$1 x float> *
%v0 = load <$1 x float> * %p0, align 4 %v0 = load <$1 x float> * %p0, align 4
@@ -1725,14 +1712,14 @@ define void
define void define void
@__aos_to_soa3_int32([0 x i32] * noalias %base, i32 %offset, @__aos_to_soa3_int32(i32 * noalias %base, i32 %offset,
<$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1, <$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
<$1 x i32> * noalias %out2) nounwind alwaysinline { <$1 x i32> * noalias %out2) nounwind alwaysinline {
%fbase = bitcast [0 x i32] * %base to [0 x float] * %fbase = bitcast i32 * %base to float *
%fout0 = bitcast <$1 x i32> * %out0 to <$1 x float> * %fout0 = bitcast <$1 x i32> * %out0 to <$1 x float> *
%fout1 = bitcast <$1 x i32> * %out1 to <$1 x float> * %fout1 = bitcast <$1 x i32> * %out1 to <$1 x float> *
%fout2 = bitcast <$1 x i32> * %out2 to <$1 x float> * %fout2 = bitcast <$1 x i32> * %out2 to <$1 x float> *
call void @__aos_to_soa3_float([0 x float] * %fbase, i32 %offset, call void @__aos_to_soa3_float(float * %fbase, i32 %offset,
<$1 x float> * %fout0, <$1 x float> * %fout1, <$1 x float> * %fout2) <$1 x float> * %fout0, <$1 x float> * %fout1, <$1 x float> * %fout2)
ret void ret void
} }
@@ -1740,8 +1727,7 @@ define void
define void define void
@__soa_to_aos3_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2, @__soa_to_aos3_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
[0 x float] * noalias %base, i32 %offset) nounwind alwaysinline { float * noalias %pf, i32 %offset) nounwind alwaysinline {
%pf = bitcast [0 x float] * %base to float *
%p = getelementptr float * %pf, i32 %offset %p = getelementptr float * %pf, i32 %offset
%out0 = bitcast float * %p to <$1 x float> * %out0 = bitcast float * %p to <$1 x float> *
%out1 = getelementptr <$1 x float> * %out0, i32 1 %out1 = getelementptr <$1 x float> * %out0, i32 1
@@ -1755,13 +1741,13 @@ define void
define void define void
@__soa_to_aos3_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2, @__soa_to_aos3_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
[0 x i32] * noalias %base, i32 %offset) nounwind alwaysinline { i32 * noalias %base, i32 %offset) nounwind alwaysinline {
%fv0 = bitcast <$1 x i32> %v0 to <$1 x float> %fv0 = bitcast <$1 x i32> %v0 to <$1 x float>
%fv1 = bitcast <$1 x i32> %v1 to <$1 x float> %fv1 = bitcast <$1 x i32> %v1 to <$1 x float>
%fv2 = bitcast <$1 x i32> %v2 to <$1 x float> %fv2 = bitcast <$1 x i32> %v2 to <$1 x float>
%fbase = bitcast [0 x i32] * %base to [0 x float] * %fbase = bitcast i32 * %base to float *
call void @__soa_to_aos3_float(<$1 x float> %fv0, <$1 x float> %fv1, call void @__soa_to_aos3_float(<$1 x float> %fv0, <$1 x float> %fv1,
<$1 x float> %fv2, [0 x float] * %fbase, i32 %offset) <$1 x float> %fv2, float * %fbase, i32 %offset)
ret void ret void
} }
@@ -1769,21 +1755,34 @@ define void
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; prefetching ;; prefetching
prefetch_read(uniform_bool, i1) ; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
prefetch_read(uniform_int8, i8) ; and data caches--the declaration is now:
prefetch_read(uniform_int16, i16) ; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
prefetch_read(uniform_int32, i32) ; i32 %cachetype) (cachetype 1 == data cache)
prefetch_read(uniform_int64, i64) ; however, the version below seems to still work...
prefetch_read(uniform_float, float)
prefetch_read(uniform_double, double) declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
define void @__prefetch_read_uniform_1(i8 *) alwaysinline {
call void @llvm.prefetch(i8 * %0, i32 0, i32 3)
ret void
}
define void @__prefetch_read_uniform_2(i8 *) alwaysinline {
call void @llvm.prefetch(i8 * %0, i32 0, i32 2)
ret void
}
define void @__prefetch_read_uniform_3(i8 *) alwaysinline {
call void @llvm.prefetch(i8 * %0, i32 0, i32 1)
ret void
}
define void @__prefetch_read_uniform_nt(i8 *) alwaysinline {
call void @llvm.prefetch(i8 * %0, i32 0, i32 0)
ret void
}
prefetch_read(varying_bool, <$1 x i32>)
prefetch_read(varying_int8, <$1 x i8>)
prefetch_read(varying_int16, <$1 x i16>)
prefetch_read(varying_int32, <$1 x i32>)
prefetch_read(varying_int64, <$1 x i64>)
prefetch_read(varying_float, <$1 x float>)
prefetch_read(varying_double, <$1 x double>)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; assert ;; assert
@@ -2354,11 +2353,10 @@ define void @__masked_store_blend_16(<16 x i16>* nocapture, <16 x i16>,
define(`packed_load_and_store', ` define(`packed_load_and_store', `
define i32 @__packed_load_active([0 x i32] *, i32 %start_offset, <$1 x i32> * %val_ptr, define i32 @__packed_load_active(i32 * %baseptr, i32 %start_offset, <$1 x i32> * %val_ptr,
<$1 x i32> %full_mask) nounwind alwaysinline { <$1 x i32> %full_mask) nounwind alwaysinline {
entry: entry:
%mask = call i32 @__movmsk(<$1 x i32> %full_mask) %mask = call i32 @__movmsk(<$1 x i32> %full_mask)
%baseptr = bitcast [0 x i32] * %0 to i32 *
%startptr = getelementptr i32 * %baseptr, i32 %start_offset %startptr = getelementptr i32 * %baseptr, i32 %start_offset
%mask_known = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask) %mask_known = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask)
br i1 %mask_known, label %known_mask, label %unknown_mask br i1 %mask_known, label %known_mask, label %unknown_mask
@@ -2410,11 +2408,10 @@ done:
ret i32 %nextoffset ret i32 %nextoffset
} }
define i32 @__packed_store_active([0 x i32] *, i32 %start_offset, <$1 x i32> %vals, define i32 @__packed_store_active(i32 * %baseptr, i32 %start_offset, <$1 x i32> %vals,
<$1 x i32> %full_mask) nounwind alwaysinline { <$1 x i32> %full_mask) nounwind alwaysinline {
entry: entry:
%mask = call i32 @__movmsk(<$1 x i32> %full_mask) %mask = call i32 @__movmsk(<$1 x i32> %full_mask)
%baseptr = bitcast [0 x i32] * %0 to i32 *
%startptr = getelementptr i32 * %baseptr, i32 %start_offset %startptr = getelementptr i32 * %baseptr, i32 %start_offset
%mask_known = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask) %mask_known = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask)
br i1 %mask_known, label %known_mask, label %unknown_mask br i1 %mask_known, label %known_mask, label %unknown_mask
@@ -2686,7 +2683,7 @@ pl_done:
define(`gen_gather', ` define(`gen_gather', `
;; Define the utility function to do the gather operation for a single element ;; Define the utility function to do the gather operation for a single element
;; of the type ;; of the type
define <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret, define <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret,
i32 %lane) nounwind readonly alwaysinline { i32 %lane) nounwind readonly alwaysinline {
; compute address for this one from the base ; compute address for this one from the base
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane %offset32 = extractelement <$1 x i32> %offsets, i32 %lane
@@ -2699,8 +2696,21 @@ define <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret
ret <$1 x $2> %updatedret ret <$1 x $2> %updatedret
} }
define <$1 x $2> @__gather_elt64_$2(i8 * %ptr, <$1 x i64> %offsets, <$1 x $2> %ret,
i32 %lane) nounwind readonly alwaysinline {
; compute address for this one from the base
%offset32 = extractelement <$1 x i64> %offsets, i32 %lane
%ptroffset = getelementptr i8 * %ptr, i64 %offset32
%ptrcast = bitcast i8 * %ptroffset to $2 *
define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets, ; load value and insert into returned value
%val = load $2 *%ptrcast
%updatedret = insertelement <$1 x $2> %ret, $2 %val, i32 %lane
ret <$1 x $2> %updatedret
}
define <$1 x $2> @__gather_base_offsets32_$2(i8 * %ptr, <$1 x i32> %offsets,
<$1 x i32> %vecmask) nounwind readonly alwaysinline { <$1 x i32> %vecmask) nounwind readonly alwaysinline {
; We can be clever and avoid the per-lane stuff for gathers if we are willing ; We can be clever and avoid the per-lane stuff for gathers if we are willing
; to require that the 0th element of the array being gathered from is always ; to require that the 0th element of the array being gathered from is always
@@ -2713,14 +2723,68 @@ define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,
<$1 x i32> %vecmask) <$1 x i32> %vecmask)
%newOffsets = load <$1 x i32> * %offsetsPtr %newOffsets = load <$1 x i32> * %offsetsPtr
%ret0 = call <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %newOffsets, %ret0 = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %newOffsets,
<$1 x $2> undef, i32 0) <$1 x $2> undef, i32 0)
forloop(lane, 1, eval($1-1), forloop(lane, 1, eval($1-1),
`patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt_$2(i8 * %ptr, `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr,
<$1 x i32> %newOffsets, <$1 x $2> %retPREV, i32 LANE) <$1 x i32> %newOffsets, <$1 x $2> %retPREV, i32 LANE)
', `LANE', lane), `PREV', eval(lane-1))') ', `LANE', lane), `PREV', eval(lane-1))')
ret <$1 x $2> %ret`'eval($1-1) ret <$1 x $2> %ret`'eval($1-1)
} }
define <$1 x $2> @__gather_base_offsets64_$2(i8 * %ptr, <$1 x i64> %offsets,
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
; We can be clever and avoid the per-lane stuff for gathers if we are willing
; to require that the 0th element of the array being gathered from is always
; legal to read from (and we do indeed require that, given the benefits!)
;
; Set the offset to zero for lanes that are off
%offsetsPtr = alloca <$1 x i64>
store <$1 x i64> zeroinitializer, <$1 x i64> * %offsetsPtr
call void @__masked_store_blend_64(<$1 x i64> * %offsetsPtr, <$1 x i64> %offsets,
<$1 x i32> %vecmask)
%newOffsets = load <$1 x i64> * %offsetsPtr
%ret0 = call <$1 x $2> @__gather_elt64_$2(i8 * %ptr, <$1 x i64> %newOffsets,
<$1 x $2> undef, i32 0)
forloop(lane, 1, eval($1-1),
`patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt64_$2(i8 * %ptr,
<$1 x i64> %newOffsets, <$1 x $2> %retPREV, i32 LANE)
', `LANE', lane), `PREV', eval(lane-1))')
ret <$1 x $2> %ret`'eval($1-1)
}
; fully general 32-bit gather, takes array of pointers encoded as vector of i32s
define <$1 x $2> @__gather32_$2(<$1 x i32> %ptrs,
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <$1 x $2>
per_lane($1, <$1 x i32> %vecmask, `
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_ID = inttoptr i32 %iptr_ID to $2 *
%val_ID = load $2 * %ptr_ID
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_ID, $2 * %store_ptr_ID
')
%ret = load <$1 x $2> * %ret_ptr
ret <$1 x $2> %ret
}
; fully general 64-bit gather, takes array of pointers encoded as vector of i64s
define <$1 x $2> @__gather64_$2(<$1 x i64> %ptrs,
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <$1 x $2>
per_lane($1, <$1 x i32> %vecmask, `
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_ID = inttoptr i64 %iptr_ID to $2 *
%val_ID = load $2 * %ptr_ID
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_ID, $2 * %store_ptr_ID
')
%ret = load <$1 x $2> * %ret_ptr
ret <$1 x $2> %ret
}
' '
) )
@@ -2735,7 +2799,7 @@ define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,
define(`gen_scatter', ` define(`gen_scatter', `
;; Define the function that describes the work to do to scatter a single ;; Define the function that describes the work to do to scatter a single
;; value ;; value
define void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values, define void @__scatter_elt32_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values,
i32 %lane) nounwind alwaysinline { i32 %lane) nounwind alwaysinline {
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane %offset32 = extractelement <$1 x i32> %offsets, i32 %lane
%offset64 = zext i32 %offset32 to i64 %offset64 = zext i32 %offset32 to i64
@@ -2746,13 +2810,57 @@ define void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values
ret void ret void
} }
define void @__scatter_base_offsets_$2(i8* %base, <$1 x i32> %offsets, <$1 x $2> %values, define void @__scatter_elt64_$2(i64 %ptr64, <$1 x i64> %offsets, <$1 x $2> %values,
i32 %lane) nounwind alwaysinline {
%offset64 = extractelement <$1 x i64> %offsets, i32 %lane
%ptrdelta = add i64 %ptr64, %offset64
%ptr = inttoptr i64 %ptrdelta to $2 *
%storeval = extractelement <$1 x $2> %values, i32 %lane
store $2 %storeval, $2 * %ptr
ret void
}
define void @__scatter_base_offsets32_$2(i8* %base, <$1 x i32> %offsets, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline { <$1 x i32> %mask) nounwind alwaysinline {
;; And use the `per_lane' macro to do all of the per-lane work for scatter... ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
%ptr64 = ptrtoint i8 * %base to i64 %ptr64 = ptrtoint i8 * %base to i64
per_lane($1, <$1 x i32> %mask, ` per_lane($1, <$1 x i32> %mask, `
call void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values, i32 LANE)') call void @__scatter_elt32_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values, i32 LANE)')
ret void ret void
} }
define void @__scatter_base_offsets64_$2(i8* %base, <$1 x i64> %offsets, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline {
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
%ptr64 = ptrtoint i8 * %base to i64
per_lane($1, <$1 x i32> %mask, `
call void @__scatter_elt64_$2(i64 %ptr64, <$1 x i64> %offsets, <$1 x $2> %values, i32 LANE)')
ret void
}
; fully general 32-bit scatter, takes array of pointers encoded as vector of i32s
define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline {
per_lane($1, <$1 x i32> %mask, `
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_ID = inttoptr i32 %iptr_ID to $2 *
%val_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_ID, $2 * %ptr_ID
')
ret void
}
; fully general 64-bit scatter, takes array of pointers encoded as vector of i64s
define void @__scatter64_$2(<$1 x i64> %ptrs, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline {
per_lane($1, <$1 x i32> %mask, `
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_ID = inttoptr i64 %iptr_ID to $2 *
%val_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_ID, $2 * %ptr_ID
')
ret void
}
' '
) )

1061
ctx.cpp

File diff suppressed because it is too large Load Diff

83
ctx.h
View File

@@ -311,20 +311,13 @@ public:
/** Given a scalar value, return a vector of the same type (or an /** Given a scalar value, return a vector of the same type (or an
array, for pointer types). */ array, for pointer types). */
llvm::Value *SmearScalar(llvm::Value *value, const char *name = NULL); llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL);
llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
const char *name = NULL); const char *name = NULL);
llvm::Value *PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL);
const char *name = NULL);
llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
const char *name = NULL); const char *name = NULL);
/** Given a value of some array type, return the corresponding value of
vector type. */
llvm::Value *ArrayToVectorInst(llvm::Value *value);
/** Given a value of some vector type, return the corresponding value of
array type. */
llvm::Value *VectorToArrayInst(llvm::Value *value);
llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
const char *name = NULL); const char *name = NULL);
@@ -337,26 +330,37 @@ public:
llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
const char *name = NULL); const char *name = NULL);
/** This GEP method is a generalization of the standard one in LLVM; it /** These GEP methods are generalizations of the standard ones in LLVM;
supports both uniform and varying basePtr values (an array of they support both uniform and varying basePtr values as well as
pointers) as well as uniform and varying index values (arrays of uniform and varying index values (arrays of indices). Varying base
indices). */ pointers are expected to come in as vectors of i32/i64 (depending
on the target), since LLVM doesn't currently support vectors of
pointers. The underlying type of the base pointer must be provided
via the ptrType parameter */
llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
const Type *ptrType, const char *name = NULL);
llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0, llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
llvm::Value *index1, const char *name = NULL); llvm::Value *index1, const Type *ptrType,
/** This is a convenience method to generate a GEP instruction with
indices with values with known constant values as the ispc program
is being compiled. */
llvm::Value *GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
const char *name = NULL); const char *name = NULL);
/** This method returns a new pointer that represents offsetting the
given base pointer to point at the given element number of the
structure type that the base pointer points to. (The provided
pointer must be a pointer to a structure type.) The ptrType gives
the type of the pointer, though it may be NULL if the base pointer
is uniform. */
llvm::Value *AddElementOffset(llvm::Value *basePtr, int elementNum,
const Type *ptrType, const char *name = NULL);
/** Load from the memory location(s) given by lvalue, using the given /** Load from the memory location(s) given by lvalue, using the given
mask. The lvalue may be varying, in which case this corresponds to mask. The lvalue may be varying, in which case this corresponds to
a gather from the multiple memory locations given by the array of a gather from the multiple memory locations given by the array of
pointer values given by the lvalue. If the lvalue is not varying, pointer values given by the lvalue. If the lvalue is not varying,
then both the mask pointer and the type pointer may be NULL. */ then both the mask pointer and the type pointer may be NULL. */
llvm::Value *LoadInst(llvm::Value *lvalue, llvm::Value *mask, llvm::Value *LoadInst(llvm::Value *ptr, llvm::Value *mask,
const Type *type, const char *name = NULL); const Type *ptrType, const char *name = NULL);
llvm::Value *LoadInst(llvm::Value *ptr, const char *name = NULL);
/** Emits an alloca instruction to allocate stack storage for the given /** Emits an alloca instruction to allocate stack storage for the given
type. If a non-zero alignment is specified, the object is also type. If a non-zero alignment is specified, the object is also
@@ -370,16 +374,14 @@ public:
/** Standard store instruction; for this variant, the lvalue must be a /** Standard store instruction; for this variant, the lvalue must be a
single pointer, not a varying lvalue. */ single pointer, not a varying lvalue. */
void StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, void StoreInst(llvm::Value *value, llvm::Value *ptr);
const char *name = NULL);
/** In this variant of StoreInst(), the lvalue may be varying. If so, /** In this variant of StoreInst(), the lvalue may be varying. If so,
this corresponds to a scatter. Whether the lvalue is uniform or this corresponds to a scatter. Whether the lvalue is uniform or
varying, the given storeMask is used to mask the stores so that varying, the given storeMask is used to mask the stores so that
they only execute for the active program instances. */ they only execute for the active program instances. */
void StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, void StoreInst(llvm::Value *value, llvm::Value *ptr,
llvm::Value *storeMask, const Type *rvalueType, llvm::Value *storeMask, const Type *ptrType);
const char *name = NULL);
void BranchInst(llvm::BasicBlock *block); void BranchInst(llvm::BasicBlock *block);
void BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock, void BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock,
@@ -401,20 +403,22 @@ public:
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
llvm::Value *val1, const char *name = NULL); llvm::Value *val1, const char *name = NULL);
/** Emits IR to do a function call with the given arguments. The /** Emits IR to do a function call with the given arguments. If the
function return type must be provided in returnType. */ function type is a varying function pointer type, its full type
llvm::Value *CallInst(llvm::Value *func, const Type *returnType, must be provided in funcType. funcType can be NULL if func is a
uniform function pointer. */
llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType,
const std::vector<llvm::Value *> &args, const std::vector<llvm::Value *> &args,
const char *name = NULL); const char *name = NULL);
/** This is a convenience method that issues a call instruction to a /** This is a convenience method that issues a call instruction to a
function that takes just a single argument. */ function that takes just a single argument. */
llvm::Value *CallInst(llvm::Value *func, const Type *returnType, llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType,
llvm::Value *arg, const char *name = NULL); llvm::Value *arg, const char *name = NULL);
/** This is a convenience method that issues a call instruction to a /** This is a convenience method that issues a call instruction to a
function that takes two arguments. */ function that takes two arguments. */
llvm::Value *CallInst(llvm::Value *func, const Type *returnType, llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType,
llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg0, llvm::Value *arg1,
const char *name = NULL); const char *name = NULL);
@@ -530,15 +534,18 @@ private:
void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target); void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target);
llvm::Value *emitGatherCallback(llvm::Value *lvalue, llvm::Value *retPtr); llvm::Value *emitGatherCallback(llvm::Value *lvalue, llvm::Value *retPtr);
llvm::Value *applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
const Type *ptrType);
void restoreMaskGivenReturns(llvm::Value *oldMask); void restoreMaskGivenReturns(llvm::Value *oldMask);
void scatter(llvm::Value *rvalue, llvm::Value *lvalue, void scatter(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
llvm::Value *maskPtr, const Type *rvalueType); llvm::Value *mask);
llvm::Value *gather(llvm::Value *lvalue, llvm::Value *mask, void maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
const Type *type, const char *name); llvm::Value *mask);
void maskedStore(llvm::Value *rvalue, llvm::Value *lvalue, llvm::Value *gather(llvm::Value *ptr, const Type *ptrType, llvm::Value *mask,
const Type *rvalueType, llvm::Value *maskPtr); const char *name);
llvm::Value *addVaryingOffsetsIfNeeded(llvm::Value *value, const Type *type); llvm::Value *addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType);
}; };
#endif // ISPC_CTX_H #endif // ISPC_CTX_H

155
decl.cpp
View File

@@ -46,12 +46,14 @@
#include <stdio.h> #include <stdio.h>
#include <llvm/Module.h> #include <llvm/Module.h>
/** Given a Type and a set of type qualifiers, apply the type qualifiers to
the type, returning the type that is the result.
*/
static const Type * static const Type *
lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) { lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
if (type == NULL) if (type == NULL)
return NULL; return NULL;
// Account for 'unsigned' and 'const' qualifiers in the type
if ((typeQualifiers & TYPEQUAL_UNSIGNED) != 0) { if ((typeQualifiers & TYPEQUAL_UNSIGNED) != 0) {
const Type *unsignedType = type->GetAsUnsignedType(); const Type *unsignedType = type->GetAsUnsignedType();
if (unsignedType != NULL) if (unsignedType != NULL)
@@ -60,11 +62,10 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
Error(pos, "\"unsigned\" qualifier is illegal with \"%s\" type.", Error(pos, "\"unsigned\" qualifier is illegal with \"%s\" type.",
type->GetString().c_str()); type->GetString().c_str());
} }
if ((typeQualifiers & TYPEQUAL_CONST) != 0) if ((typeQualifiers & TYPEQUAL_CONST) != 0)
type = type->GetAsConstType(); type = type->GetAsConstType();
// if uniform/varying is specified explicitly, then go with that
if (dynamic_cast<const FunctionType *>(type) == NULL) {
if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0) if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0)
type = type->GetAsUniformType(); type = type->GetAsUniformType();
else if ((typeQualifiers & TYPEQUAL_VARYING) != 0) else if ((typeQualifiers & TYPEQUAL_VARYING) != 0)
@@ -77,7 +78,6 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
else else
type = type->GetAsVaryingType(); type = type->GetAsVaryingType();
} }
}
return type; return type;
} }
@@ -127,7 +127,6 @@ DeclSpecs::Print() const {
if (typeQualifiers & TYPEQUAL_UNIFORM) printf("uniform "); if (typeQualifiers & TYPEQUAL_UNIFORM) printf("uniform ");
if (typeQualifiers & TYPEQUAL_VARYING) printf("varying "); if (typeQualifiers & TYPEQUAL_VARYING) printf("varying ");
if (typeQualifiers & TYPEQUAL_TASK) printf("task "); if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
if (typeQualifiers & TYPEQUAL_REFERENCE) printf("reference ");
if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned "); if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
printf("%s", baseType->GetString().c_str()); printf("%s", baseType->GetString().c_str());
@@ -161,8 +160,10 @@ Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
Symbol * Symbol *
Declarator::GetSymbol() { Declarator::GetSymbol() const {
Declarator *d = this; // The symbol lives at the last child in the chain, so walk down there
// and return the one there.
const Declarator *d = this;
while (d->child != NULL) while (d->child != NULL)
d = d->child; d = d->child;
return d->sym; return d->sym;
@@ -171,7 +172,12 @@ Declarator::GetSymbol() {
void void
Declarator::Print() const { Declarator::Print() const {
Symbol *sym = GetSymbol();
if (sym != NULL)
printf("%s", sym->name.c_str()); printf("%s", sym->name.c_str());
else
printf("(null symbol)");
if (initExpr != NULL) { if (initExpr != NULL) {
printf(" = ("); printf(" = (");
initExpr->Print(); initExpr->Print();
@@ -181,28 +187,39 @@ Declarator::Print() const {
} }
void Symbol *
Declarator::GetFunctionInfo(DeclSpecs *ds, Symbol **funSym, Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
std::vector<Symbol *> *funArgs) {
// Get the symbol for the function from the symbol table. (It should
// already have been added to the symbol table by AddGlobal() by the
// time we get here.)
const FunctionType *type = const FunctionType *type =
dynamic_cast<const FunctionType *>(GetType(ds)); dynamic_cast<const FunctionType *>(GetType(ds));
if (type == NULL) if (type == NULL)
return; return NULL;
Symbol *declSym = GetSymbol(); Symbol *declSym = GetSymbol();
assert(declSym != NULL); assert(declSym != NULL);
*funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
if (*funSym != NULL)
// May be NULL due to error earlier in compilation
(*funSym)->pos = pos;
for (unsigned int i = 0; i < functionArgs.size(); ++i) { // Get the symbol for the function from the symbol table. (It should
Declaration *pdecl = functionArgs[i]; // already have been added to the symbol table by AddGlobal() by the
// time we get here.)
Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
if (funSym != NULL)
// May be NULL due to error earlier in compilation
funSym->pos = pos;
// Walk down to the declarator for the function. (We have to get past
// the stuff that specifies the function's return type before we get to
// the function's declarator.)
Declarator *d = this;
while (d != NULL && d->kind != DK_FUNCTION)
d = d->child;
assert(d != NULL);
for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
Declaration *pdecl = d->functionParams[i];
assert(pdecl->declarators.size() == 1); assert(pdecl->declarators.size() == 1);
funArgs->push_back(pdecl->declarators[0]->GetSymbol()); funArgs->push_back(pdecl->declarators[0]->GetSymbol());
} }
return funSym;
} }
@@ -211,7 +228,6 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0); bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0);
bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0); bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0);
bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0); bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0);
bool isReference = ((typeQualifiers & TYPEQUAL_REFERENCE) != 0);
bool isConst = ((typeQualifiers & TYPEQUAL_CONST) != 0); bool isConst = ((typeQualifiers & TYPEQUAL_CONST) != 0);
if (hasUniformQual && hasVaryingQual) { if (hasUniformQual && hasVaryingQual) {
@@ -224,13 +240,36 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
const Type *type = base; const Type *type = base;
switch (kind) { switch (kind) {
case DK_BASE: case DK_BASE:
// All of the type qualifiers should be in the DeclSpecs for the
// base declarator
assert(typeQualifiers == 0); assert(typeQualifiers == 0);
assert(child == NULL); assert(child == NULL);
return type; return type;
case DK_POINTER: case DK_POINTER:
type = new PointerType(type, hasUniformQual, isConst); type = new PointerType(type, hasUniformQual, isConst);
if (child) if (child != NULL)
return child->GetType(type, ds);
else
return type;
break;
case DK_REFERENCE:
if (hasUniformQual)
Error(pos, "\"uniform\" qualifier is illegal to apply to references.");
if (hasVaryingQual)
Error(pos, "\"varying\" qualifier is illegal to apply to references.");
if (isConst)
Error(pos, "\"const\" qualifier is to illegal apply to references.");
// The parser should disallow this already, but double check.
if (dynamic_cast<const ReferenceType *>(type) != NULL) {
Error(pos, "References to references are illegal.");
return NULL;
}
type = new ReferenceType(type);
if (child != NULL)
return child->GetType(type, ds); return child->GetType(type, ds);
else else
return type; return type;
@@ -250,10 +289,12 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
std::vector<ConstExpr *> argDefaults; std::vector<ConstExpr *> argDefaults;
std::vector<SourcePos> argPos; std::vector<SourcePos> argPos;
// Loop over the function arguments and get names and types for // Loop over the function arguments and store the names, types,
// each one in the args and argNames arrays // default values (if any), and source file positions of each one in
for (unsigned int i = 0; i < functionArgs.size(); ++i) { // the corresponding vector.
Declaration *d = functionArgs[i]; for (unsigned int i = 0; i < functionParams.size(); ++i) {
Declaration *d = functionParams[i];
char buf[32]; char buf[32];
Symbol *sym; Symbol *sym;
if (d->declarators.size() == 0) { if (d->declarators.size() == 0) {
@@ -266,6 +307,8 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
else { else {
sym = d->declarators[0]->GetSymbol(); sym = d->declarators[0]->GetSymbol();
if (sym == NULL) { if (sym == NULL) {
// Handle more complex anonymous declarations like
// float (float **).
sprintf(buf, "__anon_parameter_%d", i); sprintf(buf, "__anon_parameter_%d", i);
sym = new Symbol(buf, pos); sym = new Symbol(buf, pos);
sym->type = d->declarators[0]->GetType(d->declSpecs); sym->type = d->declarators[0]->GetType(d->declSpecs);
@@ -274,9 +317,15 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type); const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
if (at != NULL) { if (at != NULL) {
// Arrays are passed by reference, so convert array // As in C, arrays are passed to functions as pointers to
// parameters to be references here. // their element type. We'll just immediately make this
sym->type = new ReferenceType(sym->type, sym->type->IsConstType()); // change now. (One shortcoming of losing the fact that
// it was originally an array is that any warnings or
// errors later issued that print the function type will
// report this differently than it was originally declared
// in the function, but it's not clear that this is a
// significant problem.)
sym->type = PointerType::GetUniform(at->GetElementType());
// Make sure there are no unsized arrays (other than the // Make sure there are no unsized arrays (other than the
// first dimension) in function parameter lists. // first dimension) in function parameter lists.
@@ -296,6 +345,8 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
ConstExpr *init = NULL; ConstExpr *init = NULL;
if (d->declarators.size()) { if (d->declarators.size()) {
// Try to find an initializer expression; if there is one,
// it lives down at the base declarator.
Declarator *decl = d->declarators[0]; Declarator *decl = d->declarators[0];
while (decl->child != NULL) { while (decl->child != NULL) {
assert(decl->initExpr == NULL); assert(decl->initExpr == NULL);
@@ -314,11 +365,6 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
argDefaults.push_back(init); argDefaults.push_back(init);
} }
if (isReference) {
Error(pos, "Function return types can't be reference types.");
return NULL;
}
const Type *returnType = type; const Type *returnType = type;
if (returnType == NULL) { if (returnType == NULL) {
Error(pos, "No return type provided in function declaration."); Error(pos, "No return type provided in function declaration.");
@@ -328,6 +374,23 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
bool isExported = ds && (ds->storageClass == SC_EXPORT); bool isExported = ds && (ds->storageClass == SC_EXPORT);
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C); bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0); bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
if (isExported && isTask) {
Error(pos, "Function can't have both \"task\" and \"export\" "
"qualifiers");
return NULL;
}
if (isExternC && isTask) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
"qualifiers");
return NULL;
}
if (isExternC && isExported) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
"qualifiers");
return NULL;
}
Type *functionType = Type *functionType =
new FunctionType(returnType, args, pos, argNames, argDefaults, new FunctionType(returnType, args, pos, argNames, argDefaults,
argPos, isTask, isExported, isExternC); argPos, isTask, isExported, isExternC);
@@ -367,12 +430,6 @@ const Type *
Declarator::GetType(DeclSpecs *ds) const { Declarator::GetType(DeclSpecs *ds) const {
const Type *baseType = ds->GetBaseType(pos); const Type *baseType = ds->GetBaseType(pos);
const Type *type = GetType(baseType, ds); const Type *type = GetType(baseType, ds);
if ((ds->typeQualifiers & TYPEQUAL_REFERENCE) != 0) {
bool hasConstQual = ((ds->typeQualifiers & TYPEQUAL_CONST) != 0);
type = new ReferenceType(type, hasConstQual);
}
return type; return type;
} }
@@ -392,7 +449,7 @@ Declaration::Declaration(DeclSpecs *ds, std::vector<Declarator *> *dlist) {
Declaration::Declaration(DeclSpecs *ds, Declarator *d) { Declaration::Declaration(DeclSpecs *ds, Declarator *d) {
declSpecs = ds; declSpecs = ds;
if (d) { if (d != NULL) {
d->InitFromDeclSpecs(ds); d->InitFromDeclSpecs(ds);
declarators.push_back(d); declarators.push_back(d);
} }
@@ -409,6 +466,8 @@ Declaration::GetVariableDeclarations() const {
continue; continue;
Declarator *decl = declarators[i]; Declarator *decl = declarators[i];
if (decl == NULL || decl->kind == DK_FUNCTION) if (decl == NULL || decl->kind == DK_FUNCTION)
// Ignore earlier errors or external function declarations
// inside other functions.
continue; continue;
Symbol *sym = decl->GetSymbol(); Symbol *sym = decl->GetSymbol();
@@ -452,14 +511,18 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
Declarator *d = (*sd[i]->declarators)[j]; Declarator *d = (*sd[i]->declarators)[j];
d->InitFromDeclSpecs(&ds); d->InitFromDeclSpecs(&ds);
// if it's an unsized array, make it a reference to an unsized
// array, so the caller can pass a pointer...
Symbol *sym = d->GetSymbol(); Symbol *sym = d->GetSymbol();
const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
if (at && at->GetElementCount() == 0)
sym->type = new ReferenceType(sym->type, type->IsConstType());
const ArrayType *arrayType =
dynamic_cast<const ArrayType *>(sym->type);
if (arrayType != NULL && arrayType->GetElementCount() == 0) {
Error(d->pos, "Unsized arrays aren't allowed in struct "
"definitions.");
elementTypes->push_back(NULL);
}
else
elementTypes->push_back(sym->type); elementTypes->push_back(sym->type);
elementNames->push_back(sym->name); elementNames->push_back(sym->name);
elementPositions->push_back(sym->pos); elementPositions->push_back(sym->pos);
} }

41
decl.h
View File

@@ -79,9 +79,8 @@ enum StorageClass {
#define TYPEQUAL_UNIFORM (1<<1) #define TYPEQUAL_UNIFORM (1<<1)
#define TYPEQUAL_VARYING (1<<2) #define TYPEQUAL_VARYING (1<<2)
#define TYPEQUAL_TASK (1<<3) #define TYPEQUAL_TASK (1<<3)
#define TYPEQUAL_REFERENCE (1<<4) #define TYPEQUAL_UNSIGNED (1<<4)
#define TYPEQUAL_UNSIGNED (1<<5) #define TYPEQUAL_INLINE (1<<5)
#define TYPEQUAL_INLINE (1<<6)
/** @brief Representation of the declaration specifiers in a declaration. /** @brief Representation of the declaration specifiers in a declaration.
@@ -100,7 +99,7 @@ public:
int typeQualifiers; int typeQualifiers;
/** The basic type provided in the declaration; this should be an /** The basic type provided in the declaration; this should be an
AtomicType, a StructType, or a VectorType; other types (like AtomicType, EnumType, StructType, or VectorType; other types (like
ArrayTypes) will end up being created if a particular declaration ArrayTypes) will end up being created if a particular declaration
has an array size, etc. has an array size, etc.
*/ */
@@ -123,6 +122,7 @@ public:
enum DeclaratorKind { enum DeclaratorKind {
DK_BASE, DK_BASE,
DK_POINTER, DK_POINTER,
DK_REFERENCE,
DK_ARRAY, DK_ARRAY,
DK_FUNCTION DK_FUNCTION
}; };
@@ -142,33 +142,51 @@ public:
void InitFromDeclSpecs(DeclSpecs *ds); void InitFromDeclSpecs(DeclSpecs *ds);
/** Get the actual type of the combination of Declarator and the given /** Get the actual type of the combination of Declarator and the given
DeclSpecs */ DeclSpecs. If an explicit base type is provided, the declarator is
applied to that type; otherwise the base type from the DeclSpecs is
used. */
const Type *GetType(DeclSpecs *ds) const; const Type *GetType(DeclSpecs *ds) const;
const Type *GetType(const Type *base, DeclSpecs *ds) const; const Type *GetType(const Type *base, DeclSpecs *ds) const;
void GetFunctionInfo(DeclSpecs *ds, Symbol **sym, /** Returns the symbol corresponding to the function declared by this
std::vector<Symbol *> *args); declarator and symbols for its arguments in *args. */
Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *args);
Symbol *GetSymbol(); /** Returns the symbol associated with the declarator. */
Symbol *GetSymbol() const;
void Print() const; void Print() const;
/** Position of the declarator in the source program. */
const SourcePos pos; const SourcePos pos;
/** The kind of this declarator; complex declarations are assembled as
a hierarchy of Declarators. (For example, a pointer to an int
would have a root declarator with kind DK_POINTER and with the
Declarator::child member pointing to a DK_BASE declarator for the
int). */
const DeclaratorKind kind; const DeclaratorKind kind;
/** Child pointer if needed; this can only be non-NULL if the
declarator's kind isn't DK_BASE. */
Declarator *child; Declarator *child;
/** Type qualifiers provided with the declarator. */
int typeQualifiers; int typeQualifiers;
/** For array declarators, this gives the declared size of the array.
Unsized arrays have arraySize == 0. */
int arraySize; int arraySize;
/** Symbol associated with the declarator. */
Symbol *sym; Symbol *sym;
/** Initialization expression for the variable. May be NULL. */ /** Initialization expression for the variable. May be NULL. */
Expr *initExpr; Expr *initExpr;
std::vector<Declaration *> functionArgs; /** For function declarations, this holds the Declaration *s for the
function's parameters. */
std::vector<Declaration *> functionParams;
}; };
@@ -182,6 +200,11 @@ public:
void Print() const; void Print() const;
/** This method walks through all of the Declarators in a declaration
and returns a fully-initialized Symbol and (possibly) an
initialization expression for each one. (This allows the rest of
the system to not have to worry about the mess of the general
Declarator representation.) */
std::vector<VariableDeclaration> GetVariableDeclarations() const; std::vector<VariableDeclaration> GetVariableDeclarations() const;
DeclSpecs *declSpecs; DeclSpecs *declSpecs;

View File

@@ -75,7 +75,7 @@ static inline vec vcross(vec v0, vec v1) {
return ret; return ret;
} }
static inline void vnormalize(reference vec v) { static inline void vnormalize(vec &v) {
float len2 = dot(v, v); float len2 = dot(v, v);
float invlen = rsqrt(len2); float invlen = rsqrt(len2);
v *= invlen; v *= invlen;
@@ -83,8 +83,7 @@ static inline void vnormalize(reference vec v) {
static inline void static inline void
ray_plane_intersect(reference Isect isect, reference Ray ray, ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
reference Plane plane) {
float d = -dot(plane.p, plane.n); float d = -dot(plane.p, plane.n);
float v = dot(ray.dir, plane.n); float v = dot(ray.dir, plane.n);
@@ -104,8 +103,7 @@ ray_plane_intersect(reference Isect isect, reference Ray ray,
static inline void static inline void
ray_sphere_intersect(reference Isect isect, reference Ray ray, ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) {
reference Sphere sphere) {
vec rs = ray.org - sphere.center; vec rs = ray.org - sphere.center;
float B = dot(rs, ray.dir); float B = dot(rs, ray.dir);
@@ -127,7 +125,7 @@ ray_sphere_intersect(reference Isect isect, reference Ray ray,
static inline void static inline void
orthoBasis(reference vec basis[3], vec n) { orthoBasis(vec basis[3], vec n) {
basis[2] = n; basis[2] = n;
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0; basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
@@ -150,8 +148,8 @@ orthoBasis(reference vec basis[3], vec n) {
static inline float static inline float
ambient_occlusion(reference Isect isect, reference Plane plane, ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
reference Sphere spheres[3], reference RNGState rngstate) { RNGState &rngstate) {
float eps = 0.0001f; float eps = 0.0001f;
vec p, n; vec p, n;
vec basis[3]; vec basis[3];
@@ -168,8 +166,8 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
Ray ray; Ray ray;
Isect occIsect; Isect occIsect;
float theta = sqrt(frandom(rngstate)); float theta = sqrt(frandom(&rngstate));
float phi = 2.0f * M_PI * frandom(rngstate); float phi = 2.0f * M_PI * frandom(&rngstate);
float x = cos(phi) * theta; float x = cos(phi) * theta;
float y = sin(phi) * theta; float y = sin(phi) * theta;
float z = sqrt(1.0 - theta * theta); float z = sqrt(1.0 - theta * theta);
@@ -205,7 +203,7 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
*/ */
static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
uniform int h, uniform int nsubsamples, uniform int h, uniform int nsubsamples,
reference uniform float image[]) { uniform float image[]) {
static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } }; static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
static Sphere spheres[3] = { static Sphere spheres[3] = {
{ { -2.0f, 0.0f, -3.5f }, 0.5f }, { { -2.0f, 0.0f, -3.5f }, 0.5f },
@@ -213,7 +211,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
{ { 1.0f, 0.0f, -2.2f }, 0.5f } }; { { 1.0f, 0.0f, -2.2f }, 0.5f } };
RNGState rngstate; RNGState rngstate;
seed_rng(rngstate, y0); seed_rng(&rngstate, y0);
// Compute the mapping between the 'programCount'-wide program // Compute the mapping between the 'programCount'-wide program
// instances running in parallel and samples in the image. // instances running in parallel and samples in the image.

View File

@@ -75,7 +75,7 @@ static inline vec vcross(vec v0, vec v1) {
return ret; return ret;
} }
static inline void vnormalize(reference vec v) { static inline void vnormalize(vec &v) {
float len2 = dot(v, v); float len2 = dot(v, v);
float invlen = rsqrt(len2); float invlen = rsqrt(len2);
v *= invlen; v *= invlen;
@@ -83,8 +83,7 @@ static inline void vnormalize(reference vec v) {
static inline void static inline void
ray_plane_intersect(reference Isect isect, reference Ray ray, ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
reference Plane plane) {
float d = -dot(plane.p, plane.n); float d = -dot(plane.p, plane.n);
float v = dot(ray.dir, plane.n); float v = dot(ray.dir, plane.n);
@@ -104,8 +103,7 @@ ray_plane_intersect(reference Isect isect, reference Ray ray,
static inline void static inline void
ray_sphere_intersect(reference Isect isect, reference Ray ray, ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) {
reference Sphere sphere) {
vec rs = ray.org - sphere.center; vec rs = ray.org - sphere.center;
float B = dot(rs, ray.dir); float B = dot(rs, ray.dir);
@@ -127,7 +125,7 @@ ray_sphere_intersect(reference Isect isect, reference Ray ray,
static inline void static inline void
orthoBasis(reference vec basis[3], vec n) { orthoBasis(vec basis[3], vec n) {
basis[2] = n; basis[2] = n;
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0; basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
@@ -150,8 +148,8 @@ orthoBasis(reference vec basis[3], vec n) {
static inline float static inline float
ambient_occlusion(reference Isect isect, reference Plane plane, ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
reference Sphere spheres[3], reference RNGState rngstate) { RNGState &rngstate) {
float eps = 0.0001f; float eps = 0.0001f;
vec p, n; vec p, n;
vec basis[3]; vec basis[3];
@@ -168,8 +166,8 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
Ray ray; Ray ray;
Isect occIsect; Isect occIsect;
float theta = sqrt(frandom(rngstate)); float theta = sqrt(frandom(&rngstate));
float phi = 2.0f * M_PI * frandom(rngstate); float phi = 2.0f * M_PI * frandom(&rngstate);
float x = cos(phi) * theta; float x = cos(phi) * theta;
float y = sin(phi) * theta; float y = sin(phi) * theta;
float z = sqrt(1.0 - theta * theta); float z = sqrt(1.0 - theta * theta);
@@ -203,8 +201,9 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
/* Compute the image for the scanlines from [y0,y1), for an overall image /* Compute the image for the scanlines from [y0,y1), for an overall image
of width w and height h. of width w and height h.
*/ */
void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
uniform int nsubsamples, reference uniform float image[]) { uniform int h, uniform int nsubsamples,
uniform float image[]) {
static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } }; static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
static Sphere spheres[3] = { static Sphere spheres[3] = {
{ { -2.0f, 0.0f, -3.5f }, 0.5f }, { { -2.0f, 0.0f, -3.5f }, 0.5f },
@@ -212,7 +211,7 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
{ { 1.0f, 0.0f, -2.2f }, 0.5f } }; { { 1.0f, 0.0f, -2.2f }, 0.5f } };
RNGState rngstate; RNGState rngstate;
seed_rng(rngstate, y0); seed_rng(&rngstate, y0);
// Compute the mapping between the 'programCount'-wide program // Compute the mapping between the 'programCount'-wide program
// instances running in parallel and samples in the image. // instances running in parallel and samples in the image.
@@ -231,6 +230,9 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
// direction we do per iteration and ny the number in y. // direction we do per iteration and ny the number in y.
uniform int nx = 1, ny = 1; uniform int nx = 1, ny = 1;
// FIXME: We actually need ny to be 1 regardless of the decomposition,
// since the task decomposition is one scanline high.
if (programCount == 8) { if (programCount == 8) {
// Do two pixels at once in the x direction // Do two pixels at once in the x direction
nx = 2; nx = 2;
@@ -239,19 +241,21 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
++du; ++du;
} }
else if (programCount == 16) { else if (programCount == 16) {
// Two at once in both x and y nx = 4;
nx = ny = 2; ny = 1;
if ((programIndex >= 4 && programIndex < 8) || programIndex >= 12) if (programIndex >= 4 && programIndex < 8)
++du; ++du;
if (programIndex >= 8) if (programIndex >= 8 && programIndex < 12)
++dv; du += 2;
if (programIndex >= 12)
du += 3;
} }
// Now loop over all of the pixels, stepping in x and y as calculated // Now loop over all of the pixels, stepping in x and y as calculated
// above. (Assumes that ny divides y and nx divides x...) // above. (Assumes that ny divides y and nx divides x...)
for (uniform int y = y0; y < y1; y += ny) { for (uniform int y = y0; y < y1; y += ny) {
for (uniform int x = 0; x < w; x += nx) { for (uniform int x = 0; x < w; x += nx) {
// Figur out x,y pixel in NDC // Figure out x,y pixel in NDC
float px = (x + du - (w / 2.0f)) / (w / 2.0f); float px = (x + du - (w / 2.0f)) / (w / 2.0f);
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f); float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
float ret = 0.f; float ret = 0.f;
@@ -293,7 +297,7 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
// offset to the first pixel in the image // offset to the first pixel in the image
uniform int offset = 3 * (y * w + x); uniform int offset = 3 * (y * w + x);
for (uniform int p = 0; p < programCount; p += 4, ++offset) { for (uniform int p = 0; p < programCount; p += 4, offset += 3) {
// Get the four sample values for this pixel // Get the four sample values for this pixel
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] + uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
retArray[p+3]; retArray[p+3];
@@ -315,3 +319,15 @@ export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
uniform float image[]) { uniform float image[]) {
ao_scanlines(0, h, w, h, nsubsamples, image); ao_scanlines(0, h, w, h, nsubsamples, image);
} }
/* Task entry point: forwards to ao_scanlines() with the scanline range
   [taskIndex, taskIndex+1), so each task instance processes the single
   scanline whose index equals its taskIndex.  The remaining arguments
   (image width/height, subsample count, and the output image array) are
   passed through unchanged. */
static void task ao_task(uniform int width, uniform int height,
uniform int nsubsamples, uniform float image[]) {
ao_scanlines(taskIndex, taskIndex+1, width, height, nsubsamples, image);
}
/* Exported, task-based counterpart to ao_ispc(): launches h instances of
   ao_task, i.e. one task per scanline of the h-row image, passing the
   image dimensions, subsample count, and output image array to each. */
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
uniform float image[]) {
launch[h] < ao_task(w, h, nsubsamples, image) >;
}

View File

@@ -35,22 +35,22 @@
struct InputDataArrays struct InputDataArrays
{ {
uniform float zBuffer[]; uniform float * uniform zBuffer;
uniform unsigned int16 normalEncoded_x[]; // half float uniform unsigned int16 * uniform normalEncoded_x; // half float
uniform unsigned int16 normalEncoded_y[]; // half float uniform unsigned int16 * uniform normalEncoded_y; // half float
uniform unsigned int16 specularAmount[]; // half float uniform unsigned int16 * uniform specularAmount; // half float
uniform unsigned int16 specularPower[]; // half float uniform unsigned int16 * uniform specularPower; // half float
uniform unsigned int8 albedo_x[]; // unorm8 uniform unsigned int8 * uniform albedo_x; // unorm8
uniform unsigned int8 albedo_y[]; // unorm8 uniform unsigned int8 * uniform albedo_y; // unorm8
uniform unsigned int8 albedo_z[]; // unorm8 uniform unsigned int8 * uniform albedo_z; // unorm8
uniform float lightPositionView_x[]; uniform float * uniform lightPositionView_x;
uniform float lightPositionView_y[]; uniform float * uniform lightPositionView_y;
uniform float lightPositionView_z[]; uniform float * uniform lightPositionView_z;
uniform float lightAttenuationBegin[]; uniform float * uniform lightAttenuationBegin;
uniform float lightColor_x[]; uniform float * uniform lightColor_x;
uniform float lightColor_y[]; uniform float * uniform lightColor_y;
uniform float lightColor_z[]; uniform float * uniform lightColor_z;
uniform float lightAttenuationEnd[]; uniform float * uniform lightAttenuationEnd;
}; };
struct InputHeader struct InputHeader
@@ -77,8 +77,7 @@ dot3(float x, float y, float z, float a, float b, float c) {
static inline void static inline void
normalize3(float x, float y, float z, reference float ox, normalize3(float x, float y, float z, float &ox, float &oy, float &oz) {
reference float oy, reference float oz) {
float n = rsqrt(x*x + y*y + z*z); float n = rsqrt(x*x + y*y + z*z);
ox = x * n; ox = x * n;
oy = y * n; oy = y * n;
@@ -110,8 +109,8 @@ ComputeZBounds(
uniform float cameraProj_33, uniform float cameraProj_43, uniform float cameraProj_33, uniform float cameraProj_43,
uniform float cameraNear, uniform float cameraFar, uniform float cameraNear, uniform float cameraFar,
// Output // Output
reference uniform float minZ, uniform float &minZ,
reference uniform float maxZ uniform float &maxZ
) )
{ {
// Find Z bounds // Find Z bounds
@@ -156,7 +155,7 @@ IntersectLightsWithTileMinMax(
uniform float light_positionView_z_array[], uniform float light_positionView_z_array[],
uniform float light_attenuationEnd_array[], uniform float light_attenuationEnd_array[],
// Output // Output
reference uniform int32 tileLightIndices[] uniform int32 tileLightIndices[]
) )
{ {
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
@@ -268,7 +267,7 @@ IntersectLightsWithTile(
uniform float light_positionView_z_array[], uniform float light_positionView_z_array[],
uniform float light_attenuationEnd_array[], uniform float light_attenuationEnd_array[],
// Output // Output
reference uniform int32 tileLightIndices[] uniform int32 tileLightIndices[]
) )
{ {
uniform float minZ, maxZ; uniform float minZ, maxZ;
@@ -293,19 +292,19 @@ ShadeTile(
uniform int32 tileStartX, uniform int32 tileEndX, uniform int32 tileStartX, uniform int32 tileEndX,
uniform int32 tileStartY, uniform int32 tileEndY, uniform int32 tileStartY, uniform int32 tileEndY,
uniform int32 gBufferWidth, uniform int32 gBufferHeight, uniform int32 gBufferWidth, uniform int32 gBufferHeight,
reference uniform InputDataArrays inputData, uniform InputDataArrays &inputData,
// Camera data // Camera data
uniform float cameraProj_11, uniform float cameraProj_22, uniform float cameraProj_11, uniform float cameraProj_22,
uniform float cameraProj_33, uniform float cameraProj_43, uniform float cameraProj_33, uniform float cameraProj_43,
// Light list // Light list
reference uniform int32 tileLightIndices[], uniform int32 tileLightIndices[],
uniform int32 tileNumLights, uniform int32 tileNumLights,
// UI // UI
uniform bool visualizeLightCount, uniform bool visualizeLightCount,
// Output // Output
reference uniform unsigned int8 framebuffer_r[], uniform unsigned int8 framebuffer_r[],
reference uniform unsigned int8 framebuffer_g[], uniform unsigned int8 framebuffer_g[],
reference uniform unsigned int8 framebuffer_b[] uniform unsigned int8 framebuffer_b[]
) )
{ {
if (tileNumLights == 0 || visualizeLightCount) { if (tileNumLights == 0 || visualizeLightCount) {
@@ -478,13 +477,13 @@ ShadeTile(
task void task void
RenderTile(uniform int num_groups_x, uniform int num_groups_y, RenderTile(uniform int num_groups_x, uniform int num_groups_y,
reference uniform InputHeader inputHeader, uniform InputHeader &inputHeader,
reference uniform InputDataArrays inputData, uniform InputDataArrays &inputData,
uniform int visualizeLightCount, uniform int visualizeLightCount,
// Output // Output
reference uniform unsigned int8 framebuffer_r[], uniform unsigned int8 framebuffer_r[],
reference uniform unsigned int8 framebuffer_g[], uniform unsigned int8 framebuffer_g[],
reference uniform unsigned int8 framebuffer_b[]) { uniform unsigned int8 framebuffer_b[]) {
uniform int32 group_y = taskIndex / num_groups_x; uniform int32 group_y = taskIndex / num_groups_x;
uniform int32 group_x = taskIndex % num_groups_x; uniform int32 group_x = taskIndex % num_groups_x;
uniform int32 tile_start_x = group_x * MIN_TILE_WIDTH; uniform int32 tile_start_x = group_x * MIN_TILE_WIDTH;
@@ -526,13 +525,13 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
export void export void
RenderStatic(reference uniform InputHeader inputHeader, RenderStatic(uniform InputHeader &inputHeader,
reference uniform InputDataArrays inputData, uniform InputDataArrays &inputData,
uniform int visualizeLightCount, uniform int visualizeLightCount,
// Output // Output
reference uniform unsigned int8 framebuffer_r[], uniform unsigned int8 framebuffer_r[],
reference uniform unsigned int8 framebuffer_g[], uniform unsigned int8 framebuffer_g[],
reference uniform unsigned int8 framebuffer_b[]) { uniform unsigned int8 framebuffer_b[]) {
uniform int num_groups_x = (inputHeader.framebufferWidth + uniform int num_groups_x = (inputHeader.framebufferWidth +
MIN_TILE_WIDTH - 1) / MIN_TILE_WIDTH; MIN_TILE_WIDTH - 1) / MIN_TILE_WIDTH;
uniform int num_groups_y = (inputHeader.framebufferHeight + uniform int num_groups_y = (inputHeader.framebufferHeight +
@@ -564,8 +563,8 @@ ComputeZBoundsRow(
uniform float cameraProj_33, uniform float cameraProj_43, uniform float cameraProj_33, uniform float cameraProj_43,
uniform float cameraNear, uniform float cameraFar, uniform float cameraNear, uniform float cameraFar,
// Output // Output
reference uniform float minZArray[], uniform float minZArray[],
reference uniform float maxZArray[] uniform float maxZArray[]
) )
{ {
for (uniform int32 tileX = 0; tileX < numTilesX; ++tileX) { for (uniform int32 tileX = 0; tileX < numTilesX; ++tileX) {
@@ -596,7 +595,7 @@ SplitTileMinMax(
// Camera data // Camera data
uniform float cameraProj_11, uniform float cameraProj_22, uniform float cameraProj_11, uniform float cameraProj_22,
// Light Data // Light Data
reference uniform int32 lightIndices[], uniform int32 lightIndices[],
uniform int32 numLights, uniform int32 numLights,
uniform float light_positionView_x_array[], uniform float light_positionView_x_array[],
uniform float light_positionView_y_array[], uniform float light_positionView_y_array[],
@@ -605,9 +604,9 @@ SplitTileMinMax(
// Outputs // Outputs
// TODO: ISPC doesn't currently like multidimensional arrays so we'll do the // TODO: ISPC doesn't currently like multidimensional arrays so we'll do the
// indexing math ourselves // indexing math ourselves
reference uniform int32 subtileIndices[], uniform int32 subtileIndices[],
uniform int32 subtileIndicesPitch, uniform int32 subtileIndicesPitch,
reference uniform int32 subtileNumLights[] uniform int32 subtileNumLights[]
) )
{ {
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;

View File

@@ -51,7 +51,7 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0,
uniform float x1, uniform float y1, uniform float x1, uniform float y1,
uniform int width, uniform int height, uniform int width, uniform int height,
uniform int maxIterations, uniform int maxIterations,
reference uniform int output[]) uniform int output[])
{ {
float dx = (x1 - x0) / width; float dx = (x1 - x0) / width;
float dy = (y1 - y0) / height; float dy = (y1 - y0) / height;

View File

@@ -57,7 +57,7 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
uniform float x0, uniform float dx, uniform float x0, uniform float dx,
uniform float y0, uniform float dy, uniform float y0, uniform float dy,
uniform int width, uniform int maxIterations, uniform int width, uniform int maxIterations,
reference uniform int output[]) { uniform int output[]) {
uniform int ystart = ybase + taskIndex * span; uniform int ystart = ybase + taskIndex * span;
uniform int yend = ystart + span; uniform int yend = ystart + span;
@@ -77,7 +77,7 @@ task void
mandelbrot_chunk(uniform float x0, uniform float dx, mandelbrot_chunk(uniform float x0, uniform float dx,
uniform float y0, uniform float dy, uniform float y0, uniform float dy,
uniform int width, uniform int height, uniform int width, uniform int height,
uniform int maxIterations, reference uniform int output[]) { uniform int maxIterations, uniform int output[]) {
uniform int ystart = taskIndex * (height/taskCount); uniform int ystart = taskIndex * (height/taskCount);
uniform int yend = (taskIndex+1) * (height/taskCount); uniform int yend = (taskIndex+1) * (height/taskCount);
uniform int span = 1; uniform int span = 1;
@@ -91,7 +91,7 @@ export void
mandelbrot_ispc(uniform float x0, uniform float y0, mandelbrot_ispc(uniform float x0, uniform float y0,
uniform float x1, uniform float y1, uniform float x1, uniform float y1,
uniform int width, uniform int height, uniform int width, uniform int height,
uniform int maxIterations, reference uniform int output[]) { uniform int maxIterations, uniform int output[]) {
uniform float dx = (x1 - x0) / width; uniform float dx = (x1 - x0) / width;
uniform float dy = (y1 - y0) / height; uniform float dy = (y1 - y0) / height;

View File

@@ -73,7 +73,7 @@ static inline float Dot(const float3 a, const float3 b) {
static void generateRay(uniform const float raster2camera[4][4], static void generateRay(uniform const float raster2camera[4][4],
uniform const float camera2world[4][4], uniform const float camera2world[4][4],
float x, float y, reference Ray ray) { float x, float y, Ray &ray) {
ray.mint = 0.f; ray.mint = 0.f;
ray.maxt = 1e30f; ray.maxt = 1e30f;
@@ -105,7 +105,7 @@ static void generateRay(uniform const float raster2camera[4][4],
static inline bool BBoxIntersect(const uniform float bounds[2][3], static inline bool BBoxIntersect(const uniform float bounds[2][3],
const reference Ray ray) { const Ray &ray) {
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] }; uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] }; uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
float t0 = ray.mint, t1 = ray.maxt; float t0 = ray.mint, t1 = ray.maxt;
@@ -143,7 +143,7 @@ static inline bool BBoxIntersect(const uniform float bounds[2][3],
static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) { static inline bool TriIntersect(const Triangle &tri, Ray &ray) {
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] }; uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] }; uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] }; uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
@@ -184,7 +184,7 @@ static inline bool TriIntersect(const reference Triangle tri, reference Ray ray)
bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[], bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
reference Ray r) { Ray &r) {
Ray ray = r; Ray ray = r;
bool hit = false; bool hit = false;
// Follow ray through BVH nodes to find primitive intersections // Follow ray through BVH nodes to find primitive intersections

View File

@@ -8,7 +8,7 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++ CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64 CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc ISPC=ispc
ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64 ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64 --opt=32-bit-addressing
OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \ OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o

View File

@@ -41,7 +41,7 @@ struct Ray {
static void static void
generateRay(const uniform float raster2camera[4][4], generateRay(const uniform float raster2camera[4][4],
const uniform float camera2world[4][4], const uniform float camera2world[4][4],
float x, float y, reference Ray ray) { float x, float y, Ray &ray) {
// transform raster coordinate (x, y, 0) to camera space // transform raster coordinate (x, y, 0) to camera space
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3]; float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3]; float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
@@ -70,7 +70,7 @@ Inside(float3 p, float3 pMin, float3 pMax) {
static bool static bool
IntersectP(Ray ray, float3 pMin, float3 pMax, reference float hit0, reference float hit1) { IntersectP(Ray ray, float3 pMin, float3 pMax, float &hit0, float &hit1) {
float t0 = -1e30, t1 = 1e30; float t0 = -1e30, t1 = 1e30;
float3 tNear = (pMin - ray.origin) / ray.dir; float3 tNear = (pMin - ray.origin) / ray.dir;
@@ -141,7 +141,7 @@ static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
static inline float Density(float3 Pobj, float3 pMin, float3 pMax, static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
uniform float density[], uniform int nVoxels[3], uniform float density[], uniform int nVoxels[3],
reference uniform bool checkForSameVoxel) { uniform bool &checkForSameVoxel) {
if (!Inside(Pobj, pMin, pMax)) if (!Inside(Pobj, pMin, pMax))
return 0; return 0;
// Compute voxel coordinates and offsets for _Pobj_ // Compute voxel coordinates and offsets for _Pobj_
@@ -155,8 +155,8 @@ static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
// Trilinearly interpolate density values to compute local density // Trilinearly interpolate density values to compute local density
float d00, d10, d01, d11; float d00, d10, d01, d11;
uniform int uvx, uvy, uvz; uniform int uvx, uvy, uvz;
if (checkForSameVoxel && reduce_equal(vx, uvx) && reduce_equal(vy, uvy) && if (checkForSameVoxel && reduce_equal(vx, &uvx) && reduce_equal(vy, &uvy) &&
reduce_equal(vz, uvz)) { reduce_equal(vz, &uvz)) {
// If all of the program instances are inside the same voxel, then // If all of the program instances are inside the same voxel, then
// we'll call the 'uniform' variant of the voxel density lookup // we'll call the 'uniform' variant of the voxel density lookup
// function, thus doing a single load for each value rather than a // function, thus doing a single load for each value rather than a

View File

@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations"> <ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32"> <ProjectConfiguration Include="Debug|Win32">
@@ -158,13 +158,13 @@
<FileType>Document</FileType> <FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2 <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
</Command> </Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 --opt=32-bit-addressing
</Command> </Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2 <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
</Command> </Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 --opt=32-bit-addressing
</Command> </Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>

1847
expr.cpp

File diff suppressed because it is too large Load Diff

77
expr.h
View File

@@ -65,6 +65,10 @@ public:
/** Returns the Type of the expression. */ /** Returns the Type of the expression. */
virtual const Type *GetType() const = 0; virtual const Type *GetType() const = 0;
/** Returns the type of the value returned by GetLValue(); this
should be a pointer type of some sort (uniform or varying). */
virtual const Type *GetLValueType() const;
/** For expressions that have values based on a symbol (e.g. regular /** For expressions that have values based on a symbol (e.g. regular
symbol references, array indexing, etc.), this returns a pointer to symbol references, array indexing, etc.), this returns a pointer to
that symbol. */ that symbol. */
@@ -266,11 +270,12 @@ public:
*/ */
class IndexExpr : public Expr { class IndexExpr : public Expr {
public: public:
IndexExpr(Expr *arrayOrVector, Expr *index, SourcePos p); IndexExpr(Expr *baseExpr, Expr *index, SourcePos p);
llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetValue(FunctionEmitContext *ctx) const;
llvm::Value *GetLValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
const Type *GetType() const; const Type *GetType() const;
const Type *GetLValueType() const;
Symbol *GetBaseSymbol() const; Symbol *GetBaseSymbol() const;
void Print() const; void Print() const;
@@ -278,7 +283,7 @@ public:
Expr *TypeCheck(); Expr *TypeCheck();
int EstimateCost() const; int EstimateCost() const;
Expr *arrayOrVector, *index; Expr *baseExpr, *index;
}; };
@@ -288,15 +293,13 @@ public:
*/ */
class MemberExpr : public Expr { class MemberExpr : public Expr {
public: public:
static MemberExpr* create(Expr *expr, const char *identifier, static MemberExpr *create(Expr *expr, const char *identifier,
SourcePos pos, SourcePos identifierPos); SourcePos pos, SourcePos identifierPos,
bool derefLvalue);
MemberExpr(Expr *expr, const char *identifier, SourcePos pos,
SourcePos identifierPos);
llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetValue(FunctionEmitContext *ctx) const;
llvm::Value *GetLValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
const Type *GetType() const; const Type *GetType() const;
const Type *GetLValueType() const;
Symbol *GetBaseSymbol() const; Symbol *GetBaseSymbol() const;
void Print() const; void Print() const;
Expr *Optimize(); Expr *Optimize();
@@ -310,6 +313,15 @@ public:
Expr *expr; Expr *expr;
std::string identifier; std::string identifier;
const SourcePos identifierPos; const SourcePos identifierPos;
protected:
MemberExpr(Expr *expr, const char *identifier, SourcePos pos,
SourcePos identifierPos, bool derefLValue);
/** Indicates whether the expression should be dereferenced before the
member is found. (i.e. this is true if the MemberExpr was a '->'
operator, and is false if it was a '.' operator.) */
bool dereferenceExpr;
}; };
@@ -506,6 +518,7 @@ public:
llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetValue(FunctionEmitContext *ctx) const;
const Type *GetType() const; const Type *GetType() const;
const Type *GetLValueType() const;
Symbol *GetBaseSymbol() const; Symbol *GetBaseSymbol() const;
void Print() const; void Print() const;
Expr *TypeCheck(); Expr *TypeCheck();
@@ -525,6 +538,7 @@ public:
llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetValue(FunctionEmitContext *ctx) const;
llvm::Value *GetLValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
const Type *GetType() const; const Type *GetType() const;
const Type *GetLValueType() const;
Symbol *GetBaseSymbol() const; Symbol *GetBaseSymbol() const;
void Print() const; void Print() const;
Expr *TypeCheck(); Expr *TypeCheck();
@@ -535,6 +549,44 @@ public:
}; };
/** Expression that represents taking the address of an expression. */
class AddressOfExpr : public Expr {
public:
AddressOfExpr(Expr *e, SourcePos p);
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
const Type *GetType() const;
Symbol *GetBaseSymbol() const;
void Print() const;
Expr *TypeCheck();
Expr *Optimize();
int EstimateCost() const;
Expr *expr;
};
/** Expression that returns the size of the given expression or type in
bytes. */
class SizeOfExpr : public Expr {
public:
SizeOfExpr(Expr *e, SourcePos p);
SizeOfExpr(const Type *t, SourcePos p);
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
const Type *GetType() const;
void Print() const;
Expr *TypeCheck();
Expr *Optimize();
int EstimateCost() const;
/* One of expr or type should be non-NULL (but not both of them). The
SizeOfExpr returns the size of whichever one of them isn't NULL. */
Expr *expr;
const Type *type;
};
/** @brief Expression representing a symbol reference in the program */ /** @brief Expression representing a symbol reference in the program */
class SymbolExpr : public Expr { class SymbolExpr : public Expr {
public: public:
@@ -543,6 +595,7 @@ public:
llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetValue(FunctionEmitContext *ctx) const;
llvm::Value *GetLValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
const Type *GetType() const; const Type *GetType() const;
const Type *GetLValueType() const;
Symbol *GetBaseSymbol() const; Symbol *GetBaseSymbol() const;
Expr *TypeCheck(); Expr *TypeCheck();
Expr *Optimize(); Expr *Optimize();
@@ -623,9 +676,13 @@ public:
/** This function indicates whether it's legal to convert from fromType to /** This function indicates whether it's legal to convert from fromType to
toType. toType. If the optional errorMsgBase and source position parameters
are provided, then an error message is issued if the type conversion
isn't possible.
*/ */
bool CanConvertTypes(const Type *fromType, const Type *toType); bool CanConvertTypes(const Type *fromType, const Type *toType,
const char *errorMsgBase = NULL,
SourcePos pos = SourcePos());
/** This function attempts to convert the given expression to the given /** This function attempts to convert the given expression to the given
type, returning a pointer to a new expression that is the result. If type, returning a pointer to a new expression that is the result. If

View File

@@ -74,10 +74,32 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
maskSymbol = m->symbolTable->LookupVariable("__mask"); maskSymbol = m->symbolTable->LookupVariable("__mask");
assert(maskSymbol != NULL); assert(maskSymbol != NULL);
if (code) { if (code != NULL) {
if (g->debugPrint) {
fprintf(stderr, "Creating function \"%s\". Initial code:\n",
sym->name.c_str());
code->Print(0);
fprintf(stderr, "---------------------\n");
}
code = code->TypeCheck(); code = code->TypeCheck();
if (code)
if (code != NULL && g->debugPrint) {
fprintf(stderr, "After typechecking function \"%s\":\n",
sym->name.c_str());
code->Print(0);
fprintf(stderr, "---------------------\n");
}
if (code != NULL) {
code = code->Optimize(); code = code->Optimize();
if (g->debugPrint) {
fprintf(stderr, "After optimizing function \"%s\":\n",
sym->name.c_str());
code->Print(0);
fprintf(stderr, "---------------------\n");
}
}
} }
if (g->debugPrint) { if (g->debugPrint) {
@@ -149,11 +171,11 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str()); sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str());
// get a pointer to the value in the struct // get a pointer to the value in the struct
llvm::Value *ptr = ctx->GetElementPtrInst(structArgPtr, 0, i, sym->name.c_str()); llvm::Value *ptr = ctx->AddElementOffset(structArgPtr, i, NULL, sym->name.c_str());
// and copy the value from the struct and into the local alloca'ed // and copy the value from the struct and into the local alloca'ed
// memory // memory
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, sym->name.c_str()); llvm::Value *ptrval = ctx->LoadInst(ptr, sym->name.c_str());
ctx->StoreInst(ptrval, sym->storagePtr); ctx->StoreInst(ptrval, sym->storagePtr);
ctx->EmitFunctionParameterDebugInfo(sym); ctx->EmitFunctionParameterDebugInfo(sym);
} }
@@ -200,9 +222,9 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
// Copy in the mask as well. // Copy in the mask as well.
int nArgs = (int)args.size(); int nArgs = (int)args.size();
// The mask is the last parameter in the argument structure // The mask is the last parameter in the argument structure
llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs, llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL,
"task_struct_mask"); "task_struct_mask");
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, "mask"); llvm::Value *ptrval = ctx->LoadInst(ptr, "mask");
ctx->SetFunctionMask(ptrval); ctx->SetFunctionMask(ptrval);
// Copy threadIndex and threadCount into stack-allocated storage so // Copy threadIndex and threadCount into stack-allocated storage so
@@ -236,7 +258,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
} }
// If the number of actual function arguments is equal to the // If the number of actual function arguments is equal to the
// number of declared arguments in decl->functionArgs, then we // number of declared arguments in decl->functionParams, then we
// don't have a mask parameter, so set it to be all on. This // don't have a mask parameter, so set it to be all on. This
// happens for example with 'export'ed functions that the app // happens for example with 'export'ed functions that the app
// calls. // calls.
@@ -338,11 +360,8 @@ Function::GenerateIR() {
if (m->errorCount == 0) { if (m->errorCount == 0) {
if (llvm::verifyFunction(*function, llvm::ReturnStatusAction) == true) { if (llvm::verifyFunction(*function, llvm::ReturnStatusAction) == true) {
if (g->debugPrint) { if (g->debugPrint)
llvm::PassManager ppm; function->dump();
ppm.add(llvm::createPrintModulePass(&llvm::outs()));
ppm.run(*m->module);
}
FATAL("Function verificication failed"); FATAL("Function verificication failed");
} }
@@ -376,11 +395,8 @@ Function::GenerateIR() {
sym->exportedFunction = appFunction; sym->exportedFunction = appFunction;
if (llvm::verifyFunction(*appFunction, if (llvm::verifyFunction(*appFunction,
llvm::ReturnStatusAction) == true) { llvm::ReturnStatusAction) == true) {
if (g->debugPrint) { if (g->debugPrint)
llvm::PassManager ppm; appFunction->dump();
ppm.add(llvm::createPrintModulePass(&llvm::outs()));
ppm.run(*m->module);
}
FATAL("Function verificication failed"); FATAL("Function verificication failed");
} }
} }

View File

@@ -171,7 +171,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
if (!error) { if (!error) {
llvm::TargetMachine *targetMachine = t->GetTargetMachine(); llvm::TargetMachine *targetMachine = t->GetTargetMachine();
const llvm::TargetData *targetData = targetMachine->getTargetData(); const llvm::TargetData *targetData = targetMachine->getTargetData();
t->is32bit = (targetData->getPointerSize() == 4); t->is32Bit = (targetData->getPointerSize() == 4);
} }
return !error; return !error;
@@ -284,8 +284,11 @@ llvm::Value *
Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type) { Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type) {
const llvm::TargetData *td = GetTargetMachine()->getTargetData(); const llvm::TargetData *td = GetTargetMachine()->getTargetData();
assert(td != NULL); assert(td != NULL);
return is32bit ? LLVMInt32(td->getTypeSizeInBits(type) / 8) : uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
LLVMInt64(td->getTypeSizeInBits(type) / 8); if (is32Bit || g->opt.force32BitAddressing)
return LLVMInt32(byteSize);
else
return LLVMInt64(byteSize);
} }
@@ -298,7 +301,12 @@ Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element) {
assert(structType != NULL); assert(structType != NULL);
const llvm::StructLayout *sl = td->getStructLayout(structType); const llvm::StructLayout *sl = td->getStructLayout(structType);
assert(sl != NULL); assert(sl != NULL);
return LLVMInt32(sl->getElementOffset(element));
uint64_t offset = sl->getElementOffset(element);
if (is32Bit || g->opt.force32BitAddressing)
return LLVMInt32(offset);
else
return LLVMInt64(offset);
} }
@@ -309,6 +317,7 @@ Opt::Opt() {
level = 1; level = 1;
fastMath = false; fastMath = false;
fastMaskedVload = false; fastMaskedVload = false;
force32BitAddressing = false;
unrollLoops = true; unrollLoops = true;
disableAsserts = false; disableAsserts = false;
disableHandlePseudoMemoryOps = false; disableHandlePseudoMemoryOps = false;

8
ispc.h
View File

@@ -187,7 +187,7 @@ struct Target {
std::string arch; std::string arch;
/** Is the target architecture 32 or 64 bit */ /** Is the target architecture 32 or 64 bit */
bool is32bit; bool is32Bit;
/** Target CPU. (e.g. "corei7", "corei7-avx", ..) */ /** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
std::string cpu; std::string cpu;
@@ -237,6 +237,12 @@ struct Opt {
it will make sense. */ it will make sense. */
bool unrollLoops; bool unrollLoops;
/** Indicates if addressing math will be done with 32-bit math, even on
64-bit systems. (This is generally noticeably more efficient,
though at the cost of not being able to address >2GB).
*/
bool force32BitAddressing;
/** Indicates whether assert() statements should be ignored (for /** Indicates whether assert() statements should be ignored (for
performance in the generated code). */ performance in the generated code). */
bool disableAsserts; bool disableAsserts;

8
lex.ll
View File

@@ -112,9 +112,12 @@ int64 { return TOKEN_INT64; }
launch { return TOKEN_LAUNCH; } launch { return TOKEN_LAUNCH; }
NULL { return TOKEN_NULL; } NULL { return TOKEN_NULL; }
print { return TOKEN_PRINT; } print { return TOKEN_PRINT; }
reference { return TOKEN_REFERENCE; } reference { Error(*yylloc, "\"reference\" qualifier is no longer supported; "
"please use C++-style '&' syntax for references "
"instead."); }
return { return TOKEN_RETURN; } return { return TOKEN_RETURN; }
soa { return TOKEN_SOA; } soa { return TOKEN_SOA; }
sizeof { return TOKEN_SIZEOF; }
static { return TOKEN_STATIC; } static { return TOKEN_STATIC; }
struct { return TOKEN_STRUCT; } struct { return TOKEN_STRUCT; }
switch { return TOKEN_SWITCH; } switch { return TOKEN_SWITCH; }
@@ -223,6 +226,7 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
"&=" { return TOKEN_AND_ASSIGN; } "&=" { return TOKEN_AND_ASSIGN; }
"^=" { return TOKEN_XOR_ASSIGN; } "^=" { return TOKEN_XOR_ASSIGN; }
"|=" { return TOKEN_OR_ASSIGN; } "|=" { return TOKEN_OR_ASSIGN; }
"->" { return TOKEN_PTR_OP; }
";" { return ';'; } ";" { return ';'; }
("{"|"<%") { return '{'; } ("{"|"<%") { return '{'; }
("}"|"%>") { return '}'; } ("}"|"%>") { return '}'; }
@@ -266,8 +270,6 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
%% %%
/*sizeof { return TOKEN_SIZEOF; }*/
/*"->" { return TOKEN_PTR_OP; }*/
/*short { return TOKEN_SHORT; }*/ /*short { return TOKEN_SHORT; }*/
/*long { return TOKEN_LONG; }*/ /*long { return TOKEN_LONG; }*/
/*signed { return TOKEN_SIGNED; }*/ /*signed { return TOKEN_SIGNED; }*/

View File

@@ -40,6 +40,7 @@
LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL; LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::PointerIntType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
@@ -74,7 +75,7 @@ LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
LLVM_TYPE_CONST llvm::ArrayType *LLVMTypes::VoidPointerVectorType = NULL; LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;
llvm::Constant *LLVMTrue = NULL; llvm::Constant *LLVMTrue = NULL;
llvm::Constant *LLVMFalse = NULL; llvm::Constant *LLVMFalse = NULL;
@@ -86,6 +87,8 @@ void
InitLLVMUtil(llvm::LLVMContext *ctx, Target target) { InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx); LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0); LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);
LLVMTypes::PointerIntType = target.is32Bit ? llvm::Type::getInt32Ty(*ctx) :
llvm::Type::getInt64Ty(*ctx);
LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx); LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx); LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx);
@@ -130,8 +133,8 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0); LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0); LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0);
LLVMTypes::VoidPointerVectorType = LLVMTypes::VoidPointerVectorType = g->target.is32Bit ? LLVMTypes::Int32VectorType :
llvm::ArrayType::get(LLVMTypes::VoidPointerType, target.vectorWidth); LLVMTypes::Int64VectorType;
LLVMTrue = llvm::ConstantInt::getTrue(*ctx); LLVMTrue = llvm::ConstantInt::getTrue(*ctx);
LLVMFalse = llvm::ConstantInt::getFalse(*ctx); LLVMFalse = llvm::ConstantInt::getFalse(*ctx);
@@ -451,11 +454,3 @@ LLVMBoolVector(const bool *bvec) {
} }
return llvm::ConstantVector::get(vals); return llvm::ConstantVector::get(vals);
} }
LLVM_TYPE_CONST llvm::ArrayType *
LLVMPointerVectorType(LLVM_TYPE_CONST llvm::Type *t) {
// NOTE: ArrayType, not VectorType
return llvm::ArrayType::get(llvm::PointerType::get(t, 0),
g->target.vectorWidth);
}

View File

@@ -52,6 +52,7 @@
struct LLVMTypes { struct LLVMTypes {
static LLVM_TYPE_CONST llvm::Type *VoidType; static LLVM_TYPE_CONST llvm::Type *VoidType;
static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType; static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
static LLVM_TYPE_CONST llvm::Type *PointerIntType;
static LLVM_TYPE_CONST llvm::Type *BoolType; static LLVM_TYPE_CONST llvm::Type *BoolType;
static LLVM_TYPE_CONST llvm::Type *Int8Type; static LLVM_TYPE_CONST llvm::Type *Int8Type;
@@ -86,7 +87,7 @@ struct LLVMTypes {
static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType; static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType; static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;
static LLVM_TYPE_CONST llvm::ArrayType *VoidPointerVectorType; static LLVM_TYPE_CONST llvm::VectorType *VoidPointerVectorType;
}; };
/** These variables hold the corresponding LLVM constant values as a /** These variables hold the corresponding LLVM constant values as a
@@ -204,10 +205,4 @@ extern llvm::Constant *LLVMMaskAllOn;
/** LLVM constant value representing an 'all off' SIMD lane mask */ /** LLVM constant value representing an 'all off' SIMD lane mask */
extern llvm::Constant *LLVMMaskAllOff; extern llvm::Constant *LLVMMaskAllOff;
/** Given an LLVM type, returns the corresponding type for a vector of
pointers to that type. (In practice, an array of pointers, since LLVM
prohibits vectors of pointers.
*/
extern LLVM_TYPE_CONST llvm::ArrayType *LLVMPointerVectorType(LLVM_TYPE_CONST llvm::Type *t);
#endif // ISPC_LLVMUTIL_H #endif // ISPC_LLVMUTIL_H

View File

@@ -83,6 +83,7 @@ static void usage(int ret) {
printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n"); printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n"); printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
printf(" [--opt=<option>]\t\t\tSet optimization option\n"); printf(" [--opt=<option>]\t\t\tSet optimization option\n");
printf(" 32-bit-addressing\t\tUse 32-bit math for addressing calculations even on 64-bit targets.\n");
printf(" disable-assertions\t\tRemove assertion statements from final code.\n"); printf(" disable-assertions\t\tRemove assertion statements from final code.\n");
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n"); printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n"); printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
@@ -248,6 +249,8 @@ int main(int Argc, char *Argv[]) {
g->opt.fastMath = true; g->opt.fastMath = true;
else if (!strcmp(opt, "fast-masked-vload")) else if (!strcmp(opt, "fast-masked-vload"))
g->opt.fastMaskedVload = true; g->opt.fastMaskedVload = true;
else if (!strcmp(opt, "32-bit-addressing"))
g->opt.force32BitAddressing = true;
else if (!strcmp(opt, "disable-assertions")) else if (!strcmp(opt, "disable-assertions"))
g->opt.disableAsserts = true; g->opt.disableAsserts = true;
else if (!strcmp(opt, "disable-loop-unroll")) else if (!strcmp(opt, "disable-loop-unroll"))

View File

@@ -250,6 +250,8 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
} }
LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx); LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
if (llvmType == NULL)
return;
// See if we have an initializer expression for the global. If so, // See if we have an initializer expression for the global. If so,
// make sure it's a compile-time constant! // make sure it's a compile-time constant!
@@ -365,12 +367,12 @@ lCheckForVaryingParameter(const Type *type, const std::string &name,
*/ */
static void static void
lCheckForStructParameters(const FunctionType *ftype, SourcePos pos) { lCheckForStructParameters(const FunctionType *ftype, SourcePos pos) {
const std::vector<const Type *> &argTypes = ftype->GetArgumentTypes(); for (int i = 0; i < ftype->GetNumParameters(); ++i) {
for (unsigned int i = 0; i < argTypes.size(); ++i) { const Type *type = ftype->GetParameterType(i);
const Type *type = argTypes[i];
if (dynamic_cast<const StructType *>(type) != NULL) { if (dynamic_cast<const StructType *>(type) != NULL) {
Error(pos, "Passing structs to/from application functions is currently broken. " Error(pos, "Passing structs to/from application functions is "
"Use a reference or const reference instead for now."); "currently broken. Use a pointer or const pointer to the "
"struct instead for now.");
return; return;
} }
} }
@@ -483,27 +485,32 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
bool seenDefaultArg = false; bool seenDefaultArg = false;
int nArgs = functionType->GetNumParameters(); int nArgs = functionType->GetNumParameters();
for (int i = 0; i < nArgs; ++i) { for (int i = 0; i < nArgs; ++i) {
const Type *argType = (functionType->GetArgumentTypes())[i]; const Type *argType = functionType->GetParameterType(i);
const std::string &argName = functionType->GetArgumentName(i); const std::string &argName = functionType->GetParameterName(i);
ConstExpr *defaultValue = (functionType->GetArgumentDefaults())[i]; ConstExpr *defaultValue = functionType->GetParameterDefault(i);
const SourcePos &argPos = (functionType->GetArgumentSourcePos())[i]; const SourcePos &argPos = functionType->GetParameterSourcePos(i);
// If the function is exported, make sure that the parameter // If the function is exported, make sure that the parameter
// doesn't have any varying stuff going on in it. // doesn't have any varying stuff going on in it.
if (funSym->storageClass == SC_EXPORT) if (funSym->storageClass == SC_EXPORT)
lCheckForVaryingParameter(argType, argName, argPos); lCheckForVaryingParameter(argType, argName, argPos);
// ISPC assumes that all memory passed in is aligned to the native // ISPC assumes that no pointers alias. (It should be possible to
// width and that no pointers alias. (It should be possible to
// specify when this is not the case, but this should be the // specify when this is not the case, but this should be the
// default.) Set parameter attributes accordingly. // default.) Set parameter attributes accordingly. (Only for
// uniform pointers, since varying pointers are int vectors...)
if (!functionType->isTask && if (!functionType->isTask &&
dynamic_cast<const ReferenceType *>(argType) != NULL) { ((dynamic_cast<const PointerType *>(argType) != NULL &&
argType->IsUniformType()) ||
dynamic_cast<const ReferenceType *>(argType) != NULL)) {
// NOTE: LLVM indexes function parameters starting from 1. // NOTE: LLVM indexes function parameters starting from 1.
// This is unintuitive. // This is unintuitive.
function->setDoesNotAlias(i+1, true); function->setDoesNotAlias(i+1, true);
#if 0
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth); int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
function->addAttribute(i+1, llvm::Attribute::constructAlignmentFromInt(align)); function->addAttribute(i+1, llvm::Attribute::constructAlignmentFromInt(align));
#endif
} }
if (symbolTable->LookupFunction(argName.c_str()) != NULL) if (symbolTable->LookupFunction(argName.c_str()) != NULL)
@@ -887,6 +894,9 @@ lGetExportedTypes(const Type *type,
if (dynamic_cast<const ReferenceType *>(type) != NULL) if (dynamic_cast<const ReferenceType *>(type) != NULL)
lGetExportedTypes(type->GetReferenceTarget(), exportedStructTypes, lGetExportedTypes(type->GetReferenceTarget(), exportedStructTypes,
exportedEnumTypes, exportedVectorTypes); exportedEnumTypes, exportedVectorTypes);
else if (dynamic_cast<const PointerType *>(type) != NULL)
lGetExportedTypes(type->GetBaseType(), exportedStructTypes,
exportedEnumTypes, exportedVectorTypes);
else if (arrayType != NULL) else if (arrayType != NULL)
lGetExportedTypes(arrayType->GetElementType(), exportedStructTypes, lGetExportedTypes(arrayType->GetElementType(), exportedStructTypes,
exportedEnumTypes, exportedVectorTypes); exportedEnumTypes, exportedVectorTypes);
@@ -920,9 +930,8 @@ lGetExportedParamTypes(const std::vector<Symbol *> &funcs,
exportedEnumTypes, exportedVectorTypes); exportedEnumTypes, exportedVectorTypes);
// And now the parameter types... // And now the parameter types...
const std::vector<const Type *> &argTypes = ftype->GetArgumentTypes(); for (int j = 0; j < ftype->GetNumParameters(); ++j)
for (unsigned int j = 0; j < argTypes.size(); ++j) lGetExportedTypes(ftype->GetParameterType(j), exportedStructTypes,
lGetExportedTypes(argTypes[j], exportedStructTypes,
exportedEnumTypes, exportedVectorTypes); exportedEnumTypes, exportedVectorTypes);
} }
} }

1525
opt.cpp

File diff suppressed because it is too large Load Diff

View File

@@ -104,14 +104,14 @@ static const char *lBuiltinTokens[] = {
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double", "cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
"else", "enum", "export", "extern", "false", "float", "for", "goto", "if", "else", "enum", "export", "extern", "false", "float", "for", "goto", "if",
"inline", "int", "int8", "int16", "int32", "int64", "launch", "NULL", "inline", "int", "int8", "int16", "int32", "int64", "launch", "NULL",
"print", "reference", "return", "print", "return", "sizeof",
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform", "static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
"unsigned", "varying", "void", "while", NULL "unsigned", "varying", "void", "while", NULL
}; };
static const char *lParamListTokens[] = { static const char *lParamListTokens[] = {
"bool", "const", "double", "enum", "false", "float", "int", "bool", "const", "double", "enum", "false", "float", "int",
"int8", "int16", "int32", "int64", "reference", "struct", "true", "int8", "int16", "int32", "int64", "struct", "true",
"uniform", "unsigned", "varying", "void", NULL "uniform", "unsigned", "varying", "void", NULL
}; };
@@ -152,12 +152,13 @@ static const char *lParamListTokens[] = {
%token TOKEN_AND_OP TOKEN_OR_OP TOKEN_MUL_ASSIGN TOKEN_DIV_ASSIGN TOKEN_MOD_ASSIGN %token TOKEN_AND_OP TOKEN_OR_OP TOKEN_MUL_ASSIGN TOKEN_DIV_ASSIGN TOKEN_MOD_ASSIGN
%token TOKEN_ADD_ASSIGN TOKEN_SUB_ASSIGN TOKEN_LEFT_ASSIGN TOKEN_RIGHT_ASSIGN %token TOKEN_ADD_ASSIGN TOKEN_SUB_ASSIGN TOKEN_LEFT_ASSIGN TOKEN_RIGHT_ASSIGN
%token TOKEN_AND_ASSIGN TOKEN_OR_ASSIGN TOKEN_XOR_ASSIGN %token TOKEN_AND_ASSIGN TOKEN_OR_ASSIGN TOKEN_XOR_ASSIGN
%token TOKEN_SIZEOF
%token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK %token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK
%token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA %token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA
%token TOKEN_CHAR TOKEN_INT TOKEN_UNSIGNED TOKEN_FLOAT TOKEN_DOUBLE %token TOKEN_CHAR TOKEN_INT TOKEN_UNSIGNED TOKEN_FLOAT TOKEN_DOUBLE
%token TOKEN_INT8 TOKEN_INT16 TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL %token TOKEN_INT8 TOKEN_INT16 TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE TOKEN_REFERENCE %token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH %token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH %token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH
@@ -183,7 +184,8 @@ static const char *lParamListTokens[] = {
%type <declaration> declaration parameter_declaration %type <declaration> declaration parameter_declaration
%type <declarators> init_declarator_list %type <declarators> init_declarator_list
%type <declarationList> parameter_list parameter_type_list %type <declarationList> parameter_list parameter_type_list
%type <declarator> declarator pointer init_declarator direct_declarator struct_declarator %type <declarator> declarator pointer reference
%type <declarator> init_declarator direct_declarator struct_declarator
%type <declarator> abstract_declarator direct_abstract_declarator %type <declarator> abstract_declarator direct_abstract_declarator
%type <structDeclaratorList> struct_declarator_list %type <structDeclaratorList> struct_declarator_list
@@ -289,10 +291,9 @@ postfix_expression
{ $$ = new FunctionCallExpr($1, $3, Union(@1,@4)); } { $$ = new FunctionCallExpr($1, $3, Union(@1,@4)); }
| launch_expression | launch_expression
| postfix_expression '.' TOKEN_IDENTIFIER | postfix_expression '.' TOKEN_IDENTIFIER
{ $$ = MemberExpr::create($1, yytext, Union(@1,@3), @3); } { $$ = MemberExpr::create($1, yytext, Union(@1,@3), @3, false); }
/* | postfix_expression TOKEN_PTR_OP TOKEN_IDENTIFIER | postfix_expression TOKEN_PTR_OP TOKEN_IDENTIFIER
{ UNIMPLEMENTED } { $$ = MemberExpr::create($1, yytext, Union(@1,@3), @3, true); }
*/
| postfix_expression TOKEN_INC_OP | postfix_expression TOKEN_INC_OP
{ $$ = new UnaryExpr(UnaryExpr::PostInc, $1, Union(@1,@2)); } { $$ = new UnaryExpr(UnaryExpr::PostInc, $1, Union(@1,@2)); }
| postfix_expression TOKEN_DEC_OP | postfix_expression TOKEN_DEC_OP
@@ -317,6 +318,10 @@ unary_expression
{ $$ = new UnaryExpr(UnaryExpr::PreInc, $2, Union(@1, @2)); } { $$ = new UnaryExpr(UnaryExpr::PreInc, $2, Union(@1, @2)); }
| TOKEN_DEC_OP unary_expression | TOKEN_DEC_OP unary_expression
{ $$ = new UnaryExpr(UnaryExpr::PreDec, $2, Union(@1, @2)); } { $$ = new UnaryExpr(UnaryExpr::PreDec, $2, Union(@1, @2)); }
| '&' unary_expression
{ $$ = new AddressOfExpr($2, Union(@1, @2)); }
| '*' unary_expression
{ $$ = new DereferenceExpr($2, Union(@1, @2)); }
| '+' cast_expression | '+' cast_expression
{ $$ = $2; } { $$ = $2; }
| '-' cast_expression | '-' cast_expression
@@ -325,6 +330,10 @@ unary_expression
{ $$ = new UnaryExpr(UnaryExpr::BitNot, $2, Union(@1, @2)); } { $$ = new UnaryExpr(UnaryExpr::BitNot, $2, Union(@1, @2)); }
| '!' cast_expression | '!' cast_expression
{ $$ = new UnaryExpr(UnaryExpr::LogicalNot, $2, Union(@1, @2)); } { $$ = new UnaryExpr(UnaryExpr::LogicalNot, $2, Union(@1, @2)); }
| TOKEN_SIZEOF unary_expression
{ $$ = new SizeOfExpr($2, Union(@1, @2)); }
| TOKEN_SIZEOF '(' type_name ')'
{ $$ = new SizeOfExpr($3, Union(@1, @4)); }
; ;
cast_expression cast_expression
@@ -711,8 +720,6 @@ specifier_qualifier_list
$$ = $2->GetAsUniformType(); $$ = $2->GetAsUniformType();
else if ($1 == TYPEQUAL_VARYING) else if ($1 == TYPEQUAL_VARYING)
$$ = $2->GetAsVaryingType(); $$ = $2->GetAsVaryingType();
else if ($1 == TYPEQUAL_REFERENCE)
$$ = new ReferenceType($2, false);
else if ($1 == TYPEQUAL_CONST) else if ($1 == TYPEQUAL_CONST)
$$ = $2->GetAsConstType(); $$ = $2->GetAsConstType();
else if ($1 == TYPEQUAL_UNSIGNED) { else if ($1 == TYPEQUAL_UNSIGNED) {
@@ -860,7 +867,6 @@ type_qualifier
| TOKEN_VARYING { $$ = TYPEQUAL_VARYING; } | TOKEN_VARYING { $$ = TYPEQUAL_VARYING; }
| TOKEN_TASK { $$ = TYPEQUAL_TASK; } | TOKEN_TASK { $$ = TYPEQUAL_TASK; }
| TOKEN_INLINE { $$ = TYPEQUAL_INLINE; } | TOKEN_INLINE { $$ = TYPEQUAL_INLINE; }
| TOKEN_REFERENCE { $$ = TYPEQUAL_REFERENCE; }
| TOKEN_UNSIGNED { $$ = TYPEQUAL_UNSIGNED; } | TOKEN_UNSIGNED { $$ = TYPEQUAL_UNSIGNED; }
; ;
@@ -884,6 +890,14 @@ declarator
tail->child = $2; tail->child = $2;
$$ = $1; $$ = $1;
} }
| reference direct_declarator
{
/* Walk to the end of the child chain that starts at the reference
   declarator ($1), attach the direct_declarator ($2) there, and
   yield $1 as the result of this alternative. */
Declarator *tail = $1;
while (tail->child != NULL)
tail = tail->child;
tail->child = $2;
$$ = $1;
}
| direct_declarator | direct_declarator
; ;
@@ -930,7 +944,7 @@ direct_declarator
if ($1 != NULL) { if ($1 != NULL) {
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4)); Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4));
d->child = $1; d->child = $1;
d->functionArgs = *$3; if ($3 != NULL) d->functionParams = *$3;
$$ = d; $$ = d;
} }
else else
@@ -976,6 +990,14 @@ pointer
; ;
/* reference: matches a single '&' token and yields a new DK_REFERENCE
   Declarator whose source position is that of the '&'. */
reference
: '&'
{
$$ = new Declarator(DK_REFERENCE, @1);
}
;
parameter_type_list parameter_type_list
: parameter_list { $$ = $1; } : parameter_list { $$ = $1; }
; ;
@@ -1067,6 +1089,17 @@ abstract_declarator
d->child = $2; d->child = $2;
$$ = d; $$ = d;
} }
| reference
{
/* Bare reference with no nested declarator: yield a DK_REFERENCE node.
   NOTE(review): the `reference` rule already produces a DK_REFERENCE
   Declarator as $1; this action allocates a second one and never uses
   $1 -- confirm whether $1 is meant to be dropped here. */
Declarator *d = new Declarator(DK_REFERENCE, @1);
$$ = d;
}
| reference direct_abstract_declarator
{
/* Reference wrapping a direct_abstract_declarator: the new
   DK_REFERENCE node spans both symbols and takes $2 as its child.
   NOTE(review): as above, $1 is not used -- confirm. */
Declarator *d = new Declarator(DK_REFERENCE, Union(@1, @2));
d->child = $2;
$$ = d;
}
; ;
direct_abstract_declarator direct_abstract_declarator
@@ -1113,7 +1146,7 @@ direct_abstract_declarator
| '(' parameter_type_list ')' | '(' parameter_type_list ')'
{ {
/* Builds a DK_FUNCTION declarator spanning the parentheses and copies
   the parameter list into it.
   NOTE(review): unlike the other DK_FUNCTION alternative visible below
   (which ends with "$$ = d;"), this action never assigns d to $$ --
   looks like a missing "$$ = d;"; confirm. */
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3)); Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3));
d->functionArgs = *$2; if ($2 != NULL) d->functionParams = *$2;
} }
| direct_abstract_declarator '(' ')' | direct_abstract_declarator '(' ')'
{ {
@@ -1125,7 +1158,7 @@ direct_abstract_declarator
{ {
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4)); Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4));
d->child = $1; d->child = $1;
d->functionArgs = *$3; if ($3 != NULL) d->functionParams = *$3;
$$ = d; $$ = d;
} }
; ;
@@ -1370,9 +1403,9 @@ function_definition
} }
compound_statement compound_statement
{ {
Symbol *sym;
std::vector<Symbol *> args; std::vector<Symbol *> args;
$2->GetFunctionInfo($1, &sym, &args); Symbol *sym = $2->GetFunctionInfo($1, &args);
if (sym != NULL)
m->AddFunctionDefinition(sym, args, $4); m->AddFunctionDefinition(sym, args, $4);
m->symbolTable->PopScope(); // push in lAddFunctionParams(); m->symbolTable->PopScope(); // push in lAddFunctionParams();
} }
@@ -1397,14 +1430,12 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
if (ds->storageClass == SC_TYPEDEF) if (ds->storageClass == SC_TYPEDEF)
m->AddTypeDef(decl->GetSymbol()); m->AddTypeDef(decl->GetSymbol());
else if (decl->kind == DK_FUNCTION) { else {
// function declaration
const Type *t = decl->GetType(ds); const Type *t = decl->GetType(ds);
if (t == NULL) if (t == NULL)
return; return;
const FunctionType *ft = dynamic_cast<const FunctionType *>(t); const FunctionType *ft = dynamic_cast<const FunctionType *>(t);
assert(ft != NULL); if (ft != NULL) {
Symbol *funSym = decl->GetSymbol(); Symbol *funSym = decl->GetSymbol();
assert(funSym != NULL); assert(funSym != NULL);
funSym->type = ft; funSym->type = ft;
@@ -1416,6 +1447,7 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
else else
m->AddGlobalVariable(decl->GetSymbol(), decl->initExpr, m->AddGlobalVariable(decl->GetSymbol(), decl->initExpr,
(ds->typeQualifiers & TYPEQUAL_CONST) != 0); (ds->typeQualifiers & TYPEQUAL_CONST) != 0);
}
} }
@@ -1426,9 +1458,14 @@ static void
lAddFunctionParams(Declarator *decl) { lAddFunctionParams(Declarator *decl) {
m->symbolTable->PushScope(); m->symbolTable->PushScope();
// wire up arguments // walk down to the declarator for the function itself
for (unsigned int i = 0; i < decl->functionArgs.size(); ++i) { while (decl->kind != DK_FUNCTION && decl->child != NULL)
Declaration *pdecl = decl->functionArgs[i]; decl = decl->child;
assert(decl->kind == DK_FUNCTION);
// now loop over its parameters and add them to the symbol table
for (unsigned int i = 0; i < decl->functionParams.size(); ++i) {
Declaration *pdecl = decl->functionParams[i];
if (pdecl == NULL) if (pdecl == NULL)
continue; continue;
assert(pdecl->declarators.size() == 1); assert(pdecl->declarators.size() == 1);

View File

@@ -40,7 +40,8 @@ parser.add_option('-o', '--no-opt', dest='no_opt', help='Disable optimization',
# if no specific test files are specified, run all of the tests in tests/ # if no specific test files are specified, run all of the tests in tests/,
# and failing_tests/ # failing_tests/ and tests_errors/
if len(args) == 0: if len(args) == 0:
files = glob.glob("tests/*ispc") + glob.glob("failing_tests/*ispc") files = glob.glob("tests/*ispc") + glob.glob("failing_tests/*ispc") + \
glob.glob("tests_errors/*ispc")
else: else:
files = args files = args

View File

@@ -319,85 +319,89 @@ static inline uniform int lanemask() {
// AOS/SOA conversion // AOS/SOA conversion
// AOS/SOA wrapper (float, 3 outputs): forwards the array, the offset and
// the outputs v0..v2 to __aos_to_soa3_float. In the pointer-based revision
// the array is passed as &a[0] and the outputs are 'float * uniform'
// parameters instead of 'reference float'.
static inline void static inline void
aos_to_soa3(uniform float a[], uniform int offset, reference float v0, aos_to_soa3(uniform float a[], uniform int offset, float * uniform v0,
reference float v1, reference float v2) { float * uniform v1, float * uniform v2) {
__aos_to_soa3_float(a, offset, v0, v1, v2); __aos_to_soa3_float(&a[0], offset, v0, v1, v2);
} }
// AOS/SOA wrapper (float, 3 inputs): forwards v0..v2, the array and the
// offset to __soa_to_aos3_float. The pointer-based revision passes the
// array as &a[0]; the signature itself is unchanged.
static inline void static inline void
soa_to_aos3(float v0, float v1, float v2, uniform float a[], soa_to_aos3(float v0, float v1, float v2, uniform float a[],
uniform int offset) { uniform int offset) {
__soa_to_aos3_float(v0, v1, v2, a, offset); __soa_to_aos3_float(v0, v1, v2, &a[0], offset);
} }
// AOS/SOA wrapper (float, 4 outputs): forwards the array, the offset and
// the outputs v0..v3 to __aos_to_soa4_float. In the pointer-based revision
// the array is passed as &a[0] and the outputs are 'float * uniform'
// parameters instead of 'reference float'.
static inline void static inline void
aos_to_soa4(uniform float a[], uniform int offset, reference float v0, aos_to_soa4(uniform float a[], uniform int offset, float * uniform v0,
reference float v1, reference float v2, reference float v3) { float * uniform v1, float * uniform v2, float * uniform v3) {
__aos_to_soa4_float(a, offset, v0, v1, v2, v3); __aos_to_soa4_float(&a[0], offset, v0, v1, v2, v3);
} }
// AOS/SOA wrapper (float, 4 inputs): forwards v0..v3, the array and the
// offset to __soa_to_aos4_float. The pointer-based revision passes the
// array as &a[0]; the signature itself is unchanged.
static inline void static inline void
soa_to_aos4(float v0, float v1, float v2, float v3, uniform float a[], soa_to_aos4(float v0, float v1, float v2, float v3, uniform float a[],
uniform int offset) { uniform int offset) {
__soa_to_aos4_float(v0, v1, v2, v3, a, offset); __soa_to_aos4_float(v0, v1, v2, v3, &a[0], offset);
} }
// AOS/SOA wrapper (int32, 3 outputs): forwards the array, the offset and
// the outputs v0..v2 to __aos_to_soa3_int32. In the pointer-based revision
// the array is passed as &a[0] and the outputs are 'int32 * uniform'
// parameters instead of 'reference int32'.
static inline void static inline void
aos_to_soa3(uniform int32 a[], uniform int offset, reference int32 v0, aos_to_soa3(uniform int32 a[], uniform int offset, int32 * uniform v0,
reference int32 v1, reference int32 v2) { int32 * uniform v1, int32 * uniform v2) {
__aos_to_soa3_int32(a, offset, v0, v1, v2); __aos_to_soa3_int32(&a[0], offset, v0, v1, v2);
} }
// AOS/SOA wrapper (int32, 3 inputs): forwards v0..v2, the array and the
// offset to __soa_to_aos3_int32. The pointer-based revision passes the
// array as &a[0]; the signature itself is unchanged.
static inline void static inline void
soa_to_aos3(int32 v0, int32 v1, int32 v2, uniform int32 a[], soa_to_aos3(int32 v0, int32 v1, int32 v2, uniform int32 a[],
uniform int offset) { uniform int offset) {
__soa_to_aos3_int32(v0, v1, v2, a, offset); __soa_to_aos3_int32(v0, v1, v2, &a[0], offset);
} }
// AOS/SOA wrapper (int32, 4 outputs): forwards the array, the offset and
// the outputs v0..v3 to __aos_to_soa4_int32. In the pointer-based revision
// the array is passed as &a[0] and the outputs are 'int32 * uniform'
// parameters instead of 'reference int32'.
static inline void static inline void
aos_to_soa4(uniform int32 a[], uniform int offset, reference int32 v0, aos_to_soa4(uniform int32 a[], uniform int offset, int32 * uniform v0,
reference int32 v1, reference int32 v2, reference int32 v3) { int32 * uniform v1, int32 * uniform v2, int32 * uniform v3) {
__aos_to_soa4_int32(a, offset, v0, v1, v2, v3); __aos_to_soa4_int32(&a[0], offset, v0, v1, v2, v3);
} }
// AOS/SOA wrapper (int32, 4 inputs): forwards v0..v3, the array and the
// offset to __soa_to_aos4_int32. The pointer-based revision passes the
// array as &a[0]; the signature itself is unchanged.
static inline void static inline void
soa_to_aos4(int32 v0, int32 v1, int32 v2, int32 v3, uniform int32 a[], soa_to_aos4(int32 v0, int32 v1, int32 v2, int32 v3, uniform int32 a[],
uniform int offset) { uniform int offset) {
__soa_to_aos4_int32(v0, v1, v2, v3, a, offset); __soa_to_aos4_int32(v0, v1, v2, v3, &a[0], offset);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// Prefetching // Prefetching
// Prefetch wrappers. Two revisions are interleaved on these lines:
//  - removed: the PREFETCHES(NAME, TYPE) macro, instantiated for twelve
//    uniform/varying element types, which defined prefetch_l1/l2/l3/nt
//    taking 'const reference TYPE' and calling
//    __prefetch_read_{1,2,3,nt}_##NAME##_refsconst;
//  - added: four plain functions prefetch_l1/l2/l3/nt that take a
//    'const void * uniform', cast it to 'uniform int8 * uniform', and call
//    __prefetch_read_uniform_{1,2,3,nt}.
#define PREFETCHES(NAME, TYPE) \ static inline void prefetch_l1(const void * uniform ptr) {
static inline void prefetch_l1(const reference TYPE ptr) { \ __prefetch_read_uniform_1((uniform int8 * uniform)ptr);
__prefetch_read_1_##NAME##_refsconst(ptr); \
} \
static inline void prefetch_l2(const reference TYPE ptr) { \
__prefetch_read_2_##NAME##_refsconst(ptr); \
} \
static inline void prefetch_l3(const reference TYPE ptr) { \
__prefetch_read_3_##NAME##_refsconst(ptr); \
} \
static inline void prefetch_nt(const reference TYPE ptr) { \
__prefetch_read_nt_##NAME##_refsconst(ptr); \
} }
PREFETCHES(uniform_int8, uniform int8) static inline void prefetch_l2(const void * uniform ptr) {
PREFETCHES(uniform_int16, uniform int16) __prefetch_read_uniform_2((uniform int8 * uniform)ptr);
PREFETCHES(uniform_int32, uniform int32) }
PREFETCHES(uniform_int64, uniform int64)
PREFETCHES(uniform_float, uniform float)
PREFETCHES(uniform_double, uniform double)
PREFETCHES(varying_int8, int8) static inline void prefetch_l3(const void * uniform ptr) {
PREFETCHES(varying_int16, int16) __prefetch_read_uniform_3((uniform int8 * uniform)ptr);
PREFETCHES(varying_int32, int32) }
PREFETCHES(varying_int64, int64)
PREFETCHES(varying_float, float)
PREFETCHES(varying_double, double)
#undef PREFETCHES static inline void prefetch_nt(const void * uniform ptr) {
__prefetch_read_uniform_nt((uniform int8 * uniform)ptr);
}
// Varying-pointer prefetch overloads, currently compiled out by '#if 0'.
// Each one casts ptr to 'varying int8 * varying' and calls the matching
// __prefetch_read_varying_{1,2,3,nt} routine.
#if 0
static inline void prefetch_l1(const void * varying ptr) {
__prefetch_read_varying_1((varying int8 * varying)ptr);
}
static inline void prefetch_l2(const void * varying ptr) {
__prefetch_read_varying_2((varying int8 * varying)ptr);
}
static inline void prefetch_l3(const void * varying ptr) {
__prefetch_read_varying_3((varying int8 * varying)ptr);
}
static inline void prefetch_nt(const void * varying ptr) {
__prefetch_read_varying_nt((varying int8 * varying)ptr);
}
#endif
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// Horizontal ops / reductions // Horizontal ops / reductions
@@ -525,9 +529,9 @@ static inline uniform unsigned int64 reduce_max(unsigned int64 v) {
// REDUCE_EQUAL(TYPE, FUNCTYPE, MASKTYPE) defines two reduce_equal overloads.
// Both call __reduce_equal_##FUNCTYPE with __mask cast to MASKTYPE:
//  - reduce_equal(v) supplies a local 'unusedValue' as the output slot
//    (by address in the pointer-based revision) and discards it;
//  - reduce_equal(v, value) forwards the caller's 'value' (a
//    'uniform TYPE * uniform' in the pointer-based revision).
#define REDUCE_EQUAL(TYPE, FUNCTYPE, MASKTYPE) \ #define REDUCE_EQUAL(TYPE, FUNCTYPE, MASKTYPE) \
static inline uniform bool reduce_equal(TYPE v) { \ static inline uniform bool reduce_equal(TYPE v) { \
uniform TYPE unusedValue; \ uniform TYPE unusedValue; \
return __reduce_equal_##FUNCTYPE(v, unusedValue, (MASKTYPE)__mask); \ return __reduce_equal_##FUNCTYPE(v, &unusedValue, (MASKTYPE)__mask); \
} \ } \
static inline uniform bool reduce_equal(TYPE v, reference uniform TYPE value) { \ static inline uniform bool reduce_equal(TYPE v, uniform TYPE * uniform value) { \
return __reduce_equal_##FUNCTYPE(v, value, (MASKTYPE)__mask); \ return __reduce_equal_##FUNCTYPE(v, value, (MASKTYPE)__mask); \
} }
@@ -599,26 +603,26 @@ static unsigned int64 exclusive_scan_or(unsigned int64 v) {
// packed_load_active (unsigned int): forwards the array, 'start' cast to
// unsigned int, 'vals' and __mask cast to unsigned int32 to
// __packed_load_active, and returns its uniform int result. The
// pointer-based revision passes &a[0] and takes 'vals' as
// 'unsigned int * uniform' instead of a reference.
static inline uniform int static inline uniform int
packed_load_active(uniform unsigned int a[], uniform int start, packed_load_active(uniform unsigned int a[], uniform int start,
reference unsigned int vals) { unsigned int * uniform vals) {
return __packed_load_active(a, (unsigned int)start, vals, return __packed_load_active(&a[0], (unsigned int)start, vals,
(unsigned int32)__mask); (unsigned int32)__mask);
} }
// packed_store_active (unsigned int): forwards the array, 'start' cast to
// unsigned int, 'vals' and __mask cast to unsigned int32 to
// __packed_store_active, and returns its uniform int result. The
// pointer-based revision passes the array as &a[0].
static inline uniform int static inline uniform int
packed_store_active(uniform unsigned int a[], uniform int start, packed_store_active(uniform unsigned int a[], uniform int start,
unsigned int vals) { unsigned int vals) {
return __packed_store_active(a, (unsigned int)start, vals, return __packed_store_active(&a[0], (unsigned int)start, vals,
(unsigned int32)__mask); (unsigned int32)__mask);
} }
// packed_load_active (int): forwards the array, 'start', 'vals' and __mask
// cast to int32 to __packed_load_active and returns its uniform int
// result. The pointer-based revision passes &a[0] and takes 'vals' as
// 'int * uniform' instead of a reference.
static inline uniform int packed_load_active(uniform int a[], uniform int start, static inline uniform int packed_load_active(uniform int a[], uniform int start,
reference int vals) { int * uniform vals) {
return __packed_load_active(a, start, vals, (int32)__mask); return __packed_load_active(&a[0], start, vals, (int32)__mask);
} }
// packed_store_active (int): forwards the array, 'start', 'vals' and
// __mask cast to int32 to __packed_store_active and returns its uniform
// int result. The pointer-based revision passes the array as &a[0].
static inline uniform int packed_store_active(uniform int a[], uniform int start, static inline uniform int packed_store_active(uniform int a[], uniform int start,
int vals) { int vals) {
return __packed_store_active(a, start, vals, (int32)__mask); return __packed_store_active(&a[0], start, vals, (int32)__mask);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -636,35 +640,35 @@ static inline void memory_barrier() {
} }
// DEFINE_ATOMIC_OP(TA, TB, OPA, OPB, MASKTYPE) defines two
// atomic_##OPA##_global overloads, one taking a varying 'value' and one
// taking a uniform 'value'. Each brackets the underlying call
// (__atomic_##OPB##_##TB##_global for the varying overload,
// __atomic_##OPB##_uniform_##TB##_global for the uniform one) with
// memory_barrier() before and after, passes __mask cast to MASKTYPE, and
// returns the call's result. The pointer-based revision replaces the
// 'uniform reference TA ref' parameter with 'uniform TA * uniform ptr'.
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \ #define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
static inline TA atomic_##OPA##_global(uniform reference TA ref, TA value) { \ static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
memory_barrier(); \ memory_barrier(); \
TA ret = __atomic_##OPB##_##TB##_global(ref, value, (MASKTYPE)__mask); \ TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
memory_barrier(); \ memory_barrier(); \
return ret; \ return ret; \
} \ } \
static inline uniform TA atomic_##OPA##_global(uniform reference TA ref, \ static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
uniform TA value) { \ uniform TA value) { \
memory_barrier(); \ memory_barrier(); \
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ref, value, (MASKTYPE)__mask); \ uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value, (MASKTYPE)__mask); \
memory_barrier(); \ memory_barrier(); \
return ret; \ return ret; \
} }
// DEFINE_ATOMIC_MINMAX_OP(TA, TB, OPA, OPB, MASKTYPE) defines two
// atomic_##OPA##_global overloads for min/max-style operations.
//  - Varying 'value': first collapses 'value' to one uniform with
//    reduce_##OPA, then, only if lanemask() != 0, performs a single
//    __atomic_##OPB##_uniform_##TB##_global call between two
//    memory_barrier() calls.
//  - Uniform 'value': performs the same uniform atomic call directly,
//    also bracketed by memory_barrier().
// NOTE(review): in the varying overload 'ret' has no initializer and is
// returned unassigned when lanemask() == 0 -- confirm that this path is
// unreachable or harmless.
#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB, MASKTYPE) \ #define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB, MASKTYPE) \
static inline TA atomic_##OPA##_global(uniform reference TA ref, TA value) { \ static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
uniform TA oneval = reduce_##OPA(value); \ uniform TA oneval = reduce_##OPA(value); \
TA ret; \ TA ret; \
if (lanemask() != 0) { \ if (lanemask() != 0) { \
memory_barrier(); \ memory_barrier(); \
ret = __atomic_##OPB##_uniform_##TB##_global(ref, oneval, (MASKTYPE)__mask); \ ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval, (MASKTYPE)__mask); \
memory_barrier(); \ memory_barrier(); \
} \ } \
return ret; \ return ret; \
} \ } \
static inline uniform TA atomic_##OPA##_global(uniform reference TA ref, \ static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
uniform TA value) { \ uniform TA value) { \
memory_barrier(); \ memory_barrier(); \
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ref, value, (MASKTYPE)__mask); \ uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value, (MASKTYPE)__mask); \
memory_barrier(); \ memory_barrier(); \
return ret; \ return ret; \
} }
@@ -717,16 +721,16 @@ DEFINE_ATOMIC_OP(double,double,swap,swap,int32)
// ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) defines two
// atomic_compare_exchange_global overloads (varying and uniform
// oldval/newval). Each calls the matching
// __atomic_compare_exchange[_uniform]_##TB##_global routine with __mask
// cast to MASKTYPE, bracketed by memory_barrier() before and after, and
// returns that call's result. The pointer-based revision replaces
// 'uniform reference TA ref' with 'uniform TA * uniform ptr'.
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \ #define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
static inline TA atomic_compare_exchange_global( \ static inline TA atomic_compare_exchange_global( \
uniform reference TA ref, TA oldval, TA newval) { \ uniform TA * uniform ptr, TA oldval, TA newval) { \
memory_barrier(); \ memory_barrier(); \
TA ret = __atomic_compare_exchange_##TB##_global(ref, oldval, newval, (MASKTYPE)__mask); \ TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, (MASKTYPE)__mask); \
memory_barrier(); \ memory_barrier(); \
return ret; \ return ret; \
} \ } \
static inline uniform TA atomic_compare_exchange_global( \ static inline uniform TA atomic_compare_exchange_global( \
uniform reference TA ref, uniform TA oldval, uniform TA newval) { \ uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
memory_barrier(); \ memory_barrier(); \
uniform TA ret = __atomic_compare_exchange_uniform_##TB##_global(ref, oldval, newval, (MASKTYPE)__mask); \ uniform TA ret = __atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval, (MASKTYPE)__mask); \
memory_barrier(); \ memory_barrier(); \
return ret; \ return ret; \
} }
@@ -1162,22 +1166,22 @@ static inline uniform float ldexp(uniform float x, uniform int n) {
return floatbits(ix); return floatbits(ix);
} }
// frexp (varying float): works on the bit pattern of x. Masks out the
// exponent field (0x7F800000), writes (exponent >> 23) - 126 through pw2,
// clears the exponent bits in ix, ORs in 0x3F000000 and returns the result
// reinterpreted as a float. The pointer-based revision takes pw2 as
// 'int * uniform' and stores through *pw2 instead of assigning a reference.
static inline float frexp(float x, reference int pw2) { static inline float frexp(float x, int * uniform pw2) {
unsigned int ex = 0x7F800000u; // exponent mask unsigned int ex = 0x7F800000u; // exponent mask
unsigned int ix = intbits(x); unsigned int ix = intbits(x);
ex &= ix; ex &= ix;
ix &= ~0x7F800000u; // clear exponent ix &= ~0x7F800000u; // clear exponent
pw2 = (int)(ex >> 23) - 126; // compute exponent *pw2 = (int)(ex >> 23) - 126; // compute exponent
ix |= 0x3F000000u; // insert exponent +1 in x ix |= 0x3F000000u; // insert exponent +1 in x
return floatbits(ix); return floatbits(ix);
} }
// frexp (uniform float): same bit manipulation as the varying overload,
// on uniform values. Writes (exponent >> 23) - 126 through pw2 and returns
// x's bits with the exponent field replaced by 0x3F000000. The
// pointer-based revision takes pw2 as 'uniform int * uniform'.
static inline uniform float frexp(uniform float x, reference uniform int pw2) { static inline uniform float frexp(uniform float x, uniform int * uniform pw2) {
uniform unsigned int ex = 0x7F800000u; // exponent mask uniform unsigned int ex = 0x7F800000u; // exponent mask
uniform unsigned int ix = intbits(x); uniform unsigned int ix = intbits(x);
ex &= ix; ex &= ix;
ix &= ~0x7F800000u; // clear exponent ix &= ~0x7F800000u; // clear exponent
pw2 = (uniform int)(ex >> 23) - 126; // compute exponent *pw2 = (uniform int)(ex >> 23) - 126; // compute exponent
ix |= 0x3F000000u; // insert exponent +1 in x ix |= 0x3F000000u; // insert exponent +1 in x
return floatbits(ix); return floatbits(ix);
} }
@@ -1441,7 +1445,8 @@ static inline uniform float cos(uniform float x_full) {
} }
static inline void sincos(float x_full, reference float sin_result, reference float cos_result) { static inline void sincos(float x_full, float * uniform sin_result,
float * uniform cos_result) {
if (__math_lib == __math_lib_svml) { if (__math_lib == __math_lib_svml) {
__svml_sincos(x_full, sin_result, cos_result); __svml_sincos(x_full, sin_result, cos_result);
} }
@@ -1451,9 +1456,9 @@ static inline void sincos(float x_full, reference float sin_result, reference fl
if ((mask & (1 << i)) == 0) if ((mask & (1 << i)) == 0)
continue; continue;
uniform float s, c; uniform float s, c;
__stdlib_sincosf(extract(x_full, i), s, c); __stdlib_sincosf(extract(x_full, i), &s, &c);
sin_result = insert(sin_result, i, s); *sin_result = insert(*sin_result, i, s);
cos_result = insert(cos_result, i, c); *cos_result = insert(*cos_result, i, c);
} }
} }
else if (__math_lib == __math_lib_ispc || else if (__math_lib == __math_lib_ispc ||
@@ -1503,17 +1508,17 @@ static inline void sincos(float x_full, reference float sin_result, reference fl
sin_formula *= x; sin_formula *= x;
sin_result = sin_usecos ? cos_formula : sin_formula; *sin_result = sin_usecos ? cos_formula : sin_formula;
cos_result = cos_usecos ? cos_formula : sin_formula; *cos_result = cos_usecos ? cos_formula : sin_formula;
sin_result = sin_flipsign ? -sin_result : sin_result; *sin_result = sin_flipsign ? -*sin_result : *sin_result;
cos_result = cos_flipsign ? -cos_result : cos_result; *cos_result = cos_flipsign ? -*cos_result : *cos_result;
} }
} }
static inline void sincos(uniform float x_full, reference uniform float sin_result, static inline void sincos(uniform float x_full, uniform float * uniform sin_result,
reference uniform float cos_result) { uniform float * uniform cos_result) {
if (__math_lib == __math_lib_system || if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) { __math_lib == __math_lib_svml) {
__stdlib_sincosf(x_full, sin_result, cos_result); __stdlib_sincosf(x_full, sin_result, cos_result);
@@ -1565,11 +1570,11 @@ static inline void sincos(uniform float x_full, reference uniform float sin_resu
sin_formula *= x; sin_formula *= x;
sin_result = sin_usecos ? cos_formula : sin_formula; *sin_result = sin_usecos ? cos_formula : sin_formula;
cos_result = cos_usecos ? cos_formula : sin_formula; *cos_result = cos_usecos ? cos_formula : sin_formula;
sin_result = sin_flipsign ? -sin_result : sin_result; *sin_result = sin_flipsign ? -*sin_result : *sin_result;
cos_result = cos_flipsign ? -cos_result : cos_result; *cos_result = cos_flipsign ? -*cos_result : *cos_result;
} }
} }
@@ -2038,7 +2043,8 @@ static inline uniform float exp(uniform float x_full) {
// Range reduction for logarithms takes log(x) -> log(2^n * y) -> n // Range reduction for logarithms takes log(x) -> log(2^n * y) -> n
// * log(2) + log(y) where y is the reduced range (usually in [1/2, // * log(2) + log(y) where y is the reduced range (usually in [1/2,
// 1)). // 1)).
static inline void __range_reduce_log(float input, reference float reduced, reference int exponent) { static inline void __range_reduce_log(float input, float * uniform reduced,
int * uniform exponent) {
int int_version = intbits(input); int int_version = intbits(input);
// single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM // single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM
// exponent mask = 0111 1111 1000 0000 0000 0000 0000 0000 // exponent mask = 0111 1111 1000 0000 0000 0000 0000 0000
@@ -2057,28 +2063,28 @@ static inline void __range_reduce_log(float input, reference float reduced, refe
int biased_exponent = int_version >> 23; // This number is [0, 255] but it means [-127, 128] int biased_exponent = int_version >> 23; // This number is [0, 255] but it means [-127, 128]
int offset_exponent = biased_exponent + 1; // Treat the number as if it were 2^{e+1} * (1.m)/2 int offset_exponent = biased_exponent + 1; // Treat the number as if it were 2^{e+1} * (1.m)/2
exponent = offset_exponent - 127; // get the real value *exponent = offset_exponent - 127; // get the real value
// Blend the offset_exponent with the original input (do this in // Blend the offset_exponent with the original input (do this in
// int for now, until I decide if float can have & and &not) // int for now, until I decide if float can have & and &not)
int blended = (int_version & nonexponent_mask) | (exponent_neg1); int blended = (int_version & nonexponent_mask) | (exponent_neg1);
reduced = floatbits(blended); *reduced = floatbits(blended);
} }
// __range_reduce_log (uniform): splits 'input' into an exponent and a
// reduced value using its integer bit pattern.
//  - exponent receives ((bits >> 23) + 1) - 127;
//  - reduced receives the bits with everything outside 0x807FFFFF cleared
//    and (126 << 23) ORed in, reinterpreted as a float.
// The pointer-based revision takes both outputs as uniform pointers and
// stores through *exponent / *reduced.
static inline void __range_reduce_log(uniform float input, reference uniform float reduced, static inline void __range_reduce_log(uniform float input, uniform float * uniform reduced,
reference uniform int exponent) { uniform int * uniform exponent) {
uniform int int_version = intbits(input); uniform int int_version = intbits(input);
static const uniform int nonexponent_mask = 0x807FFFFF; static const uniform int nonexponent_mask = 0x807FFFFF;
static const uniform int exponent_neg1 = (126 << 23); static const uniform int exponent_neg1 = (126 << 23);
uniform int biased_exponent = int_version >> 23; uniform int biased_exponent = int_version >> 23;
uniform int offset_exponent = biased_exponent + 1; uniform int offset_exponent = biased_exponent + 1;
exponent = offset_exponent - 127; // get the real value *exponent = offset_exponent - 127; // get the real value
uniform int blended = (int_version & nonexponent_mask) | (exponent_neg1); uniform int blended = (int_version & nonexponent_mask) | (exponent_neg1);
reduced = floatbits(blended); *reduced = floatbits(blended);
} }
@@ -2099,7 +2105,7 @@ static inline float log(float x_full) {
} }
else if (__math_lib == __math_lib_ispc_fast) { else if (__math_lib == __math_lib_ispc_fast) {
int e; int e;
x_full = frexp(x_full, e); x_full = frexp(x_full, &e);
int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 0xffffffff : 0; int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 0xffffffff : 0;
e += x_smaller_SQRTHF; e += x_smaller_SQRTHF;
@@ -2139,7 +2145,7 @@ static inline float log(float x_full) {
const float one = 1.0; const float one = 1.0;
float patched = exceptional ? one : x_full; float patched = exceptional ? one : x_full;
__range_reduce_log(patched, reduced, exponent); __range_reduce_log(patched, &reduced, &exponent);
const float ln2 = 0.693147182464599609375; const float ln2 = 0.693147182464599609375;
@@ -2179,7 +2185,7 @@ static inline uniform float log(uniform float x_full) {
} }
else if (__math_lib == __math_lib_ispc_fast) { else if (__math_lib == __math_lib_ispc_fast) {
uniform int e; uniform int e;
x_full = frexp(x_full, e); x_full = frexp(x_full, &e);
uniform int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 0xffffffff : 0; uniform int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 0xffffffff : 0;
e += x_smaller_SQRTHF; e += x_smaller_SQRTHF;
@@ -2219,7 +2225,7 @@ static inline uniform float log(uniform float x_full) {
const uniform float one = 1.0; const uniform float one = 1.0;
uniform float patched = exceptional ? one : x_full; uniform float patched = exceptional ? one : x_full;
__range_reduce_log(patched, reduced, exponent); __range_reduce_log(patched, &reduced, &exponent);
const uniform float ln2 = 0.693147182464599609375; const uniform float ln2 = 0.693147182464599609375;
@@ -2315,22 +2321,22 @@ static inline uniform double ldexp(uniform double x, uniform int n) {
return doublebits(ix); return doublebits(ix);
} }
// frexp (varying double): 64-bit counterpart of the float version. Masks
// the exponent field (0x7ff0000000000000), writes (exponent >> 52) - 1022
// through pw2, clears the exponent bits in ix, ORs in 0x3fe0000000000000
// and returns the result reinterpreted as a double. The pointer-based
// revision takes pw2 as 'int * uniform' and stores through *pw2.
static inline double frexp(double x, reference int pw2) { static inline double frexp(double x, int * uniform pw2) {
unsigned int64 ex = 0x7ff0000000000000; // exponent mask unsigned int64 ex = 0x7ff0000000000000; // exponent mask
unsigned int64 ix = intbits(x); unsigned int64 ix = intbits(x);
ex &= ix; ex &= ix;
ix &= ~0x7ff0000000000000; // clear exponent ix &= ~0x7ff0000000000000; // clear exponent
pw2 = (int)(ex >> 52) - 1022; // compute exponent *pw2 = (int)(ex >> 52) - 1022; // compute exponent
ix |= 0x3fe0000000000000; // insert exponent +1 in x ix |= 0x3fe0000000000000; // insert exponent +1 in x
return doublebits(ix); return doublebits(ix);
} }
// frexp (uniform double): same bit manipulation as the varying double
// overload, on uniform values. Writes (exponent >> 52) - 1022 through pw2
// and returns x's bits with the exponent field replaced by
// 0x3fe0000000000000. The pointer-based revision takes pw2 as
// 'uniform int * uniform'.
static inline uniform double frexp(uniform double x, reference uniform int pw2) { static inline uniform double frexp(uniform double x, uniform int * uniform pw2) {
uniform unsigned int64 ex = 0x7ff0000000000000; // exponent mask uniform unsigned int64 ex = 0x7ff0000000000000; // exponent mask
uniform unsigned int64 ix = intbits(x); uniform unsigned int64 ix = intbits(x);
ex &= ix; ex &= ix;
ix &= ~0x7ff0000000000000; // clear exponent ix &= ~0x7ff0000000000000; // clear exponent
pw2 = (int)(ex >> 52) - 1022; // compute exponent *pw2 = (int)(ex >> 52) - 1022; // compute exponent
ix |= 0x3fe0000000000000; // insert exponent +1 in x ix |= 0x3fe0000000000000; // insert exponent +1 in x
return doublebits(ix); return doublebits(ix);
} }
@@ -2381,13 +2387,13 @@ static inline uniform double cos(uniform double x) {
return __stdlib_cos(x); return __stdlib_cos(x);
} }
static inline void sincos(double x, reference double sin_result, static inline void sincos(double x, double * uniform sin_result,
reference double cos_result) { double * uniform cos_result) {
if (__math_lib == __math_lib_ispc_fast) { if (__math_lib == __math_lib_ispc_fast) {
float sr, cr; float sr, cr;
sincos((float)x, sr, cr); sincos((float)x, &sr, &cr);
sin_result = sr; *sin_result = sr;
cos_result = cr; *cos_result = cr;
} }
else { else {
uniform int mask = lanemask(); uniform int mask = lanemask();
@@ -2395,20 +2401,20 @@ static inline void sincos(double x, reference double sin_result,
uniform double sr, cr; uniform double sr, cr;
if ((mask & (1 << i)) == 0) if ((mask & (1 << i)) == 0)
continue; continue;
__stdlib_sincos(extract(x, i), sr, cr); __stdlib_sincos(extract(x, i), &sr, &cr);
sin_result = insert(sin_result, i, sr); *sin_result = insert(*sin_result, i, sr);
cos_result = insert(cos_result, i, cr); *cos_result = insert(*cos_result, i, cr);
} }
} }
} }
static inline void sincos(uniform double x, reference uniform double sin_result, static inline void sincos(uniform double x, uniform double * uniform sin_result,
reference uniform double cos_result) { uniform double * uniform cos_result) {
if (__math_lib == __math_lib_ispc_fast) { if (__math_lib == __math_lib_ispc_fast) {
uniform float sr, cr; uniform float sr, cr;
sincos((uniform float)x, sr, cr); sincos((uniform float)x, &sr, &cr);
sin_result = sr; *sin_result = sr;
cos_result = cr; *cos_result = cr;
} }
else else
__stdlib_sincos(x, sin_result, cos_result); __stdlib_sincos(x, sin_result, cos_result);
@@ -2883,63 +2889,64 @@ struct RNGState {
unsigned int z1, z2, z3, z4; unsigned int z1, z2, z3, z4;
}; };
// random: advances the generator state and returns the next value. Each
// of the four words z1..z4 is updated in turn by a shift/xor step into the
// temporary b followed by a mask/shift/xor step; the result is the XOR of
// the four updated words. The pointer-based revision takes the state as
// 'RNGState * uniform' and spells member access as (*state).zN (see the
// FIXME about '->').
static inline unsigned int random(reference RNGState state) static inline unsigned int random(RNGState * uniform state)
{ {
unsigned int b; unsigned int b;
b = ((state.z1 << 6) ^ state.z1) >> 13; // FIXME: state->z1, etc..
state.z1 = ((state.z1 & 4294967294U) << 18) ^ b; b = (((*state).z1 << 6) ^ (*state).z1) >> 13;
b = ((state.z2 << 2) ^ state.z2) >> 27; (*state).z1 = (((*state).z1 & 4294967294U) << 18) ^ b;
state.z2 = ((state.z2 & 4294967288U) << 2) ^ b; b = (((*state).z2 << 2) ^ (*state).z2) >> 27;
b = ((state.z3 << 13) ^ state.z3) >> 21; (*state).z2 = (((*state).z2 & 4294967288U) << 2) ^ b;
state.z3 = ((state.z3 & 4294967280U) << 7) ^ b; b = (((*state).z3 << 13) ^ (*state).z3) >> 21;
b = ((state.z4 << 3) ^ state.z4) >> 12; (*state).z3 = (((*state).z3 & 4294967280U) << 7) ^ b;
state.z4 = ((state.z4 & 4294967168U) << 13) ^ b; b = (((*state).z4 << 3) ^ (*state).z4) >> 12;
return (state.z1 ^ state.z2 ^ state.z3 ^ state.z4); (*state).z4 = (((*state).z4 & 4294967168U) << 13) ^ b;
return ((*state).z1 ^ (*state).z2 ^ (*state).z3 ^ (*state).z4);
} }
// frandom: draws one value from random(state), keeps its low 23 bits, ORs
// them into 0x3F800000, reinterprets the result as a float and subtracts
// 1.0f (presumably yielding a value in [0, 1) -- confirm). The
// pointer-based revision takes the state as 'RNGState * uniform'.
static inline float frandom(reference RNGState state) static inline float frandom(RNGState * uniform state)
{ {
unsigned int irand = random(state); unsigned int irand = random(state);
irand &= (1<<23)-1; irand &= (1<<23)-1;
return floatbits(0x3F800000 | irand)-1.0f; return floatbits(0x3F800000 | irand)-1.0f;
} }
// __seed4: seeds four consecutive slots (start+0 .. start+3) of each state
// word z1..z4 via insert(). For every word the four slots receive seed,
// seed ^ c1, (seed << 3) ^ c1 and (seed << 2) ^ c2; between words the seed
// is perturbed (+= 131; ^= extract(z2, 2); <<= 4, += 3, ^= extract(z1, 3)).
// Returns the final seed value so a caller can chain another call. The
// pointer-based revision takes the state as 'RNGState * uniform' and
// accesses members as (*state).zN.
static inline uniform unsigned int __seed4(reference RNGState state, static inline uniform unsigned int __seed4(RNGState * uniform state,
uniform int start, uniform int start,
uniform unsigned int seed) { uniform unsigned int seed) {
uniform unsigned int c1 = 0xf0f0f0f0; uniform unsigned int c1 = 0xf0f0f0f0;
uniform unsigned int c2 = 0x0f0f0f0f; uniform unsigned int c2 = 0x0f0f0f0f;
state.z1 = insert(state.z1, start + 0, seed); (*state).z1 = insert((*state).z1, start + 0, seed);
state.z1 = insert(state.z1, start + 1, seed ^ c1); (*state).z1 = insert((*state).z1, start + 1, seed ^ c1);
state.z1 = insert(state.z1, start + 2, (seed << 3) ^ c1); (*state).z1 = insert((*state).z1, start + 2, (seed << 3) ^ c1);
state.z1 = insert(state.z1, start + 3, (seed << 2) ^ c2); (*state).z1 = insert((*state).z1, start + 3, (seed << 2) ^ c2);
seed += 131; seed += 131;
state.z2 = insert(state.z2, start + 0, seed); (*state).z2 = insert((*state).z2, start + 0, seed);
state.z2 = insert(state.z2, start + 1, seed ^ c1); (*state).z2 = insert((*state).z2, start + 1, seed ^ c1);
state.z2 = insert(state.z2, start + 2, (seed << 3) ^ c1); (*state).z2 = insert((*state).z2, start + 2, (seed << 3) ^ c1);
state.z2 = insert(state.z2, start + 3, (seed << 2) ^ c2); (*state).z2 = insert((*state).z2, start + 3, (seed << 2) ^ c2);
seed ^= extract(state.z2, 2); seed ^= extract((*state).z2, 2);
state.z3 = insert(state.z3, start + 0, seed); (*state).z3 = insert((*state).z3, start + 0, seed);
state.z3 = insert(state.z3, start + 1, seed ^ c1); (*state).z3 = insert((*state).z3, start + 1, seed ^ c1);
state.z3 = insert(state.z3, start + 2, (seed << 3) ^ c1); (*state).z3 = insert((*state).z3, start + 2, (seed << 3) ^ c1);
state.z3 = insert(state.z3, start + 3, (seed << 2) ^ c2); (*state).z3 = insert((*state).z3, start + 3, (seed << 2) ^ c2);
seed <<= 4; seed <<= 4;
seed += 3; seed += 3;
seed ^= extract(state.z1, 3); seed ^= extract((*state).z1, 3);
state.z4 = insert(state.z4, start + 0, seed); (*state).z4 = insert((*state).z4, start + 0, seed);
state.z4 = insert(state.z4, start + 1, seed ^ c1); (*state).z4 = insert((*state).z4, start + 1, seed ^ c1);
state.z4 = insert(state.z4, start + 2, (seed << 3) ^ c1); (*state).z4 = insert((*state).z4, start + 2, (seed << 3) ^ c1);
state.z4 = insert(state.z4, start + 3, (seed << 2) ^ c2); (*state).z4 = insert((*state).z4, start + 3, (seed << 2) ^ c2);
return seed; return seed;
} }
static inline void seed_rng(reference uniform RNGState state, uniform unsigned int seed) { static inline void seed_rng(uniform RNGState * uniform state, uniform unsigned int seed) {
seed = __seed4(state, 0, seed); seed = __seed4(state, 0, seed);
if (programCount == 8) if (programCount == 8)
__seed4(state, 4, seed ^ 0xbeeff00d); __seed4(state, 4, seed ^ 0xbeeff00d);

View File

@@ -131,7 +131,11 @@ lPossiblyResolveFunctionOverloads(Expr *expr, const Type *type) {
// which in turn may represent an overloaded function. So we need // which in turn may represent an overloaded function. So we need
// to try to resolve the overload based on the type of the symbol // to try to resolve the overload based on the type of the symbol
// we're initializing here. // we're initializing here.
if (fse->ResolveOverloads(funcType->GetArgumentTypes()) == false) std::vector<const Type *> paramTypes;
for (int i = 0; i < funcType->GetNumParameters(); ++i)
paramTypes.push_back(funcType->GetParameterType(i));
if (fse->ResolveOverloads(paramTypes) == false)
return false; return false;
} }
return true; return true;
@@ -151,14 +155,9 @@ lPossiblyResolveFunctionOverloads(Expr *expr, const Type *type) {
static void static void
lInitSymbol(llvm::Value *lvalue, const char *symName, const Type *symType, lInitSymbol(llvm::Value *lvalue, const char *symName, const Type *symType,
Expr *initExpr, FunctionEmitContext *ctx, SourcePos pos) { Expr *initExpr, FunctionEmitContext *ctx, SourcePos pos) {
if (initExpr == NULL) { if (initExpr == NULL)
// Initialize things without initializers to the undefined value. // leave it uninitialized
// To auto-initialize everything to zero, replace 'UndefValue' with
// 'NullValue' in the below
LLVM_TYPE_CONST llvm::Type *ltype = symType->LLVMType(g->ctx);
ctx->StoreInst(llvm::UndefValue::get(ltype), lvalue);
return; return;
}
// If the initializer is a straight up expression that isn't an // If the initializer is a straight up expression that isn't an
// ExprList, then we'll see if we can type convert it to the type of // ExprList, then we'll see if we can type convert it to the type of
@@ -239,7 +238,14 @@ lInitSymbol(llvm::Value *lvalue, const char *symName, const Type *symType,
// Initialize each element with the corresponding value from // Initialize each element with the corresponding value from
// the ExprList // the ExprList
for (int i = 0; i < nInits; ++i) { for (int i = 0; i < nInits; ++i) {
llvm::Value *ep = ctx->GetElementPtrInst(lvalue, 0, i, "element"); llvm::Value *ep;
if (dynamic_cast<const StructType *>(symType) != NULL)
ep = ctx->AddElementOffset(lvalue, i, NULL, "element");
else
ep = ctx->GetElementPtrInst(lvalue, LLVMInt32(0), LLVMInt32(i),
PointerType::GetUniform(collectionType->GetElementType(i)),
"gep");
lInitSymbol(ep, symName, collectionType->GetElementType(i), lInitSymbol(ep, symName, collectionType->GetElementType(i),
exprList->exprs[i], ctx, pos); exprList->exprs[i], ctx, pos);
} }
@@ -359,9 +365,11 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
else { else {
// For non-static variables, allocate storage on the stack // For non-static variables, allocate storage on the stack
sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str()); sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());
// Tell the FunctionEmitContext about the variable; must do // Tell the FunctionEmitContext about the variable; must do
// this before the initializer stuff. // this before the initializer stuff.
ctx->EmitVariableDebugInfo(sym); ctx->EmitVariableDebugInfo(sym);
// And then get it initialized... // And then get it initialized...
sym->parentFunction = ctx->GetFunction(); sym->parentFunction = ctx->GetFunction();
lInitSymbol(sym->storagePtr, sym->name.c_str(), sym->type, lInitSymbol(sym->storagePtr, sym->name.c_str(), sym->type,
@@ -693,16 +701,22 @@ lSafeToRunWithAllLanesOff(Expr *expr) {
// If we can determine at compile time the size of the array/vector // If we can determine at compile time the size of the array/vector
// and if the indices are compile-time constants, then we may be // and if the indices are compile-time constants, then we may be
// able to safely run this under a predicated if statement.. // able to safely run this under a predicated if statement..
if (ie->arrayOrVector == NULL) if (ie->baseExpr == NULL)
return false; return false;
const Type *type = ie->arrayOrVector->GetType(); const Type *type = ie->baseExpr->GetType();
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index); ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
if (type == NULL || ce == NULL) if (type == NULL || ce == NULL)
return false; return false;
if (dynamic_cast<const ReferenceType *>(type) != NULL) if (dynamic_cast<const ReferenceType *>(type) != NULL)
type = type->GetReferenceTarget(); type = type->GetReferenceTarget();
const PointerType *pointerType =
dynamic_cast<const PointerType *>(type);
if (pointerType != NULL)
// pointer[offset] -> can't be sure
return false;
const SequentialType *seqType = const SequentialType *seqType =
dynamic_cast<const SequentialType *>(type); dynamic_cast<const SequentialType *>(type);
assert(seqType != NULL); assert(seqType != NULL);
@@ -740,6 +754,14 @@ lSafeToRunWithAllLanesOff(Expr *expr) {
if ((dre = dynamic_cast<DereferenceExpr *>(expr)) != NULL) if ((dre = dynamic_cast<DereferenceExpr *>(expr)) != NULL)
return lSafeToRunWithAllLanesOff(dre->expr); return lSafeToRunWithAllLanesOff(dre->expr);
SizeOfExpr *soe;
if ((soe = dynamic_cast<SizeOfExpr *>(expr)) != NULL)
return lSafeToRunWithAllLanesOff(soe->expr);
AddressOfExpr *aoe;
if ((aoe = dynamic_cast<AddressOfExpr *>(expr)) != NULL)
return lSafeToRunWithAllLanesOff(aoe->expr);
if (dynamic_cast<SymbolExpr *>(expr) != NULL || if (dynamic_cast<SymbolExpr *>(expr) != NULL ||
dynamic_cast<FunctionSymbolExpr *>(expr) != NULL || dynamic_cast<FunctionSymbolExpr *>(expr) != NULL ||
dynamic_cast<SyncExpr *>(expr) != NULL || dynamic_cast<SyncExpr *>(expr) != NULL ||
@@ -1822,7 +1844,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
if (!ptr) if (!ptr)
return; return;
llvm::Value *arrayPtr = ctx->GetElementPtrInst(argPtrArray, 0, i); llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, i, NULL);
ctx->StoreInst(ptr, arrayPtr); ctx->StoreInst(ptr, arrayPtr);
} }
} }
@@ -1830,7 +1852,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::Value *ptr = lProcessPrintArg(values, ctx, argTypes); llvm::Value *ptr = lProcessPrintArg(values, ctx, argTypes);
if (!ptr) if (!ptr)
return; return;
llvm::Value *arrayPtr = ctx->GetElementPtrInst(argPtrArray, 0, 0); llvm::Value *arrayPtr = ctx->AddElementOffset(argPtrArray, 0, NULL);
ctx->StoreInst(ptr, arrayPtr); ctx->StoreInst(ptr, arrayPtr);
} }
} }
@@ -1846,7 +1868,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
args[2] = LLVMInt32(g->target.vectorWidth); args[2] = LLVMInt32(g->target.vectorWidth);
args[3] = ctx->LaneMask(mask); args[3] = ctx->LaneMask(mask);
std::vector<llvm::Value *> argVec(&args[0], &args[5]); std::vector<llvm::Value *> argVec(&args[0], &args[5]);
ctx->CallInst(printFunc, AtomicType::Void, argVec, ""); ctx->CallInst(printFunc, NULL, argVec, "");
} }
@@ -1926,7 +1948,7 @@ AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
args.push_back(ctx->GetStringPtr(errorString)); args.push_back(ctx->GetStringPtr(errorString));
args.push_back(expr->GetValue(ctx)); args.push_back(expr->GetValue(ctx));
args.push_back(ctx->GetFullMask()); args.push_back(ctx->GetFullMask());
ctx->CallInst(assertFunc, AtomicType::Void, args, ""); ctx->CallInst(assertFunc, NULL, args, "");
#ifndef ISPC_IS_WINDOWS #ifndef ISPC_IS_WINDOWS
free(errorString); free(errorString);

View File

@@ -101,7 +101,8 @@ int main(int argc, char *argv[]) {
assert(w <= 16); assert(w <= 16);
float returned_result[16]; float returned_result[16];
memset(returned_result, 0, 16*sizeof(float)); for (int i = 0; i < 16; ++i)
returned_result[i] = -1e20;
float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 }; int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };

View File

@@ -11,7 +11,7 @@ export void f_v(uniform float RET[]) {
a[i] = i; a[i] = i;
float x=-1, y=-1, z=-1; float x=-1, y=-1, z=-1;
aos_to_soa3(a, 0, x, y, z); aos_to_soa3(a, 0, &x, &y, &z);
int errs = 0; int errs = 0;
if (x != width * programIndex) ++errs; if (x != width * programIndex) ++errs;

View File

@@ -11,7 +11,7 @@ export void f_v(uniform float RET[]) {
a[i] = i; a[i] = i;
float x=-1, y=-1, z=-1, w=-1; float x=-1, y=-1, z=-1, w=-1;
aos_to_soa4(a, 0, x, y, z, w); aos_to_soa4(a, 0, &x, &y, &z, &w);
int errs = 0; int errs = 0;
if (x != width * programIndex) ++errs; if (x != width * programIndex) ++errs;

View File

@@ -11,7 +11,7 @@ export void f_v(uniform float RET[]) {
a[i] = i; a[i] = i;
int x=-1, y=-1, z=-1; int x=-1, y=-1, z=-1;
aos_to_soa3(a, 0, x, y, z); aos_to_soa3(a, 0, &x, &y, &z);
int errs = 0; int errs = 0;
if (x != width * programIndex) ++errs; if (x != width * programIndex) ++errs;

View File

@@ -11,7 +11,7 @@ export void f_v(uniform float RET[]) {
a[i] = i; a[i] = i;
int x=-1, y=-1, z=-1, w=-1; int x=-1, y=-1, z=-1, w=-1;
aos_to_soa4(a, 0, x, y, z, w); aos_to_soa4(a, 0, &x, &y, &z, &w);
int errs = 0; int errs = 0;
if (x != width * programIndex) ++errs; if (x != width * programIndex) ++errs;

View File

@@ -5,7 +5,7 @@ export uniform int width() { return programCount; }
struct Foo { float f; }; struct Foo { float f; };
void f(reference uniform Foo foo[], float a) { void f(uniform Foo foo[], float a) {
++foo[a].f; ++foo[a].f;
} }

View File

@@ -6,7 +6,7 @@ uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float delta = 1; float delta = 1;
float b = atomic_add_global(s, delta); float b = atomic_add_global(&s, delta);
RET[programIndex] = reduce_add(b); RET[programIndex] = reduce_add(b);
} }

View File

@@ -8,7 +8,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
float b = 0; float b = 0;
float delta = 1; float delta = 1;
if (programIndex < 2) if (programIndex < 2)
b = atomic_add_global(s, delta); b = atomic_add_global(&s, delta);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = 0; float b = 0;
if (programIndex & 1) if (programIndex & 1)
b = atomic_add_global(s, programIndex); b = atomic_add_global(&s, programIndex);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = 0; float b = 0;
if (programIndex & 1) if (programIndex & 1)
b = atomic_or_global(s, (1 << programIndex)); b = atomic_or_global(&s, (1 << programIndex));
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = 0; float b = 0;
if (programIndex & 1) if (programIndex & 1)
b = atomic_or_global(s, (1 << programIndex)); b = atomic_or_global(&s, (1 << programIndex));
RET[programIndex] = popcnt(reduce_max((int32)b)); RET[programIndex] = popcnt(reduce_max((int32)b));
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = 0; float b = 0;
if (programIndex & 1) if (programIndex & 1)
b = atomic_or_global(s, (1 << programIndex)); b = atomic_or_global(&s, (1 << programIndex));
RET[programIndex] = (s>>20); RET[programIndex] = (s>>20);
} }

View File

@@ -6,7 +6,7 @@ uniform int64 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float delta = 1; float delta = 1;
float b = atomic_add_global(s, delta); float b = atomic_add_global(&s, delta);
RET[programIndex] = reduce_add(b); RET[programIndex] = reduce_add(b);
} }

View File

@@ -6,7 +6,7 @@ uniform int32 s = 0xff;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
int32 bits = 0xfffffff0; int32 bits = 0xfffffff0;
float b = atomic_xor_global(s, bits); float b = atomic_xor_global(&s, bits);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -5,7 +5,7 @@ uniform int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = atomic_or_global(s, (1<<programIndex)); float b = atomic_or_global(&s, (1<<programIndex));
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -5,7 +5,7 @@ uniform int32 s = 0xbeef;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = atomic_swap_global(s, programIndex); float b = atomic_swap_global(&s, programIndex);
RET[programIndex] = reduce_max(b); RET[programIndex] = reduce_max(b);
} }

View File

@@ -5,7 +5,7 @@ uniform int32 s = 2;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = atomic_compare_exchange_global(s, programIndex, a*1000); float b = atomic_compare_exchange_global(&s, programIndex, a*1000);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -5,7 +5,7 @@ uniform int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 a = aFOO[programIndex]; int32 a = aFOO[programIndex];
float b = atomic_min_global(s, a); float b = atomic_min_global(&s, a);
RET[programIndex] = reduce_min(b); RET[programIndex] = reduce_min(b);
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 a = aFOO[programIndex]; int32 a = aFOO[programIndex];
int32 b = 0; int32 b = 0;
if (programIndex & 1) if (programIndex & 1)
b = atomic_max_global(s, a); b = atomic_max_global(&s, a);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -8,7 +8,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
float b = 0; float b = 0;
int32 delta = 1; int32 delta = 1;
if (programIndex < 2) if (programIndex < 2)
b = atomic_add_global(s, delta); b = atomic_add_global(&s, delta);
RET[programIndex] = reduce_add(b); RET[programIndex] = reduce_add(b);
} }

View File

@@ -5,7 +5,7 @@ uniform unsigned int32 s = 10;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_add_global(s, 1); uniform unsigned int32 b = atomic_add_global(&s, 1);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -5,7 +5,7 @@ uniform unsigned int32 s = 0b1010;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_or_global(s, 1); uniform unsigned int32 b = atomic_or_global(&s, 1);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -5,7 +5,7 @@ uniform unsigned int32 s = 0b1010;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_or_global(s, 1); uniform unsigned int32 b = atomic_or_global(&s, 1);
RET[programIndex] = b; RET[programIndex] = b;
} }

View File

@@ -5,7 +5,7 @@ uniform unsigned int32 s = 0xffff;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_min_global(s, 1); uniform unsigned int32 b = atomic_min_global(&s, 1);
RET[programIndex] = b; RET[programIndex] = b;
} }

View File

@@ -5,7 +5,7 @@ uniform unsigned int32 s = 0xffff;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_min_global(s, 1); uniform unsigned int32 b = atomic_min_global(&s, 1);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -5,7 +5,7 @@ uniform float s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform float b = atomic_swap_global(s, 1.); uniform float b = atomic_swap_global(&s, 1.);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -5,7 +5,7 @@ uniform float s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform float b = atomic_swap_global(s, 1.); uniform float b = atomic_swap_global(&s, 1.);
RET[programIndex] = b; RET[programIndex] = b;
} }

View File

@@ -5,7 +5,7 @@ uniform float s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform float b = atomic_compare_exchange_global(s, 1., -100.); uniform float b = atomic_compare_exchange_global(&s, 1., -100.);
RET[programIndex] = b; RET[programIndex] = b;
} }

View File

@@ -5,7 +5,7 @@ uniform int64 s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform int64 b = atomic_compare_exchange_global(s, 100, -100); uniform int64 b = atomic_compare_exchange_global(&s, 100, -100);
RET[programIndex] = s; RET[programIndex] = s;
} }

View File

@@ -3,8 +3,8 @@ export uniform int width() { return programCount; }
void foo(reference float a) { void foo(float * uniform a) {
a = 0; *a = 0;
} }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
@@ -13,7 +13,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform int i; uniform int i;
cfor (i = 0; i < 10; ++i) cfor (i = 0; i < 10; ++i)
x[i] = a*b; x[i] = a*b;
foo(x[b]); foo(&x[b]);
RET[programIndex] = x[5] + x[9]; RET[programIndex] = x[5] + x[9];
} }

View File

@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
void foo(reference float a[10]) { void foo(float a[10]) {
a[5] = 0; a[5] = 0;
} }

View File

@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
void foo(reference float a[10]) { void foo(float a[10]) {
a[5] = 0; a[5] = 0;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
if (programIndex & 1) if (programIndex & 1)
a = -a; a = -a;
int exponent; int exponent;
frexp(a, exponent); frexp(a, &exponent);
RET[programIndex] = exponent; RET[programIndex] = exponent;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
if (programIndex & 1) if (programIndex & 1)
a = -a; a = -a;
int exponent; int exponent;
RET[programIndex] = frexp(a, exponent); RET[programIndex] = frexp(a, &exponent);
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
if (programIndex & 1) if (programIndex & 1)
a = -a; a = -a;
int exponent; int exponent;
frexp(a, exponent); frexp(a, &exponent);
RET[programIndex] = exponent; RET[programIndex] = exponent;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
if (programIndex & 1) if (programIndex & 1)
a = -a; a = -a;
int exponent; int exponent;
RET[programIndex] = frexp(a, exponent); RET[programIndex] = frexp(a, &exponent);
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {

View File

@@ -8,7 +8,7 @@ float foo(float a, float b) {
} }
static float bar(float a, float b) { static float bar(float a, float b) {
return min(a, b); return a < b ? a : b;
} }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {

View File

@@ -14,10 +14,10 @@ export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
varying int3 vv = array[a]; varying int3 vv = array[a];
++vv.y; ++vv.y;
array[a] = vv; array[a] = vv;
//CO print("fin %\n", array[programIndex].y);
ret[programIndex] = array[programIndex].y; ret[programIndex] = array[programIndex].y;
} }
export void result(uniform float ret[]) { export void result(uniform float ret[]) {
ret[programIndex] = 100+programIndex; ret[programIndex] = 101+programIndex;
ret[0] = 100;
} }

View File

@@ -5,7 +5,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform unsigned int a[programCount]; uniform unsigned int a[programCount];
a[programIndex] = aFOO[programIndex]; a[programIndex] = aFOO[programIndex];
unsigned int aa; unsigned int aa;
packed_load_active(a, 0, aa); packed_load_active(a, 0, &aa);
RET[programIndex] = aa; RET[programIndex] = aa;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
int aa = 15; int aa = 15;
uniform int count = 0; uniform int count = 0;
if (programIndex < 2) if (programIndex < 2)
count += packed_load_active(a, 0, aa); count += packed_load_active(a, 0, &aa);
RET[programIndex] = aa; RET[programIndex] = aa;
} }

View File

@@ -7,7 +7,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
int aa; int aa;
uniform int count = 0; uniform int count = 0;
if (programIndex < 2) if (programIndex < 2)
count += packed_load_active(a, 0, aa); count += packed_load_active(a, 0, &aa);
RET[programIndex] = count; RET[programIndex] = count;
} }

View File

@@ -8,7 +8,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
int aa = 32; int aa = 32;
uniform int count = 0; uniform int count = 0;
if (programIndex < 2) if (programIndex < 2)
count += packed_load_active(a, 5, aa); count += packed_load_active(a, 5, &aa);
RET[programIndex] = aa; RET[programIndex] = aa;
} }

View File

@@ -8,9 +8,9 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
int aa = 32; int aa = 32;
uniform int count = 0; uniform int count = 0;
if (programIndex & 1) if (programIndex & 1)
count += packed_load_active(a, 10, aa); count += packed_load_active(a, 10, &aa);
if (!(programIndex & 1)) if (!(programIndex & 1))
count += packed_load_active(a, 10+count, aa); count += packed_load_active(a, 10+count, &aa);
RET[programIndex] = aa; RET[programIndex] = aa;
} }

View File

@@ -1,14 +1,14 @@
export uniform int width() { return programCount; } export uniform int width() { return programCount; }
void inc(reference float v) { ++v; } void inc(uniform float * varying v) { ++(*v); }
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) { export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
uniform float foo[32]; uniform float foo[32];
for (uniform int i = 0; i < 32; ++i) for (uniform int i = 0; i < 32; ++i)
foo[i] = 10+i; foo[i] = 10+i;
int a = (int)aa[programIndex]; int a = (int)aa[programIndex];
inc(foo[a]); inc(&foo[a]);
ret[programIndex] = foo[programIndex]-programIndex; ret[programIndex] = foo[programIndex]-programIndex;
} }

17
tests/prefetch.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Smoke test for the prefetch_l1/l2/l3/nt routines called with an array
// argument. The prefetch calls produce no checked result; the test only
// verifies that the loads and arithmetic after them yield the expected values.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
// Issue each prefetch variant on the input array.
prefetch_l1(aFOO);
prefetch_l2(aFOO);
prefetch_l3(aFOO);
prefetch_nt(aFOO);
float a = aFOO[programIndex];
float b = 0.; b = a;
// a + b == 2 * aFOO[programIndex]
RET[programIndex] = a+b;
}
// Expected output. Matches f_f if aFOO[i] == i+1 -- presumably what the test
// harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 2 + 2*programIndex;
}

13
tests/ptr-1.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Pointer test: take the address of a uniform int, hold it in a uniform
// pointer to uniform data, and read the value back through the pointer.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform int a = 1;
uniform int * uniform b = &a;
// Every program instance reads the same value (1) through b.
RET[programIndex] = *b;
}
// Expected output: the value stored in 'a' in f_f.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

11
tests/ptr-10.ispc Normal file
View File

@@ -0,0 +1,11 @@
// Pointer test: initialize a uniform pointer directly from an array parameter
// (array/pointer duality) and dereference it to read the first element.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = aFOO;
// *b reads aFOO[0].
RET[programIndex] = *b;
}
// Expected output. Matches f_f if aFOO[0] == 1 -- presumably what the test
// harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

16
tests/ptr-11.ispc Normal file
View File

@@ -0,0 +1,16 @@
// Pointer test: binary pointer arithmetic (pointer + integer) on a uniform
// pointer, followed by a dereference.
export uniform int width() { return programCount; }
// Increments the value pointed to by v. Not called by f_f in this file.
void inc(int * uniform v) {
++*v;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = &aFOO[0];
// Advance the pointer by three elements, so *b reads aFOO[3].
b = b + 3;
RET[programIndex] = *b;
}
// Expected output. Matches f_f if aFOO[3] == 4 (i.e. aFOO[i] == i+1) --
// presumably what the test harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 4;
}

17
tests/ptr-12.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Pointer test: pre-increment and post-increment applied to a uniform pointer,
// followed by a dereference.
export uniform int width() { return programCount; }
// Increments the value pointed to by v. Not called by f_f in this file.
void inc(int * uniform v) {
++*v;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = &aFOO[0];
// Two increments in total, so *b reads aFOO[2].
++b;
b++;
RET[programIndex] = *b;
}
// Expected output. Matches f_f if aFOO[2] == 3 (i.e. aFOO[i] == i+1) --
// presumably what the test harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 3;
}

17
tests/ptr-13.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Pointer test: compound assignment (+= and -=) on a uniform pointer,
// followed by a dereference.
export uniform int width() { return programCount; }
// Increments the value pointed to by v. Not called by f_f in this file.
void inc(int * uniform v) {
++*v;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = &aFOO[0];
// Net offset is +2 elements, so *b reads aFOO[2].
b += 3;
b -= 1;
RET[programIndex] = *b;
}
// Expected output. Matches f_f if aFOO[2] == 3 (i.e. aFOO[i] == i+1) --
// presumably what the test harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 3;
}

15
tests/ptr-14.ispc Normal file
View File

@@ -0,0 +1,15 @@
// Pointer test: cast a pointer to a different pointee type and back again,
// then store and load through the round-tripped pointer.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float * uniform pa = &a;
// Cast float pointer -> int pointer -> float pointer; pc is derived from &a.
int * uniform pb = (int *)pa;
float *uniform pc = (float *)pb;
// Overwrite the pointed-to value with programIndex, then read it back.
*pc = programIndex;
RET[programIndex] = *pc;
}
// Expected output: the programIndex value stored through pc in f_f.
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
}

18
tests/ptr-15.ispc Normal file
View File

@@ -0,0 +1,18 @@
// Pointer test: pointer-to-pointer. A helper receives the address of a
// pointer and advances the pointer it refers to by one element.
export uniform int width() { return programCount; }
// Loads the pointer that 'ret' points to, increments it by one element, and
// stores the advanced pointer back through 'ret'.
// NOTE(review): the outermost pointer and 'px' carry no explicit qualifier, so
// they are presumably varying (pointers are varying by default) -- confirm.
void foo(uniform float * uniform * ret) {
uniform float *px = *ret;
++px;
*ret = px;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform ptr = &aFOO[0];
// After foo(), ptr is expected to refer to aFOO[1].
foo(&ptr);
RET[programIndex] = *ptr;
}
// Expected output. Matches f_f if aFOO[1] == 2 (i.e. aFOO[i] == i+1) --
// presumably what the test harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 2;
}

11
tests/ptr-16.ispc Normal file
View File

@@ -0,0 +1,11 @@
// Pointer test: index a uniform pointer with the varying programIndex
// (pointer[index] read).
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = aFOO;
// Each program instance reads aFOO[programIndex] via the pointer.
RET[programIndex] = b[programIndex];
}
// Expected output. Matches f_f if aFOO[i] == i+1 -- presumably what the test
// harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
}

16
tests/ptr-17.ispc Normal file
View File

@@ -0,0 +1,16 @@
// Pointer test: store through an indexed uniform pointer with a varying index
// (pointer[index] write) and verify the write is visible via the original array.
export uniform int width() { return programCount; }
// Increments the value pointed to by v. Not called by f_f in this file.
void inc(int * uniform v) {
++*v;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = aFOO;
// Write through the pointer, then read the same element back through aFOO.
b[programIndex] = programCount - programIndex;
RET[programIndex] = aFOO[programIndex];
}
// Expected output: the values written through b in f_f.
export void result(uniform float RET[]) {
RET[programIndex] = programCount - programIndex;
}

12
tests/ptr-18.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Pointer test: index a pointer with a negative compile-time constant after
// advancing the pointer.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = aFOO;
b += 10;
// b points at aFOO[10], so b[-5] reads aFOO[5].
RET[programIndex] = b[-5];
}
// Expected output. Matches f_f if aFOO[5] == 6 (i.e. aFOO[i] == i+1) --
// presumably what the test harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 6;
}

13
tests/ptr-19.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Pointer test: index a pointer with a negative int8 index held in a variable
// (no uniform qualifier on the index), after advancing the pointer.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = aFOO;
b += 10;
int8 index = -5;
// b points at aFOO[10], so b[index] reads aFOO[5].
RET[programIndex] = b[index];
}
// Expected output. Matches f_f if aFOO[5] == 6 (i.e. aFOO[i] == i+1) --
// presumably what the test harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 6;
}

13
tests/ptr-2.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Pointer test: take the address of a varying int, hold it in a uniform
// pointer to varying data, and read the value back through the pointer.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
int a = aFOO[programIndex];
int * uniform b = &a;
// *b yields each program instance's own value of 'a'.
RET[programIndex] = *b;
}
// Expected output. Matches f_f if aFOO[i] == i+1 -- presumably what the test
// harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
}

13
tests/ptr-20.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Pointer test: index a pointer with a negative uniform int8 index, after
// advancing the pointer.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform b = aFOO;
b += 10;
uniform int8 index = -5;
// b points at aFOO[10], so b[index] reads aFOO[5].
RET[programIndex] = b[index];
}
// Expected output. Matches f_f if aFOO[5] == 6 (i.e. aFOO[i] == i+1) --
// presumably what the test harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = 6;
}

22
tests/ptr-21.ispc Normal file
View File

@@ -0,0 +1,22 @@
// Pointer test: member access through a uniform pointer to a struct using
// the -> operator, for both a varying member and a uniform member.
export uniform int width() { return programCount; }
// Struct with one member declared without a qualifier and one uniform member.
struct Foo {
int a;
uniform float b;
};
// Modifies both members of the pointed-to struct via ->.
void update(Foo * uniform fp) {
fp->a += 1;
fp->b = 1;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
// b starts at 5 and is overwritten with 1 by update().
Foo f = { aFOO[programIndex], 5 };
update(&f);
RET[programIndex] = f.b;
}
// Expected output: the value update() assigns to the 'b' member.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

23
tests/ptr-22.ispc Normal file
View File

@@ -0,0 +1,23 @@
// Pointer test: pass an array of structs to a function taking a varying
// pointer, advance the pointer inside the function, and modify the second
// element's members via ->.
export uniform int width() { return programCount; }
// Struct with one member declared without a qualifier and one uniform member.
struct Foo {
int a;
uniform float b;
};
// Advances fp by one Foo, then modifies that element's members.
void update(Foo * varying fp) {
++fp;
fp->a -= 1;
fp->b = 1;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
// f[0] holds sentinel values; f[1].a starts as aFOO[programIndex].
Foo f[2] = { { 1234, 4321 }, { aFOO[programIndex], 5 } };
// The array argument is passed where a pointer parameter is expected.
update(f);
// f[1].a is expected to be aFOO[programIndex] - 1 after update().
RET[programIndex] = f[1].a;
}
// Expected output. Matches f_f if aFOO[i] == i+1 -- presumably what the test
// harness supplies; confirm against the harness.
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
}

21
tests/ptr-23.ispc Normal file
View File

@@ -0,0 +1,21 @@
// Pointer test: access a short-vector component through a uniform pointer
// using the -> operator.
export uniform int width() { return programCount; }
// Declared but not referenced by the rest of this file.
struct Foo {
int a;
uniform float b;
};
// Sets the x component of the pointed-to vector to zero.
void update(float<3> * uniform vp) {
vp->x = 0;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
float<3> v = { 1, 2, 3 };
update(&v);
RET[programIndex] = v.x;
}
// Expected output: the value update() assigns to the x component.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

24
tests/ptr-24.ispc Normal file
View File

@@ -0,0 +1,24 @@
// Pointer test: varying pointer to uniform short-vector data. Each program
// instance passes the address of a different array element and the callee
// writes a component via ->.
export uniform int width() { return programCount; }
// Sets the y component of the pointed-to vector to zero.
void update(uniform float<2> * varying vp) {
vp->y = 0;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float<2> v[programCount];
// Fill so that element i holds x == 2*i and y == 2*i+1.
for (uniform int i = 0; i < programCount; ++i) {
v[i].x = 2*i;
v[i].y = 2*i+1;
}
// NOTE(review): 'index' is computed but not used below.
int index = aFOO[programIndex] - 1;
update(&v[programIndex]);
//CO for (uniform int i = 0; i < programCount; ++i)
//CO print("%: % %\n", i, v[i].x, v[i].y);
// With y zeroed by update(), x + y is expected to equal x, i.e. 2*programIndex.
RET[programIndex] = v[programIndex].x + v[programIndex].y;
}
// Expected output: the x value stored for element programIndex in f_f.
export void result(uniform float RET[]) {
RET[programIndex] = 2*programIndex;
}

24
tests/ptr-25.ispc Normal file
View File

@@ -0,0 +1,24 @@
// Pointer test: varying pointer to short-vector data declared without a
// uniform qualifier. Each program instance passes the address of a different
// array element and the callee writes a component via ->.
export uniform int width() { return programCount; }
// Sets the y component of the pointed-to vector to zero.
void update(float<2> * varying vp) {
vp->y = 0;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
float<2> v[programCount];
// Fill so that element i holds x == 2*i and y == 2*i+1.
for (uniform int i = 0; i < programCount; ++i) {
v[i].x = 2*i;
v[i].y = 2*i+1;
}
// NOTE(review): 'index' is computed but not used below.
int index = aFOO[programIndex] - 1;
update(&v[programIndex]);
//CO for (uniform int i = 0; i < programCount; ++i)
//CO print("%: % %\n", i, v[i].x, v[i].y);
// Only x is read back; update() modifies y, so x is expected to be unchanged.
RET[programIndex] = v[programIndex].x;
}
// Expected output: the x value stored for element programIndex in f_f.
export void result(uniform float RET[]) {
RET[programIndex] = 2*programIndex;
}

14
tests/ptr-3.ispc Normal file
View File

@@ -0,0 +1,14 @@
export uniform int width() { return programCount; }
// Test body: assigns through a dereferenced uniform pointer to a local
// int and reads the value back through the same pointer.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int a = aFOO[programIndex];
// b is a uniform pointer to the local a.
int * uniform b = &a;
// Overwrites whatever was loaded from aFOO.
*b = 2;
RET[programIndex] = *b;
}
// Reference output: 2 in every element, the constant f_f stored via *b.
export void result(uniform float RET[]) {
RET[programIndex] = 2;
}

14
tests/ptr-4.ispc Normal file
View File

@@ -0,0 +1,14 @@
// Returns the built-in programCount to the caller (presumably the test
// driver, so it can size the RET/aFOO buffers -- confirm).
export uniform int width() { return programCount; }
// Test body: applies prefix ++ to a dereferenced uniform pointer to a
// local int and reads the incremented value back through the pointer.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int a = aFOO[programIndex];
int * uniform b = &a;
// Prefix increment of the pointed-to value (not of the pointer).
++*b;
RET[programIndex] = *b;
}
// Reference output: 2+programIndex. Since f_f adds exactly 1 to the
// loaded value, this implies the driver supplies aFOO[i] == i+1.
export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex;
}

14
tests/ptr-5.ispc Normal file
View File

@@ -0,0 +1,14 @@
// Returns the built-in programCount to the caller (presumably the test
// driver, so it can size the RET/aFOO buffers -- confirm).
export uniform int width() { return programCount; }
// Test body: applies postfix ++ to a parenthesised dereference of a
// uniform pointer to a local int, then reads the value back.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int a = aFOO[programIndex];
int * uniform b = &a;
// Postfix increment of the pointed-to value; the parentheses keep the
// ++ from applying to the pointer b itself.
(*b)++;
RET[programIndex] = *b;
}
// Reference output: 2+programIndex. Since f_f adds exactly 1 to the
// loaded value, this implies the driver supplies aFOO[i] == i+1.
export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex;
}

12
tests/ptr-6.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Returns the built-in programCount to the caller (presumably the test
// driver, so it can size the RET/aFOO buffers -- confirm).
export uniform int width() { return programCount; }
// Test body: takes a uniform pointer to the first element of aFOO and
// indexes that pointer with programIndex as if it were an array.
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform ap = &aFOO[0];
// Pointer subscripting with a per-instance index.
RET[programIndex] = ap[programIndex];
}
// Reference output: 1+programIndex. f_f copies aFOO through the pointer
// unchanged, so this implies the driver supplies aFOO[i] == i+1.
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
}

12
tests/ptr-7.ispc Normal file
View File

@@ -0,0 +1,12 @@
// Returns the built-in programCount to the caller (presumably the test
// driver, so it can size the RET/aFOO buffers -- confirm).
export uniform int width() { return programCount; }
// Test body: forms a varying pointer to uniform float data, with each
// instance pointing at aFOO[programIndex], and loads through it with *.
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * varying ap = &aFOO[programIndex];
RET[programIndex] = *ap;
}
// Reference output: 1+programIndex. f_f copies aFOO through the pointer
// unchanged, so this implies the driver supplies aFOO[i] == i+1.
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
}

20
tests/ptr-8.ispc Normal file
View File

@@ -0,0 +1,20 @@
// Returns the built-in programCount to the caller (presumably the test
// driver, so it can size the RET/aFOO buffers -- confirm).
export uniform int width() { return programCount; }
// Prefix-increments the int reached through the uniform pointer v.
void inc(int * uniform v) {
++*v;
}
// Test body: calls inc() on a pointer to a local int, but only where the
// per-instance condition a <= 2 holds, then reports a itself (not *b) to
// show the callee's write landed in the original variable.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int a = aFOO[programIndex];
int * uniform b = &a;
// The condition depends on a, which is loaded per instance; the
// reference output expects only instances with a <= 2 to be incremented.
if (a <= 2)
inc(b);
RET[programIndex] = a;
}
// Reference output: 1+programIndex everywhere, then elements 0 and 1 are
// overridden with 2 and 3. That matches inc() having run only where
// a <= 2, assuming the driver supplies aFOO[i] == i+1 -- confirm.
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
// Uniform stores that overwrite the first two elements.
RET[0] = 2;
RET[1] = 3;
}

18
tests/ptr-9.ispc Normal file
View File

@@ -0,0 +1,18 @@
// Returns the built-in programCount to the caller (presumably the test
// driver, so it can size the RET/aFOO buffers -- confirm).
export uniform int width() { return programCount; }
// Prefix-increments the int reached through the uniform pointer v.
// NOTE(review): nothing in the visible part of this test calls inc();
// it may be a leftover from the test this one was copied from.
void inc(int * uniform v) {
++*v;
}
// Test body: round-trips a pointer to a local int through void * and an
// explicit cast back, then loads through the recovered pointer.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int a = aFOO[programIndex];
int * uniform b = &a;
// Conversion to void * is written without a cast...
void * uniform vp = b;
// ...while the conversion back uses an explicit cast.
int * uniform c = (int * uniform)vp;
RET[programIndex] = *c;
}
// Reference output: 1+programIndex. f_f returns the loaded value
// unchanged, so this implies the driver supplies aFOO[i] == i+1.
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
}

Some files were not shown because too many files have changed in this diff Show More