+some uniform related improvements

This commit is contained in:
Evghenii
2014-01-09 14:37:27 +01:00
parent 1ed438dcdb
commit 2c043f67d0
5 changed files with 132 additions and 254 deletions

View File

@@ -4,7 +4,7 @@ CU_SRC=ao.cu
CXX_SRC=ao.cpp ao_serial.cpp CXX_SRC=ao.cpp ao_serial.cpp
PTXCC_REGMAX=64 PTXCC_REGMAX=64
LLVM_GPU=1 # LLVM_GPU=1
NVVM_GPU=1 NVVM_GPU=1
include ../common_gpu.mk include ../common_gpu.mk

View File

@@ -82,8 +82,8 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
uniform float x1, uniform float y1, uniform float x1, uniform float y1,
uniform int width, uniform int height, uniform int width, uniform int height,
uniform int maxIterations, uniform int output[]) { uniform int maxIterations, uniform int output[]) {
uniform float dx = (x1 - x0) / width; const uniform float dx = (x1 - x0) / width;
uniform float dy = (y1 - y0) / height; const uniform float dy = (y1 - y0) / height;
const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */ const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
const uniform int yspan = 16; const uniform int yspan = 16;

View File

@@ -38,6 +38,13 @@ typedef bool bool_t;
#endif #endif
typedef float<3> float3; typedef float<3> float3;
#ifdef __NVPTX__
#define uniform_t varying
#else
#define uniform_t uniform
#endif
struct int3 struct int3
{ {
@@ -124,8 +131,8 @@ inline
#endif #endif
static bool_t BBoxIntersect(const uniform float bounds[2][3], static bool_t BBoxIntersect(const uniform float bounds[2][3],
const Ray &ray) { const Ray &ray) {
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] }; const uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] }; const uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
float t0 = ray.mint, t1 = ray.maxt; float t0 = ray.mint, t1 = ray.maxt;
// Check all three axis-aligned slabs. Don't try to early out; it's // Check all three axis-aligned slabs. Don't try to early out; it's
@@ -164,12 +171,12 @@ static bool_t BBoxIntersect(const uniform float bounds[2][3],
#if 1 #if 1
inline inline
#endif #endif
static bool_t TriIntersect(const uniform Triangle &tri, Ray &ray) { static bool_t TriIntersect(const uniform_t Triangle tri, Ray &ray) {
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] }; const uniform_t float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] }; const uniform_t float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] }; const uniform_t float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
uniform float3 e1 = p1 - p0; const uniform_t float3 e1 = p1 - p0;
uniform float3 e2 = p2 - p0; const uniform_t float3 e2 = p2 - p0;
float3 s1 = Cross(ray.dir, e2); float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1); float divisor = Dot(s1, e1);
@@ -218,13 +225,13 @@ BVHIntersect(const uniform LinearBVHNode nodes[],
while (true) { while (true) {
// Check ray against BVH node // Check ray against BVH node
uniform LinearBVHNode node = nodes[nodeNum]; const uniform LinearBVHNode node = nodes[nodeNum];
if (any(BBoxIntersect(node.bounds, ray))) { if (any(BBoxIntersect(node.bounds, ray))) {
uniform unsigned int nPrimitives = node.nPrimitives; const uniform unsigned int nPrimitives = node.nPrimitives;
if (nPrimitives > 0) { if (nPrimitives > 0) {
// Intersect ray with primitives in leaf BVH node // Intersect ray with primitives in leaf BVH node
uniform unsigned int primitivesOffset = node.offset; const uniform unsigned int primitivesOffset = node.offset;
for (uniform unsigned int i = 0; i < nPrimitives; ++i) { for (uniform_t unsigned int i = 0; i < nPrimitives; ++i) {
if (TriIntersect(tris[primitivesOffset+i], ray)) if (TriIntersect(tris[primitivesOffset+i], ray))
hit = true; hit = true;
} }
@@ -277,8 +284,8 @@ static void raytrace_tile(uniform int x0, uniform int x1,
uniform float image[], uniform int id[], uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[], const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) { const uniform Triangle triangles[]) {
uniform float widthScale = (float)(baseWidth) / (float)(width); const uniform float widthScale = (float)(baseWidth) / (float)(width);
uniform float heightScale = (float)(baseHeight) / (float)(height); const uniform float heightScale = (float)(baseHeight) / (float)(height);
foreach_tiled (y = y0 ... y1, x = x0 ... x1) { foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
Ray ray; Ray ray;
@@ -313,12 +320,12 @@ task void raytrace_tile_task(uniform int width, uniform int height,
uniform float image[], uniform int id[], uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[], const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) { const uniform Triangle triangles[]) {
uniform int dx = 64, dy = 8; // must match dx, dy below const uniform int dx = 64, dy = 8; // must match dx, dy below
uniform int xBuckets = (width + (dx-1)) / dx; const uniform int xBuckets = (width + (dx-1)) / dx;
uniform int x0 = (taskIndex % xBuckets) * dx; const uniform int x0 = (taskIndex % xBuckets) * dx;
uniform int x1 = min(x0 + dx, width); const uniform int x1 = min(x0 + dx, width);
uniform int y0 = (taskIndex / xBuckets) * dy; const uniform int y0 = (taskIndex / xBuckets) * dy;
uniform int y1 = min(y0 + dy, height); const uniform int y1 = min(y0 + dy, height);
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight, raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
raster2camera, camera2world, image, raster2camera, camera2world, image,
@@ -333,10 +340,10 @@ export void raytrace_ispc_tasks(uniform int width, uniform int height,
uniform float image[], uniform int id[], uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[], const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) { const uniform Triangle triangles[]) {
uniform int dx = 64, dy = 8; const uniform int dx = 64, dy = 8;
uniform int xBuckets = (width + (dx-1)) / dx; const uniform int xBuckets = (width + (dx-1)) / dx;
uniform int yBuckets = (height + (dy-1)) / dy; const uniform int yBuckets = (height + (dy-1)) / dy;
uniform int nTasks = xBuckets * yBuckets; const uniform int nTasks = xBuckets * yBuckets;
launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight, launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight,
raster2camera, camera2world, raster2camera, camera2world,
image, id, nodes, triangles); image, id, nodes, triangles);

View File

@@ -181,7 +181,7 @@ transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
float tau = 0; float tau = 0;
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y + float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
ray.dir.z * ray.dir.z); ray.dir.z * ray.dir.z);
uniform float stepDist = 0.2; const uniform float stepDist = 0.2;
float stepT = stepDist / rayLength; float stepT = stepDist / rayLength;
float t = rayT0; float t = rayT0;
@@ -207,8 +207,8 @@ distanceSquared(float3 a, float3 b) {
static inline float static inline float
raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) { raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
float rayT0, rayT1; float rayT0, rayT1;
uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8}; const uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8};
uniform float3 lightPos = { -1, 4, 1.5 }; const uniform float3 lightPos = { -1, 4, 1.5 };
if (!IntersectP(ray, pMin, pMax, rayT0, rayT1)) if (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
return 0.; return 0.;
@@ -217,11 +217,11 @@ raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
// Parameters that define the volume scattering characteristics and // Parameters that define the volume scattering characteristics and
// sampling rate for raymarching // sampling rate for raymarching
uniform float Le = .25; // Emission coefficient const uniform float Le = .25; // Emission coefficient
uniform float sigma_a = 10; // Absorption coefficient const uniform float sigma_a = 10; // Absorption coefficient
uniform float sigma_s = 10; // Scattering coefficient const uniform float sigma_s = 10; // Scattering coefficient
uniform float stepDist = 0.025; // Ray step amount const uniform float stepDist = 0.025; // Ray step amount
uniform float lightIntensity = 40; // Light source intensity const uniform float lightIntensity = 40; // Light source intensity
float tau = 0.f; // accumulated beam transmittance float tau = 0.f; // accumulated beam transmittance
float L = 0; // radiance along the ray float L = 0; // radiance along the ray
@@ -375,15 +375,14 @@ volume_task(uniform float density[], uniform int _nVoxels[3],
#define camera2world _camera2world #define camera2world _camera2world
#endif #endif
uniform int dx = 8, dy = 8; // must match value in volume_ispc_tasks const uniform int dx = 8, dy = 8; // must match value in volume_ispc_tasks
uniform int xbuckets = (width + (dx-1)) / dx; const uniform int xbuckets = (width + (dx-1)) / dx;
uniform int ybuckets = (height + (dy-1)) / dy; const uniform int ybuckets = (height + (dy-1)) / dy;
uniform int x0 = (taskIndex % xbuckets) * dx; const uniform int x0 = (taskIndex % xbuckets) * dx;
uniform int y0 = (taskIndex / xbuckets) * dy; const uniform int y0 = (taskIndex / xbuckets) * dy;
uniform int x1 = x0 + dx, y1 = y0 + dy; const uniform int x1 = min(x0 + dx, width);
x1 = min(x1, width); const uniform int y1 = min(y0 + dy, height);
y1 = min(y1, height);
volume_tile(x0, y0, x1, y1, density, nVoxels, raster2camera, volume_tile(x0, y0, x1, y1, density, nVoxels, raster2camera,
camera2world, width, height, image); camera2world, width, height, image);
@@ -406,8 +405,8 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
const uniform float camera2world[4][4], const uniform float camera2world[4][4],
uniform int width, uniform int height, uniform float image[]) { uniform int width, uniform int height, uniform float image[]) {
// Launch tasks to work on (dx,dy)-sized tiles of the image // Launch tasks to work on (dx,dy)-sized tiles of the image
uniform int dx = 8, dy = 8; const uniform int dx = 8, dy = 8;
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy); const uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world, launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
width, height, image); width, height, image);
sync; sync;

208
stmt.cpp
View File

@@ -152,6 +152,21 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
return value; return value;
llvm::Type *elTy = pt->getElementType(); llvm::Type *elTy = pt->getElementType();
/* convert elTy addrspace(3)* to i64* addrspace(3)* */
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
value = ctx->BitCastInst(value, Int64Ptr3, "gep2gen_cast1");
/* convert i64* addrspace(3) to i64* */
llvm::Function *__cvt2gen = m->module->getFunction(
addressSpace == 3 ? "__cvt_loc2gen" : "__cvt_const2gen");
std::vector<llvm::Value *> __cvt2gen_args;
__cvt2gen_args.push_back(value);
value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, "gep2gen_cvt", ctx->GetCurrentBasicBlock());
/* compute offset */
if (addressSpace == 3)
{
assert(elTy->isArrayTy()); assert(elTy->isArrayTy());
const int numElTot = elTy->getArrayNumElements(); const int numElTot = elTy->getArrayNumElements();
const int numEl = numElTot/4; const int numEl = numElTot/4;
@@ -166,24 +181,10 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array"); Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
#endif #endif
/* convert elTy addrspace(3)* to i64* addrspace(3)* */
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
value = ctx->BitCastInst(value, Int64Ptr3, "gep2gen_cast1");
/* convert i64* addrspace(3) to i64* */
llvm::Function *__cvt2gen = m->module->getFunction(
addressSpace == 3 ? "__cvt_loc2gen" : "__cvt_const2gen");
std::vector<llvm::Value *> __cvt2gen_args;
__cvt2gen_args.push_back(value);
value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, "gep2gen_cvt", ctx->GetCurrentBasicBlock());
/* convert i64* to errElTy* */ /* convert i64* to errElTy* */
llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0); llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0);
value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2"); value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
/* compute offset */
if (addressSpace == 3)
{
llvm::Function *funcTid = m->module->getFunction("__tid_x"); llvm::Function *funcTid = m->module->getFunction("__tid_x");
llvm::Function *funcWarpSz = m->module->getFunction("__warpsize"); llvm::Function *funcWarpSz = m->module->getFunction("__warpsize");
llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector<llvm::Value*>(), "gep2gen_tid"); llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector<llvm::Value*>(), "gep2gen_tid");
@@ -268,12 +269,14 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
Error(sym->pos, Error(sym->pos,
"Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.", "Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.",
sym->name.c_str()); sym->name.c_str());
#if 0
if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType()) if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType())
Error(sym->pos, PerformanceWarning(sym->pos,
"const static varying variable ""\"%s\" is not supported with ""\"nvptx\" target.", "\"const static varying\" variable ""\"%s\" is stored in __global address space with ""\"nvptx\" target.",
sym->name.c_str());
if (g->target->getISA() == Target::NVPTX && sym->type->IsUniformType())
PerformanceWarning(sym->pos,
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
sym->name.c_str()); sym->name.c_str());
#endif
// For static variables, we need a compile-time constant value // For static variables, we need a compile-time constant value
// for its initializer; if there's no initializer, we use a // for its initializer; if there's no initializer, we use a
@@ -310,159 +313,30 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::GlobalValue::InternalLinkage, cinit, llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static_") + llvm::Twine("static_") +
llvm::Twine(sym->pos.first_line) + llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str());
// Tell the FunctionEmitContext about the variable
ctx->EmitVariableDebugInfo(sym);
}
else {
#if 0
if (sym->type->IsUniformType() &&
sym->type->IsArrayType() &&
g->target->getISA() == Target::NVPTX)
{
/* deal with "const uniform" or "uniform" arrays for nvptx target */
if (!sym->type->IsConstType())
PerformanceWarning(sym->pos,
"\"uniform\" arrays might be slow with \"nvptx\" target. "
"Unless data sharing between program instances is required, use \"varying\" or \"uniform new\"+\"delete\" if possible.");
if (initExpr != NULL && !sym->type->IsConstType())
Error(initExpr->pos,
"It is not possible to initialize non-constant \"uniform\" array \"%s\" with \"nvptx\" target. "
"Please use \"varying\", \"const static uniform\" or define initializer in the global scope.",
sym->name.c_str());
if (sym->type->IsConstType())
{
llvm::Constant *cinit = NULL;
if (initExpr != NULL) {
if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
continue;
// FIXME: we only need this for function pointers; it was
// already done for atomic types and enums in
// DeclStmt::TypeCheck()...
if (dynamic_cast<ExprList *>(initExpr) == NULL) {
initExpr = TypeConvertExpr(initExpr, sym->type,
"initializer");
// FIXME: and this is only needed to re-establish
// constant-ness so that GetConstant below works for
// constant artithmetic expressions...
initExpr = ::Optimize(initExpr);
}
cinit = initExpr->GetConstant(sym->type);
if (cinit == NULL)
Error(initExpr->pos, "Initializer for static variable "
"\"%s\" must be a constant.", sym->name.c_str());
}
else
{
Error(sym->pos, "Missing initializer for const variable "
"\"%s\".", sym->name.c_str());
}
if (cinit == NULL)
cinit = llvm::Constant::getNullValue(llvmType);
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage,
cinit,
llvm::Twine("local_") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(), llvm::Twine("_") + sym->name.c_str(),
NULL, NULL,
llvm::GlobalVariable::NotThreadLocal, llvm::GlobalVariable::NotThreadLocal,
/*AddressSpace=*/4); /* constant address space */ /*AddressSpace=*/ sym->type->IsUniformType() ? 4 : 0);
// Tell the FunctionEmitContext about the variable
ctx->EmitVariableDebugInfo(sym);
}
else
{
/* fails if pointer passed to function argument, need conversion beforehand */
llvm::Constant *cinit = NULL;
const ArrayType *at = CastType<ArrayType>(sym->type);
const int nel = at->GetElementCount();
/* we must scale # elements by 4, because a thread-block will run 4 warps
* or 128 threads.
* ***note-to-me***:please define these value (128threads/4warps)
* in nvptx-target definition
* instead of compile-time constants
*/
const int nel4 = nel*4;
ArrayType nat(at->GetElementType(), nel4);
llvm::Type *llvmType = nat.LLVMType(g->ctx);
cinit = llvm::UndefValue::get(llvmType);
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage,
cinit,
llvm::Twine("local_") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(),
NULL,
llvm::GlobalVariable::NotThreadLocal,
/*AddressSpace=*/3);
// Tell the FunctionEmitContext about the variable
ctx->EmitVariableDebugInfo(sym);
}
}
else if (
sym->type->IsUniformType() &&
g->target->getISA() == Target::NVPTX)
{
#if 1
// For non-static variables, allocate storage on the stack
sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());
#else
PerformanceWarning(sym->pos,
"\"uniform\" variables might be slow with \"nvptx\" target. "
"Please use \"varying\" if possible.");
ArrayType nat(sym->type, 4);
llvm::Type *llvmType = nat.LLVMType(g->ctx);
llvm::Constant *cinit = llvm::UndefValue::get(llvmType);
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage,
cinit,
llvm::Twine("local_") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(),
NULL,
llvm::GlobalVariable::NotThreadLocal,
/*AddressSpace=*/3);
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos); sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
llvm::PointerType *ptrTy = // Tell the FunctionEmitContext about the variable
llvm::PointerType::get(sym->type->LLVMType(g->ctx),0);
sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_alloc");
#endif
// Tell the FunctionEmitContext about the variable; must do
// this before the initializer stuff.
ctx->EmitVariableDebugInfo(sym); ctx->EmitVariableDebugInfo(sym);
if (initExpr == 0 && sym->type->IsConstType())
Error(sym->pos, "Missing initializer for const variable "
"\"%s\".", sym->name.c_str());
// And then get it initialized...
sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
} }
#else else if (sym->type->IsUniformType() &&
if (sym->type->IsUniformType() && /* NVPTX:
* only non-constant uniform data types are stored in shared memory
* constant uniform are automatically promoted to varying
*/
!sym->type->IsConstType() &&
#if 1
sym->type->IsArrayType() && sym->type->IsArrayType() &&
#endif
g->target->getISA() == Target::NVPTX) g->target->getISA() == Target::NVPTX)
{ {
PerformanceWarning(sym->pos, PerformanceWarning(sym->pos,
"\"uniform\" data types might be slow with \"nvptx\" target. " "Non-constant \"uniform\" data types might be slow with \"nvptx\" target. "
"Unless data sharing between program instances is desired, try either \"varying\" or \"uniform new uniform \"+\"delete\" if possible."); "Unless data sharing between program instances is desired, try \"const [static] uniform\", \"varying\" or \"uniform new uniform \"+\"delete\" if possible.");
/* with __shared__ memory everything must be an array */
int nel = 4; int nel = 4;
ArrayType *nat; ArrayType *nat;
if (sym->type->IsArrayType()) if (sym->type->IsArrayType())
@@ -480,11 +354,12 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
} }
else else
nat = new ArrayType(sym->type, nel); nat = new ArrayType(sym->type, nel);
llvm::Type *llvmType = nat->LLVMType(g->ctx);
llvm::Constant *cinit = llvm::UndefValue::get(llvmType); llvm::Type *llvmTypeUn = nat->LLVMType(g->ctx);
llvm::Constant *cinit = llvm::UndefValue::get(llvmTypeUn);
sym->storagePtr = sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType, new llvm::GlobalVariable(*m->module, llvmTypeUn,
sym->type->IsConstType(), sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, llvm::GlobalValue::InternalLinkage,
cinit, cinit,
@@ -495,9 +370,8 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::GlobalVariable::NotThreadLocal, llvm::GlobalVariable::NotThreadLocal,
/*AddressSpace=*/3); /*AddressSpace=*/3);
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos); sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
llvm::PointerType *ptrTy = llvm::PointerType *ptrTy = llvm::PointerType::get(sym->type->LLVMType(g->ctx),0);
llvm::PointerType::get(sym->type->LLVMType(g->ctx),0); sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_decl");
sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_alloc");
// Tell the FunctionEmitContext about the variable; must do // Tell the FunctionEmitContext about the variable; must do
// this before the initializer stuff. // this before the initializer stuff.
@@ -511,7 +385,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
sym->parentFunction = ctx->GetFunction(); sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos); InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
} }
#endif
else else
{ {
// For non-static variables, allocate storage on the stack // For non-static variables, allocate storage on the stack
@@ -530,7 +403,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos); InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
} }
} }
}
} }