+some uniform related improvements
This commit is contained in:
@@ -4,7 +4,7 @@ CU_SRC=ao.cu
|
|||||||
CXX_SRC=ao.cpp ao_serial.cpp
|
CXX_SRC=ao.cpp ao_serial.cpp
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
|
||||||
LLVM_GPU=1
|
# LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|
||||||
include ../common_gpu.mk
|
include ../common_gpu.mk
|
||||||
|
|||||||
@@ -82,8 +82,8 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
|||||||
uniform float x1, uniform float y1,
|
uniform float x1, uniform float y1,
|
||||||
uniform int width, uniform int height,
|
uniform int width, uniform int height,
|
||||||
uniform int maxIterations, uniform int output[]) {
|
uniform int maxIterations, uniform int output[]) {
|
||||||
uniform float dx = (x1 - x0) / width;
|
const uniform float dx = (x1 - x0) / width;
|
||||||
uniform float dy = (y1 - y0) / height;
|
const uniform float dy = (y1 - y0) / height;
|
||||||
const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
|
const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
|
||||||
const uniform int yspan = 16;
|
const uniform int yspan = 16;
|
||||||
|
|
||||||
|
|||||||
@@ -38,6 +38,13 @@ typedef bool bool_t;
|
|||||||
#endif
|
#endif
|
||||||
typedef float<3> float3;
|
typedef float<3> float3;
|
||||||
|
|
||||||
|
#ifdef __NVPTX__
|
||||||
|
#define uniform_t varying
|
||||||
|
#else
|
||||||
|
#define uniform_t uniform
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct int3
|
struct int3
|
||||||
{
|
{
|
||||||
@@ -124,8 +131,8 @@ inline
|
|||||||
#endif
|
#endif
|
||||||
static bool_t BBoxIntersect(const uniform float bounds[2][3],
|
static bool_t BBoxIntersect(const uniform float bounds[2][3],
|
||||||
const Ray &ray) {
|
const Ray &ray) {
|
||||||
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
const uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
||||||
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
const uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
||||||
float t0 = ray.mint, t1 = ray.maxt;
|
float t0 = ray.mint, t1 = ray.maxt;
|
||||||
|
|
||||||
// Check all three axis-aligned slabs. Don't try to early out; it's
|
// Check all three axis-aligned slabs. Don't try to early out; it's
|
||||||
@@ -164,12 +171,12 @@ static bool_t BBoxIntersect(const uniform float bounds[2][3],
|
|||||||
#if 1
|
#if 1
|
||||||
inline
|
inline
|
||||||
#endif
|
#endif
|
||||||
static bool_t TriIntersect(const uniform Triangle &tri, Ray &ray) {
|
static bool_t TriIntersect(const uniform_t Triangle tri, Ray &ray) {
|
||||||
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
const uniform_t float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
||||||
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
const uniform_t float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
||||||
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
const uniform_t float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
||||||
uniform float3 e1 = p1 - p0;
|
const uniform_t float3 e1 = p1 - p0;
|
||||||
uniform float3 e2 = p2 - p0;
|
const uniform_t float3 e2 = p2 - p0;
|
||||||
|
|
||||||
float3 s1 = Cross(ray.dir, e2);
|
float3 s1 = Cross(ray.dir, e2);
|
||||||
float divisor = Dot(s1, e1);
|
float divisor = Dot(s1, e1);
|
||||||
@@ -218,13 +225,13 @@ BVHIntersect(const uniform LinearBVHNode nodes[],
|
|||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
// Check ray against BVH node
|
// Check ray against BVH node
|
||||||
uniform LinearBVHNode node = nodes[nodeNum];
|
const uniform LinearBVHNode node = nodes[nodeNum];
|
||||||
if (any(BBoxIntersect(node.bounds, ray))) {
|
if (any(BBoxIntersect(node.bounds, ray))) {
|
||||||
uniform unsigned int nPrimitives = node.nPrimitives;
|
const uniform unsigned int nPrimitives = node.nPrimitives;
|
||||||
if (nPrimitives > 0) {
|
if (nPrimitives > 0) {
|
||||||
// Intersect ray with primitives in leaf BVH node
|
// Intersect ray with primitives in leaf BVH node
|
||||||
uniform unsigned int primitivesOffset = node.offset;
|
const uniform unsigned int primitivesOffset = node.offset;
|
||||||
for (uniform unsigned int i = 0; i < nPrimitives; ++i) {
|
for (uniform_t unsigned int i = 0; i < nPrimitives; ++i) {
|
||||||
if (TriIntersect(tris[primitivesOffset+i], ray))
|
if (TriIntersect(tris[primitivesOffset+i], ray))
|
||||||
hit = true;
|
hit = true;
|
||||||
}
|
}
|
||||||
@@ -277,8 +284,8 @@ static void raytrace_tile(uniform int x0, uniform int x1,
|
|||||||
uniform float image[], uniform int id[],
|
uniform float image[], uniform int id[],
|
||||||
const uniform LinearBVHNode nodes[],
|
const uniform LinearBVHNode nodes[],
|
||||||
const uniform Triangle triangles[]) {
|
const uniform Triangle triangles[]) {
|
||||||
uniform float widthScale = (float)(baseWidth) / (float)(width);
|
const uniform float widthScale = (float)(baseWidth) / (float)(width);
|
||||||
uniform float heightScale = (float)(baseHeight) / (float)(height);
|
const uniform float heightScale = (float)(baseHeight) / (float)(height);
|
||||||
|
|
||||||
foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
|
foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
|
||||||
Ray ray;
|
Ray ray;
|
||||||
@@ -313,12 +320,12 @@ task void raytrace_tile_task(uniform int width, uniform int height,
|
|||||||
uniform float image[], uniform int id[],
|
uniform float image[], uniform int id[],
|
||||||
const uniform LinearBVHNode nodes[],
|
const uniform LinearBVHNode nodes[],
|
||||||
const uniform Triangle triangles[]) {
|
const uniform Triangle triangles[]) {
|
||||||
uniform int dx = 64, dy = 8; // must match dx, dy below
|
const uniform int dx = 64, dy = 8; // must match dx, dy below
|
||||||
uniform int xBuckets = (width + (dx-1)) / dx;
|
const uniform int xBuckets = (width + (dx-1)) / dx;
|
||||||
uniform int x0 = (taskIndex % xBuckets) * dx;
|
const uniform int x0 = (taskIndex % xBuckets) * dx;
|
||||||
uniform int x1 = min(x0 + dx, width);
|
const uniform int x1 = min(x0 + dx, width);
|
||||||
uniform int y0 = (taskIndex / xBuckets) * dy;
|
const uniform int y0 = (taskIndex / xBuckets) * dy;
|
||||||
uniform int y1 = min(y0 + dy, height);
|
const uniform int y1 = min(y0 + dy, height);
|
||||||
|
|
||||||
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
|
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
|
||||||
raster2camera, camera2world, image,
|
raster2camera, camera2world, image,
|
||||||
@@ -333,10 +340,10 @@ export void raytrace_ispc_tasks(uniform int width, uniform int height,
|
|||||||
uniform float image[], uniform int id[],
|
uniform float image[], uniform int id[],
|
||||||
const uniform LinearBVHNode nodes[],
|
const uniform LinearBVHNode nodes[],
|
||||||
const uniform Triangle triangles[]) {
|
const uniform Triangle triangles[]) {
|
||||||
uniform int dx = 64, dy = 8;
|
const uniform int dx = 64, dy = 8;
|
||||||
uniform int xBuckets = (width + (dx-1)) / dx;
|
const uniform int xBuckets = (width + (dx-1)) / dx;
|
||||||
uniform int yBuckets = (height + (dy-1)) / dy;
|
const uniform int yBuckets = (height + (dy-1)) / dy;
|
||||||
uniform int nTasks = xBuckets * yBuckets;
|
const uniform int nTasks = xBuckets * yBuckets;
|
||||||
launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight,
|
launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight,
|
||||||
raster2camera, camera2world,
|
raster2camera, camera2world,
|
||||||
image, id, nodes, triangles);
|
image, id, nodes, triangles);
|
||||||
|
|||||||
@@ -181,7 +181,7 @@ transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
|
|||||||
float tau = 0;
|
float tau = 0;
|
||||||
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
|
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
|
||||||
ray.dir.z * ray.dir.z);
|
ray.dir.z * ray.dir.z);
|
||||||
uniform float stepDist = 0.2;
|
const uniform float stepDist = 0.2;
|
||||||
float stepT = stepDist / rayLength;
|
float stepT = stepDist / rayLength;
|
||||||
|
|
||||||
float t = rayT0;
|
float t = rayT0;
|
||||||
@@ -207,8 +207,8 @@ distanceSquared(float3 a, float3 b) {
|
|||||||
static inline float
|
static inline float
|
||||||
raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
||||||
float rayT0, rayT1;
|
float rayT0, rayT1;
|
||||||
uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8};
|
const uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8};
|
||||||
uniform float3 lightPos = { -1, 4, 1.5 };
|
const uniform float3 lightPos = { -1, 4, 1.5 };
|
||||||
|
|
||||||
if (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
|
if (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
|
||||||
return 0.;
|
return 0.;
|
||||||
@@ -217,11 +217,11 @@ raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
|||||||
|
|
||||||
// Parameters that define the volume scattering characteristics and
|
// Parameters that define the volume scattering characteristics and
|
||||||
// sampling rate for raymarching
|
// sampling rate for raymarching
|
||||||
uniform float Le = .25; // Emission coefficient
|
const uniform float Le = .25; // Emission coefficient
|
||||||
uniform float sigma_a = 10; // Absorption coefficient
|
const uniform float sigma_a = 10; // Absorption coefficient
|
||||||
uniform float sigma_s = 10; // Scattering coefficient
|
const uniform float sigma_s = 10; // Scattering coefficient
|
||||||
uniform float stepDist = 0.025; // Ray step amount
|
const uniform float stepDist = 0.025; // Ray step amount
|
||||||
uniform float lightIntensity = 40; // Light source intensity
|
const uniform float lightIntensity = 40; // Light source intensity
|
||||||
|
|
||||||
float tau = 0.f; // accumulated beam transmittance
|
float tau = 0.f; // accumulated beam transmittance
|
||||||
float L = 0; // radiance along the ray
|
float L = 0; // radiance along the ray
|
||||||
@@ -375,15 +375,14 @@ volume_task(uniform float density[], uniform int _nVoxels[3],
|
|||||||
#define camera2world _camera2world
|
#define camera2world _camera2world
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uniform int dx = 8, dy = 8; // must match value in volume_ispc_tasks
|
const uniform int dx = 8, dy = 8; // must match value in volume_ispc_tasks
|
||||||
uniform int xbuckets = (width + (dx-1)) / dx;
|
const uniform int xbuckets = (width + (dx-1)) / dx;
|
||||||
uniform int ybuckets = (height + (dy-1)) / dy;
|
const uniform int ybuckets = (height + (dy-1)) / dy;
|
||||||
|
|
||||||
uniform int x0 = (taskIndex % xbuckets) * dx;
|
const uniform int x0 = (taskIndex % xbuckets) * dx;
|
||||||
uniform int y0 = (taskIndex / xbuckets) * dy;
|
const uniform int y0 = (taskIndex / xbuckets) * dy;
|
||||||
uniform int x1 = x0 + dx, y1 = y0 + dy;
|
const uniform int x1 = min(x0 + dx, width);
|
||||||
x1 = min(x1, width);
|
const uniform int y1 = min(y0 + dy, height);
|
||||||
y1 = min(y1, height);
|
|
||||||
|
|
||||||
volume_tile(x0, y0, x1, y1, density, nVoxels, raster2camera,
|
volume_tile(x0, y0, x1, y1, density, nVoxels, raster2camera,
|
||||||
camera2world, width, height, image);
|
camera2world, width, height, image);
|
||||||
@@ -406,8 +405,8 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
|
|||||||
const uniform float camera2world[4][4],
|
const uniform float camera2world[4][4],
|
||||||
uniform int width, uniform int height, uniform float image[]) {
|
uniform int width, uniform int height, uniform float image[]) {
|
||||||
// Launch tasks to work on (dx,dy)-sized tiles of the image
|
// Launch tasks to work on (dx,dy)-sized tiles of the image
|
||||||
uniform int dx = 8, dy = 8;
|
const uniform int dx = 8, dy = 8;
|
||||||
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
const uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
||||||
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
|
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
|
||||||
width, height, image);
|
width, height, image);
|
||||||
sync;
|
sync;
|
||||||
|
|||||||
290
stmt.cpp
290
stmt.cpp
@@ -152,19 +152,6 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
|
|||||||
return value;
|
return value;
|
||||||
|
|
||||||
llvm::Type *elTy = pt->getElementType();
|
llvm::Type *elTy = pt->getElementType();
|
||||||
assert(elTy->isArrayTy());
|
|
||||||
const int numElTot = elTy->getArrayNumElements();
|
|
||||||
const int numEl = numElTot/4;
|
|
||||||
#if 0
|
|
||||||
fprintf(stderr, " --- detected addrspace(3) sz= %d --- \n", numEl);
|
|
||||||
#endif
|
|
||||||
llvm::ArrayType *arrTy = llvm::dyn_cast<llvm::ArrayType>(pt->getArrayElementType());
|
|
||||||
assert(arrTy != NULL);
|
|
||||||
llvm::Type *arrElTy = arrTy->getElementType();
|
|
||||||
#if 0
|
|
||||||
if (arrElTy->isArrayTy())
|
|
||||||
Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* convert elTy addrspace(3)* to i64* addrspace(3)* */
|
/* convert elTy addrspace(3)* to i64* addrspace(3)* */
|
||||||
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
|
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
|
||||||
@@ -177,13 +164,27 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
|
|||||||
__cvt2gen_args.push_back(value);
|
__cvt2gen_args.push_back(value);
|
||||||
value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, "gep2gen_cvt", ctx->GetCurrentBasicBlock());
|
value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, "gep2gen_cvt", ctx->GetCurrentBasicBlock());
|
||||||
|
|
||||||
/* convert i64* to errElTy* */
|
|
||||||
llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0);
|
|
||||||
value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
|
|
||||||
|
|
||||||
/* compute offset */
|
/* compute offset */
|
||||||
if (addressSpace == 3)
|
if (addressSpace == 3)
|
||||||
{
|
{
|
||||||
|
assert(elTy->isArrayTy());
|
||||||
|
const int numElTot = elTy->getArrayNumElements();
|
||||||
|
const int numEl = numElTot/4;
|
||||||
|
#if 0
|
||||||
|
fprintf(stderr, " --- detected addrspace(3) sz= %d --- \n", numEl);
|
||||||
|
#endif
|
||||||
|
llvm::ArrayType *arrTy = llvm::dyn_cast<llvm::ArrayType>(pt->getArrayElementType());
|
||||||
|
assert(arrTy != NULL);
|
||||||
|
llvm::Type *arrElTy = arrTy->getElementType();
|
||||||
|
#if 0
|
||||||
|
if (arrElTy->isArrayTy())
|
||||||
|
Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* convert i64* to errElTy* */
|
||||||
|
llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0);
|
||||||
|
value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
|
||||||
|
|
||||||
llvm::Function *funcTid = m->module->getFunction("__tid_x");
|
llvm::Function *funcTid = m->module->getFunction("__tid_x");
|
||||||
llvm::Function *funcWarpSz = m->module->getFunction("__warpsize");
|
llvm::Function *funcWarpSz = m->module->getFunction("__warpsize");
|
||||||
llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector<llvm::Value*>(), "gep2gen_tid");
|
llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector<llvm::Value*>(), "gep2gen_tid");
|
||||||
@@ -268,12 +269,14 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
Error(sym->pos,
|
Error(sym->pos,
|
||||||
"Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.",
|
"Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.",
|
||||||
sym->name.c_str());
|
sym->name.c_str());
|
||||||
#if 0
|
|
||||||
if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType())
|
if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType())
|
||||||
Error(sym->pos,
|
PerformanceWarning(sym->pos,
|
||||||
"const static varying variable ""\"%s\" is not supported with ""\"nvptx\" target.",
|
"\"const static varying\" variable ""\"%s\" is stored in __global address space with ""\"nvptx\" target.",
|
||||||
|
sym->name.c_str());
|
||||||
|
if (g->target->getISA() == Target::NVPTX && sym->type->IsUniformType())
|
||||||
|
PerformanceWarning(sym->pos,
|
||||||
|
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
|
||||||
sym->name.c_str());
|
sym->name.c_str());
|
||||||
#endif
|
|
||||||
|
|
||||||
// For static variables, we need a compile-time constant value
|
// For static variables, we need a compile-time constant value
|
||||||
// for its initializer; if there's no initializer, we use a
|
// for its initializer; if there's no initializer, we use a
|
||||||
@@ -310,208 +313,78 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
llvm::GlobalValue::InternalLinkage, cinit,
|
llvm::GlobalValue::InternalLinkage, cinit,
|
||||||
llvm::Twine("static_") +
|
llvm::Twine("static_") +
|
||||||
llvm::Twine(sym->pos.first_line) +
|
llvm::Twine(sym->pos.first_line) +
|
||||||
llvm::Twine("_") + sym->name.c_str());
|
llvm::Twine("_") + sym->name.c_str(),
|
||||||
|
NULL,
|
||||||
|
llvm::GlobalVariable::NotThreadLocal,
|
||||||
|
/*AddressSpace=*/ sym->type->IsUniformType() ? 4 : 0);
|
||||||
|
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
|
||||||
// Tell the FunctionEmitContext about the variable
|
// Tell the FunctionEmitContext about the variable
|
||||||
ctx->EmitVariableDebugInfo(sym);
|
ctx->EmitVariableDebugInfo(sym);
|
||||||
}
|
}
|
||||||
else {
|
else if (sym->type->IsUniformType() &&
|
||||||
#if 0
|
/* NVPTX:
|
||||||
if (sym->type->IsUniformType() &&
|
* only non-constant uniform data types are stored in shared memory
|
||||||
sym->type->IsArrayType() &&
|
* constant uniform are automatically promoted to varying
|
||||||
g->target->getISA() == Target::NVPTX)
|
*/
|
||||||
|
!sym->type->IsConstType() &&
|
||||||
|
#if 1
|
||||||
|
sym->type->IsArrayType() &&
|
||||||
|
#endif
|
||||||
|
g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
/* deal with "const uniform" or "uniform" arrays for nvptx target */
|
PerformanceWarning(sym->pos,
|
||||||
if (!sym->type->IsConstType())
|
"Non-constant \"uniform\" data types might be slow with \"nvptx\" target. "
|
||||||
PerformanceWarning(sym->pos,
|
"Unless data sharing between program instances is desired, try \"const [static] uniform\", \"varying\" or \"uniform new uniform \"+\"delete\" if possible.");
|
||||||
"\"uniform\" arrays might be slow with \"nvptx\" target. "
|
|
||||||
"Unless data sharing between program instances is required, use \"varying\" or \"uniform new\"+\"delete\" if possible.");
|
|
||||||
if (initExpr != NULL && !sym->type->IsConstType())
|
|
||||||
Error(initExpr->pos,
|
|
||||||
"It is not possible to initialize non-constant \"uniform\" array \"%s\" with \"nvptx\" target. "
|
|
||||||
"Please use \"varying\", \"const static uniform\" or define initializer in the global scope.",
|
|
||||||
sym->name.c_str());
|
|
||||||
|
|
||||||
if (sym->type->IsConstType())
|
/* with __shared__ memory everything must be an array */
|
||||||
|
int nel = 4;
|
||||||
|
ArrayType *nat;
|
||||||
|
if (sym->type->IsArrayType())
|
||||||
{
|
{
|
||||||
llvm::Constant *cinit = NULL;
|
|
||||||
if (initExpr != NULL) {
|
|
||||||
if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
|
|
||||||
continue;
|
|
||||||
// FIXME: we only need this for function pointers; it was
|
|
||||||
// already done for atomic types and enums in
|
|
||||||
// DeclStmt::TypeCheck()...
|
|
||||||
if (dynamic_cast<ExprList *>(initExpr) == NULL) {
|
|
||||||
initExpr = TypeConvertExpr(initExpr, sym->type,
|
|
||||||
"initializer");
|
|
||||||
// FIXME: and this is only needed to re-establish
|
|
||||||
// constant-ness so that GetConstant below works for
|
|
||||||
// constant artithmetic expressions...
|
|
||||||
initExpr = ::Optimize(initExpr);
|
|
||||||
}
|
|
||||||
|
|
||||||
cinit = initExpr->GetConstant(sym->type);
|
|
||||||
if (cinit == NULL)
|
|
||||||
Error(initExpr->pos, "Initializer for static variable "
|
|
||||||
"\"%s\" must be a constant.", sym->name.c_str());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Error(sym->pos, "Missing initializer for const variable "
|
|
||||||
"\"%s\".", sym->name.c_str());
|
|
||||||
}
|
|
||||||
if (cinit == NULL)
|
|
||||||
cinit = llvm::Constant::getNullValue(llvmType);
|
|
||||||
|
|
||||||
sym->storagePtr =
|
|
||||||
new llvm::GlobalVariable(*m->module, llvmType,
|
|
||||||
sym->type->IsConstType(),
|
|
||||||
llvm::GlobalValue::InternalLinkage,
|
|
||||||
cinit,
|
|
||||||
llvm::Twine("local_") +
|
|
||||||
llvm::Twine(sym->pos.first_line) +
|
|
||||||
llvm::Twine("_") + sym->name.c_str(),
|
|
||||||
NULL,
|
|
||||||
llvm::GlobalVariable::NotThreadLocal,
|
|
||||||
/*AddressSpace=*/4); /* constant address space */
|
|
||||||
// Tell the FunctionEmitContext about the variable
|
|
||||||
ctx->EmitVariableDebugInfo(sym);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* fails if pointer passed to function argument, need conversion beforehand */
|
|
||||||
llvm::Constant *cinit = NULL;
|
|
||||||
const ArrayType *at = CastType<ArrayType>(sym->type);
|
const ArrayType *at = CastType<ArrayType>(sym->type);
|
||||||
const int nel = at->GetElementCount();
|
nel = at->GetElementCount();
|
||||||
/* we must scale # elements by 4, because a thread-block will run 4 warps
|
/* we must scale # elements by 4, because a thread-block will run 4 warps
|
||||||
* or 128 threads.
|
* or 128 threads.
|
||||||
* ***note-to-me***:please define these value (128threads/4warps)
|
* ***note-to-me***:please define these value (128threads/4warps)
|
||||||
* in nvptx-target definition
|
* in nvptx-target definition
|
||||||
* instead of compile-time constants
|
* instead of compile-time constants
|
||||||
*/
|
*/
|
||||||
const int nel4 = nel*4;
|
nel *= 4;
|
||||||
ArrayType nat(at->GetElementType(), nel4);
|
nat = new ArrayType(at->GetElementType(), nel);
|
||||||
llvm::Type *llvmType = nat.LLVMType(g->ctx);
|
|
||||||
cinit = llvm::UndefValue::get(llvmType);
|
|
||||||
|
|
||||||
sym->storagePtr =
|
|
||||||
new llvm::GlobalVariable(*m->module, llvmType,
|
|
||||||
sym->type->IsConstType(),
|
|
||||||
llvm::GlobalValue::InternalLinkage,
|
|
||||||
cinit,
|
|
||||||
llvm::Twine("local_") +
|
|
||||||
llvm::Twine(sym->pos.first_line) +
|
|
||||||
llvm::Twine("_") + sym->name.c_str(),
|
|
||||||
NULL,
|
|
||||||
llvm::GlobalVariable::NotThreadLocal,
|
|
||||||
/*AddressSpace=*/3);
|
|
||||||
// Tell the FunctionEmitContext about the variable
|
|
||||||
ctx->EmitVariableDebugInfo(sym);
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
nat = new ArrayType(sym->type, nel);
|
||||||
|
|
||||||
|
llvm::Type *llvmTypeUn = nat->LLVMType(g->ctx);
|
||||||
|
llvm::Constant *cinit = llvm::UndefValue::get(llvmTypeUn);
|
||||||
|
|
||||||
|
sym->storagePtr =
|
||||||
|
new llvm::GlobalVariable(*m->module, llvmTypeUn,
|
||||||
|
sym->type->IsConstType(),
|
||||||
|
llvm::GlobalValue::InternalLinkage,
|
||||||
|
cinit,
|
||||||
|
llvm::Twine("local_") +
|
||||||
|
llvm::Twine(sym->pos.first_line) +
|
||||||
|
llvm::Twine("_") + sym->name.c_str(),
|
||||||
|
NULL,
|
||||||
|
llvm::GlobalVariable::NotThreadLocal,
|
||||||
|
/*AddressSpace=*/3);
|
||||||
|
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
|
||||||
|
llvm::PointerType *ptrTy = llvm::PointerType::get(sym->type->LLVMType(g->ctx),0);
|
||||||
|
sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_decl");
|
||||||
|
|
||||||
|
// Tell the FunctionEmitContext about the variable; must do
|
||||||
|
// this before the initializer stuff.
|
||||||
|
ctx->EmitVariableDebugInfo(sym);
|
||||||
|
|
||||||
|
if (initExpr == 0 && sym->type->IsConstType())
|
||||||
|
Error(sym->pos, "Missing initializer for const variable "
|
||||||
|
"\"%s\".", sym->name.c_str());
|
||||||
|
|
||||||
|
// And then get it initialized...
|
||||||
|
sym->parentFunction = ctx->GetFunction();
|
||||||
|
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||||
}
|
}
|
||||||
else if (
|
|
||||||
sym->type->IsUniformType() &&
|
|
||||||
g->target->getISA() == Target::NVPTX)
|
|
||||||
{
|
|
||||||
#if 1
|
|
||||||
// For non-static variables, allocate storage on the stack
|
|
||||||
sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());
|
|
||||||
#else
|
|
||||||
PerformanceWarning(sym->pos,
|
|
||||||
"\"uniform\" variables might be slow with \"nvptx\" target. "
|
|
||||||
"Please use \"varying\" if possible.");
|
|
||||||
|
|
||||||
ArrayType nat(sym->type, 4);
|
|
||||||
llvm::Type *llvmType = nat.LLVMType(g->ctx);
|
|
||||||
llvm::Constant *cinit = llvm::UndefValue::get(llvmType);
|
|
||||||
|
|
||||||
sym->storagePtr =
|
|
||||||
new llvm::GlobalVariable(*m->module, llvmType,
|
|
||||||
sym->type->IsConstType(),
|
|
||||||
llvm::GlobalValue::InternalLinkage,
|
|
||||||
cinit,
|
|
||||||
llvm::Twine("local_") +
|
|
||||||
llvm::Twine(sym->pos.first_line) +
|
|
||||||
llvm::Twine("_") + sym->name.c_str(),
|
|
||||||
NULL,
|
|
||||||
llvm::GlobalVariable::NotThreadLocal,
|
|
||||||
/*AddressSpace=*/3);
|
|
||||||
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
|
|
||||||
llvm::PointerType *ptrTy =
|
|
||||||
llvm::PointerType::get(sym->type->LLVMType(g->ctx),0);
|
|
||||||
sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_alloc");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// Tell the FunctionEmitContext about the variable; must do
|
|
||||||
// this before the initializer stuff.
|
|
||||||
ctx->EmitVariableDebugInfo(sym);
|
|
||||||
|
|
||||||
if (initExpr == 0 && sym->type->IsConstType())
|
|
||||||
Error(sym->pos, "Missing initializer for const variable "
|
|
||||||
"\"%s\".", sym->name.c_str());
|
|
||||||
|
|
||||||
// And then get it initialized...
|
|
||||||
sym->parentFunction = ctx->GetFunction();
|
|
||||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (sym->type->IsUniformType() &&
|
|
||||||
sym->type->IsArrayType() &&
|
|
||||||
g->target->getISA() == Target::NVPTX)
|
|
||||||
{
|
|
||||||
PerformanceWarning(sym->pos,
|
|
||||||
"\"uniform\" data types might be slow with \"nvptx\" target. "
|
|
||||||
"Unless data sharing between program instances is desired, try either \"varying\" or \"uniform new uniform \"+\"delete\" if possible.");
|
|
||||||
|
|
||||||
int nel = 4;
|
|
||||||
ArrayType *nat;
|
|
||||||
if (sym->type->IsArrayType())
|
|
||||||
{
|
|
||||||
const ArrayType *at = CastType<ArrayType>(sym->type);
|
|
||||||
nel = at->GetElementCount();
|
|
||||||
/* we must scale # elements by 4, because a thread-block will run 4 warps
|
|
||||||
* or 128 threads.
|
|
||||||
* ***note-to-me***:please define these value (128threads/4warps)
|
|
||||||
* in nvptx-target definition
|
|
||||||
* instead of compile-time constants
|
|
||||||
*/
|
|
||||||
nel *= 4;
|
|
||||||
nat = new ArrayType(at->GetElementType(), nel);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
nat = new ArrayType(sym->type, nel);
|
|
||||||
llvm::Type *llvmType = nat->LLVMType(g->ctx);
|
|
||||||
llvm::Constant *cinit = llvm::UndefValue::get(llvmType);
|
|
||||||
|
|
||||||
sym->storagePtr =
|
|
||||||
new llvm::GlobalVariable(*m->module, llvmType,
|
|
||||||
sym->type->IsConstType(),
|
|
||||||
llvm::GlobalValue::InternalLinkage,
|
|
||||||
cinit,
|
|
||||||
llvm::Twine("local_") +
|
|
||||||
llvm::Twine(sym->pos.first_line) +
|
|
||||||
llvm::Twine("_") + sym->name.c_str(),
|
|
||||||
NULL,
|
|
||||||
llvm::GlobalVariable::NotThreadLocal,
|
|
||||||
/*AddressSpace=*/3);
|
|
||||||
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
|
|
||||||
llvm::PointerType *ptrTy =
|
|
||||||
llvm::PointerType::get(sym->type->LLVMType(g->ctx),0);
|
|
||||||
sym->storagePtr = ctx->BitCastInst(sym->storagePtr, ptrTy, "uniform_alloc");
|
|
||||||
|
|
||||||
// Tell the FunctionEmitContext about the variable; must do
|
|
||||||
// this before the initializer stuff.
|
|
||||||
ctx->EmitVariableDebugInfo(sym);
|
|
||||||
|
|
||||||
if (initExpr == 0 && sym->type->IsConstType())
|
|
||||||
Error(sym->pos, "Missing initializer for const variable "
|
|
||||||
"\"%s\".", sym->name.c_str());
|
|
||||||
|
|
||||||
// And then get it initialized...
|
|
||||||
sym->parentFunction = ctx->GetFunction();
|
|
||||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// For non-static variables, allocate storage on the stack
|
// For non-static variables, allocate storage on the stack
|
||||||
@@ -529,7 +402,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
sym->parentFunction = ctx->GetFunction();
|
sym->parentFunction = ctx->GetFunction();
|
||||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user