+fixed some examples, found some bugs, and bugs in ptxas/cuda
This commit is contained in:
2
Makefile
2
Makefile
@@ -36,7 +36,7 @@
|
|||||||
|
|
||||||
# If you have your own special version of llvm and/or clang, change
|
# If you have your own special version of llvm and/or clang, change
|
||||||
# these variables to match.
|
# these variables to match.
|
||||||
LLVM_CONFIG=$(shell which $(HOME)/usr/local/llvm/bin-trunk/bin/llvm-config)
|
LLVM_CONFIG=$(shell which $(HOME)/usr/local/llvm/bin-3.4/bin/llvm-config)
|
||||||
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
||||||
|
|
||||||
# Enable ARM by request
|
# Enable ARM by request
|
||||||
|
|||||||
@@ -654,8 +654,18 @@ declare i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone
|
|||||||
declare i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone
|
declare i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone
|
||||||
|
|
||||||
define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline {
|
define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline {
|
||||||
%r = extractelement <1 x float> %v, i32 0
|
%value = extractelement <1 x float> %v, i32 0
|
||||||
ret float %r
|
%call = tail call float @__shfl_xor_float_nvptx(float %value, i32 16)
|
||||||
|
%call1 = fadd float %call, %value
|
||||||
|
%call.1 = tail call float @__shfl_xor_float_nvptx(float %call1, i32 8)
|
||||||
|
%call1.1 = fadd float %call1, %call.1
|
||||||
|
%call.2 = tail call float @__shfl_xor_float_nvptx(float %call1.1, i32 4)
|
||||||
|
%call1.2 = fadd float %call1.1, %call.2
|
||||||
|
%call.3 = tail call float @__shfl_xor_float_nvptx(float %call1.2, i32 2)
|
||||||
|
%call1.3 = fadd float %call1.2, %call.3
|
||||||
|
%call.4 = tail call float @__shfl_xor_float_nvptx(float %call1.3, i32 1)
|
||||||
|
%call1.4 = fadd float %call1.3, %call.4
|
||||||
|
ret float %call1.4
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @__reduce_min_float(<1 x float>) nounwind readnone {
|
define float @__reduce_min_float(<1 x float>) nounwind readnone {
|
||||||
|
|||||||
2
ctx.cpp
2
ctx.cpp
@@ -1410,7 +1410,7 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
|||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
if (1 || g->target->getISA() != Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::SmallVector<llvm::Constant*, 16> array;
|
llvm::SmallVector<llvm::Constant*, 16> array;
|
||||||
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
||||||
|
|||||||
2
func.cpp
2
func.cpp
@@ -512,6 +512,7 @@ Function::GenerateIR() {
|
|||||||
if (g->target->getISA() == Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */
|
functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */
|
||||||
|
#if 0
|
||||||
llvm::NamedMDNode* annotations =
|
llvm::NamedMDNode* annotations =
|
||||||
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
||||||
llvm::SmallVector<llvm::Value*, 3> av;
|
llvm::SmallVector<llvm::Value*, 3> av;
|
||||||
@@ -519,6 +520,7 @@ Function::GenerateIR() {
|
|||||||
av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
|
av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
|
||||||
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
|
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
|
||||||
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
llvm::Function *appFunction =
|
llvm::Function *appFunction =
|
||||||
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
|
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
|
||||||
|
|||||||
21
module.cpp
21
module.cpp
@@ -427,15 +427,6 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (g->target->getISA() == Target::NVPTX &&
|
|
||||||
type->IsVaryingType())
|
|
||||||
{
|
|
||||||
Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (Type::Equal(type, AtomicType::Void)) {
|
if (Type::Equal(type, AtomicType::Void)) {
|
||||||
Error(pos, "\"void\" type global variable is illegal.");
|
Error(pos, "\"void\" type global variable is illegal.");
|
||||||
return;
|
return;
|
||||||
@@ -453,6 +444,17 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
if (g->target->getISA() == Target::NVPTX &&
|
||||||
|
at != NULL &&
|
||||||
|
type->IsVaryingType())
|
||||||
|
{
|
||||||
|
Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
llvm::Type *llvmType = type->LLVMType(g->ctx);
|
llvm::Type *llvmType = type->LLVMType(g->ctx);
|
||||||
if (llvmType == NULL)
|
if (llvmType == NULL)
|
||||||
return;
|
return;
|
||||||
@@ -2130,6 +2132,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
|||||||
opts.addMacroDef("cif=if");
|
opts.addMacroDef("cif=if");
|
||||||
opts.addMacroDef("cfor=for");
|
opts.addMacroDef("cfor=for");
|
||||||
opts.addMacroDef("cwhile=while");
|
opts.addMacroDef("cwhile=while");
|
||||||
|
opts.addMacroDef("ccontinue=continue");
|
||||||
opts.addMacroDef("cdo=do");
|
opts.addMacroDef("cdo=do");
|
||||||
opts.addMacroDef("taskIndex=blockIndex0()");
|
opts.addMacroDef("taskIndex=blockIndex0()");
|
||||||
opts.addMacroDef("taskCount=blockCount0()");
|
opts.addMacroDef("taskCount=blockCount0()");
|
||||||
|
|||||||
4
opt.cpp
4
opt.cpp
@@ -497,7 +497,11 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
// run absolutely no optimizations, since the front-end needs us to
|
// run absolutely no optimizations, since the front-end needs us to
|
||||||
// take the various __pseudo_* functions it has emitted and turn
|
// take the various __pseudo_* functions it has emitted and turn
|
||||||
// them into something that can actually execute.
|
// them into something that can actually execute.
|
||||||
|
|
||||||
|
if (g->opt.disableGatherScatterOptimizations == false &&
|
||||||
|
g->target->getVectorWidth() > 1)
|
||||||
optPM.add(CreateImproveMemoryOpsPass(), 100);
|
optPM.add(CreateImproveMemoryOpsPass(), 100);
|
||||||
|
|
||||||
if (g->opt.disableHandlePseudoMemoryOps == false)
|
if (g->opt.disableHandlePseudoMemoryOps == false)
|
||||||
optPM.add(CreateReplacePseudoMemoryOpsPass());
|
optPM.add(CreateReplacePseudoMemoryOpsPass());
|
||||||
|
|
||||||
|
|||||||
@@ -257,7 +257,7 @@ def run_test(testname):
|
|||||||
cc_cmd = "%s %s -DTEST_SIG=%d -o %s" % \
|
cc_cmd = "%s %s -DTEST_SIG=%d -o %s" % \
|
||||||
(nvptxcc_exe_rel, obj_name, match, exe_name)
|
(nvptxcc_exe_rel, obj_name, match, exe_name)
|
||||||
|
|
||||||
ispc_cmd = ispc_exe_rel + " --woff %s -o %s --arch=%s --target=%s" % \
|
ispc_cmd = ispc_exe_rel + " --woff %s -o %s -O3 --arch=%s --target=%s" % \
|
||||||
(filename, obj_name, options.arch, options.target)
|
(filename, obj_name, options.arch, options.target)
|
||||||
if options.no_opt:
|
if options.no_opt:
|
||||||
ispc_cmd += " -O0"
|
ispc_cmd += " -O0"
|
||||||
@@ -271,7 +271,7 @@ def run_test(testname):
|
|||||||
print "Grepping: %s" % grep_cmd
|
print "Grepping: %s" % grep_cmd
|
||||||
sp = subprocess.Popen(grep_cmd, shell=True)
|
sp = subprocess.Popen(grep_cmd, shell=True)
|
||||||
sp.communicate()
|
sp.communicate()
|
||||||
ispc_cmd = ispc_exe_rel + " --woff %s -o %s --emit-asm --target=%s" % \
|
ispc_cmd = ispc_exe_rel + " --woff %s -o %s -O3 --emit-asm --target=%s" % \
|
||||||
(filename4ptx, obj_name, options.target)
|
(filename4ptx, obj_name, options.target)
|
||||||
|
|
||||||
# compile the ispc code, make the executable, and run it...
|
# compile the ispc code, make the executable, and run it...
|
||||||
@@ -287,7 +287,7 @@ def run_test(testname):
|
|||||||
basename = os.path.basename(filename)
|
basename = os.path.basename(filename)
|
||||||
os.unlink("%s.pdb" % basename)
|
os.unlink("%s.pdb" % basename)
|
||||||
os.unlink("%s.ilk" % basename)
|
os.unlink("%s.ilk" % basename)
|
||||||
os.unlink(obj_name)
|
# os.unlink(obj_name)
|
||||||
except:
|
except:
|
||||||
None
|
None
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
|
|||||||
|
|
||||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
float b = (programCount == 1) ? 3 : broadcast(a, 2);
|
float b = (programCount == 1) ? 4 : broadcast(a, 2);
|
||||||
RET[programIndex] = b;
|
RET[programIndex] = b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,8 +19,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
|
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[0] = RET[4] = RET[8] = RET[12] = 2;
|
for (int i = 0; i < programCount; i += 4)
|
||||||
RET[1] = RET[5] = RET[9] = RET[13] = 3;
|
{
|
||||||
RET[2] = RET[6] = RET[10] = RET[14] = 5;
|
RET[i+0] = 2;
|
||||||
RET[3] = RET[7] = RET[11] = RET[15] = 6;
|
RET[i+1] = 3;
|
||||||
|
RET[i+2] = 5;
|
||||||
|
RET[i+3] = 6;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,9 @@ export void f_fu(uniform float RET[4], uniform float aFOO[4], uniform float b) {
|
|||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 3;
|
RET[programIndex] = 3;
|
||||||
RET[0] = RET[4] = RET[8] = RET[12] = 1;
|
for (int i = 0; i < programCount; i += 4)
|
||||||
RET[3] = RET[7] = RET[11] = RET[15] = 29;
|
{
|
||||||
|
RET[i+0] = 1;
|
||||||
|
RET[i+3] = 29;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 32;
|
RET[programIndex] = 32;
|
||||||
RET[2] = RET[6] = RET[10] = RET[14] = 38;
|
for (int i = 0; i < programCount; i += 4)
|
||||||
RET[3] = RET[7] = RET[11] = RET[15] = 39;
|
{
|
||||||
|
RET[i+2] = 38;
|
||||||
|
RET[i+3] = 39;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,14 +4,14 @@ export uniform int width() { return programCount; }
|
|||||||
|
|
||||||
|
|
||||||
struct Foo {
|
struct Foo {
|
||||||
uniform float x[17];
|
uniform float x[programCount+1];
|
||||||
};
|
};
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
uniform Foo foo;
|
uniform Foo foo;
|
||||||
uniform int i;
|
uniform int i;
|
||||||
cfor (i = 0; i < 17; ++i)
|
cfor (i = 0; i < programCount+1; ++i)
|
||||||
foo.x[i] = i;
|
foo.x[i] = i;
|
||||||
|
|
||||||
if ((int)a & 1)
|
if ((int)a & 1)
|
||||||
|
|||||||
@@ -4,9 +4,9 @@ export uniform int width() { return programCount; }
|
|||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
uniform double udx[25][25];
|
uniform double udx[programCount+1][programCount+1];
|
||||||
cfor (uniform int i = 0; i < 25; ++i)
|
cfor (uniform int i = 0; i < programCount+1; ++i)
|
||||||
cfor (uniform int j = 0; j < 25; ++j)
|
cfor (uniform int j = 0; j < programCount+1; ++j)
|
||||||
udx[i][j] = 10*i+j;
|
udx[i][j] = 10*i+j;
|
||||||
|
|
||||||
int x = 1;
|
int x = 1;
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ export uniform int width() { return programCount; }
|
|||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
uniform float udx[20][20];
|
uniform float udx[programCount+1][programCount+1];
|
||||||
cfor (uniform int i = 0; i < 20; ++i)
|
cfor (uniform int i = 0; i < programCount+1; ++i)
|
||||||
cfor (uniform int j = 0; j < 20; ++j)
|
cfor (uniform int j = 0; j < programCount+1; ++j)
|
||||||
udx[i][j] = 100*i+j;
|
udx[i][j] = 100*i+j;
|
||||||
|
|
||||||
int x = 1;
|
int x = 1;
|
||||||
|
|||||||
@@ -13,9 +13,9 @@ float func(Foo foo[], int offset) {
|
|||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
Foo foo[17];
|
Foo foo[programCount+1];
|
||||||
uniform int i;
|
uniform int i;
|
||||||
cfor (i = 0; i < 17; ++i)
|
cfor (i = 0; i < programCount+1; ++i)
|
||||||
foo[i].f = i*a;
|
foo[i].f = i*a;
|
||||||
RET[programIndex] = func(foo, (int)a);
|
RET[programIndex] = func(foo, (int)a);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,9 +13,9 @@ float func(Foo foo[], int offset) {
|
|||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
Foo foo[17];
|
Foo foo[programCount+1];
|
||||||
uniform int i;
|
uniform int i;
|
||||||
cfor (i = 0; i < 17; ++i)
|
cfor (i = 0; i < programCount+1; ++i)
|
||||||
foo[i].f = i*a;
|
foo[i].f = i*a;
|
||||||
RET[programIndex] = func(foo, (int)a);
|
RET[programIndex] = func(foo, (int)a);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,9 +9,9 @@ struct Foo {
|
|||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
Foo foo[17];
|
Foo foo[programCount+1];
|
||||||
uniform int i;
|
uniform int i;
|
||||||
cfor (i = 0; i < 17; ++i)
|
cfor (i = 0; i < programCount+1; ++i)
|
||||||
foo[i].f = i*a;
|
foo[i].f = i*a;
|
||||||
RET[programIndex] = foo[(int)a].f;
|
RET[programIndex] = foo[(int)a].f;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,8 +17,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
|
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[0] = RET[4] = RET[8] = RET[12] = 1;
|
for (int i = 0; i < programCount; i += 4)
|
||||||
RET[1] = RET[5] = RET[9] = RET[13] = 3;
|
{
|
||||||
RET[2] = RET[6] = RET[10] = RET[14] = 3;
|
RET[i+0] = 1;
|
||||||
RET[3] = RET[7] = RET[11] = RET[15] = 29;
|
RET[i+1] = 3;
|
||||||
|
RET[i+2] = 3;
|
||||||
|
RET[i+3] = 29;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,8 +17,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
|
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[0] = RET[4] = RET[8] = RET[12] = 1;
|
for (int i = 0; i < programCount; i += 4)
|
||||||
RET[1] = RET[5] = RET[9] = RET[13] = 3;
|
{
|
||||||
RET[2] = RET[6] = RET[10] = RET[14] = 3;
|
RET[i+0] = 1;
|
||||||
RET[3] = RET[7] = RET[11] = RET[15] = 29;
|
RET[i+1] = 3;
|
||||||
|
RET[i+2] = 3;
|
||||||
|
RET[i+3] = 29;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,8 +17,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
|
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[0] = RET[4] = RET[8] = RET[12] = 1;
|
for (int i = 0; i < programCount; i += 4)
|
||||||
RET[1] = RET[5] = RET[9] = RET[13] = 3;
|
{
|
||||||
RET[2] = RET[6] = RET[10] = RET[14] = 3;
|
RET[i+0] = 1;
|
||||||
RET[3] = RET[7] = RET[11] = RET[15] = 29;
|
RET[i+1] = 3;
|
||||||
|
RET[i+2] = 3;
|
||||||
|
RET[i+3] = 29;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,8 +8,11 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[0] = RET[4] = RET[8] = RET[12] = 0x0.0p+0;
|
for (int i = 0; i < programCount; i += 4)
|
||||||
RET[1] = RET[5] = RET[9] = RET[13] = 0x1.62e43p-1;
|
{
|
||||||
RET[2] = RET[6] = RET[10] = RET[14] = 0x1.193ea8p+0;
|
RET[i+0] = 0x0.0p+0;
|
||||||
RET[3] = RET[7] = RET[11] = RET[15] = 0x1.62e43p+0;
|
RET[i+1] = 0x1.62e43p-1;
|
||||||
|
RET[i+2] = 0x1.193ea8p+0;
|
||||||
|
RET[i+3] = 0x1.62e43p+0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user