This commit is contained in:
Evghenii
2014-02-20 11:20:13 +01:00
parent a8c5da0ae0
commit 24e1a98275
17 changed files with 4888 additions and 23 deletions

View File

@@ -705,6 +705,19 @@ define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline
ret float %ret ret float %ret
} }
define double @__rsqrt_uniform_double(double) nounwind readonly alwaysinline
{
%ret1 = call double @llvm.sqrt.f64(double %0)
%ret = fdiv double 1., %ret1
ret double %ret
}
define double @__rcp_uniform_double(double) nounwind readonly alwaysinline
{
%ret = fdiv double 1., %0
ret double %ret
}
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone alwaysinline define <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone alwaysinline
{ {
%v = extractelement <1 x float> %0, i32 0 %v = extractelement <1 x float> %0, i32 0
@@ -712,6 +725,13 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
%rv = insertelement <1 x float> undef, float %r, i32 0 %rv = insertelement <1 x float> undef, float %r, i32 0
ret <WIDTH x float> %rv ret <WIDTH x float> %rv
} }
define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone alwaysinline
{
%v = extractelement <1 x double> %0, i32 0
%r = call double @__rcp_uniform_double(double %v)
%rv = insertelement <1 x double> undef, double %r, i32 0
ret <WIDTH x double> %rv
}
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline
{ {
%v = extractelement <1 x float> %0, i32 0 %v = extractelement <1 x float> %0, i32 0
@@ -719,6 +739,14 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
%rv = insertelement <1 x float> undef, float %r, i32 0 %rv = insertelement <1 x float> undef, float %r, i32 0
ret <WIDTH x float> %rv ret <WIDTH x float> %rv
} }
define <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline
{
%v = extractelement <1 x double> %0, i32 0
%r = call double @__rsqrt_uniform_double(double %v)
%rv = insertelement <1 x double> undef, double %r, i32 0
ret <WIDTH x double> %rv
}
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline
{ {
%v = extractelement <1 x float> %0, i32 0 %v = extractelement <1 x float> %0, i32 0
@@ -726,7 +754,6 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
%rv = insertelement <1 x float> undef, float %r, i32 0 %rv = insertelement <1 x float> undef, float %r, i32 0
ret <WIDTH x float> %rv ret <WIDTH x float> %rv
} }
;; declare double @__sqrt_uniform_double(double) nounwind readnone ;; declare double @__sqrt_uniform_double(double) nounwind readnone
define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline { define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline {
%ret = call double @llvm.sqrt.f64(double %0) %ret = call double @llvm.sqrt.f64(double %0)

25
examples/cuda_helpers.cuh Normal file
View File

@@ -0,0 +1,25 @@
#pragma once
#define programCount 32
#define programIndex (threadIdx.x & 31)
#define taskIndex0 (blockIdx.x*4 + (threadIdx.x >> 5))
#define taskCount0 (gridDim.x*4)
#define taskIndex1 (blockIdx.y)
#define taskCount1 (gridDim.y)
#define taskIndex2 (blockIdx.z)
#define taskCount2 (gridDim.z)
#define taskIndex (taskIndex0 + taskCount0*(taskIndex1 + taskCount1*taskIndex2))
#define taskCount (taskCount0*taskCount1*taskCount2)
#define warpIdx (threadIdx.x >> 5)
#define launch(ntx,nty,ntz,func) if (programIndex==0) func<<<dim3(((ntx)+4-1)/4,nty,ntz),128>>>
#define sync cudaDeviceSynchronize()
#define cif if
__device__ __forceinline__ static double __shfl(double x, int lane)
{
return __hiloint2double(
__shfl_xor(__double2hiint(x), lane),
__shfl_xor(__double2loint(x), lane));
}
#define shuffle(x,y) __shfl(x,y)
#define broadcast(x,y) __shfl(x,y)

54
examples/ispc_malloc.cpp Normal file
View File

@@ -0,0 +1,54 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cstring>
#include "ispc_malloc.h"
#ifdef _CUDA_
void * operator new(size_t size) throw(std::bad_alloc)
{
void *ptr;
ispc_malloc(&ptr, size);
return ptr;
}
void operator delete(void *ptr) throw()
{
ispc_free(ptr);
}
#else
void ispc_malloc(void **ptr, const size_t size)
{
*ptr = malloc(size);
}
void ispc_free(void *ptr)
{
free(ptr);
}
void ispc_memset(void *ptr, int value, size_t size)
{
memset(ptr, value, size);
}
void ispcSetMallocHeapLimit(size_t value)
{
}
void ispcSetStackLimit(size_t value)
{
}
unsigned long long ispcGetMallocHeapLimit()
{
return -1;
}
unsigned long long ispcGetStackLimit()
{
return -1;
}
void * ispcMemcpy(void *dest, void *src, size_t num)
{
memcpy(dest, src, num);
return dest;
}
#endif

10
examples/ispc_malloc.h Normal file
View File

@@ -0,0 +1,10 @@
#pragma once
extern void ispc_malloc(void **ptr, const size_t size);
extern void ispc_free(void *ptr);
extern void ispc_memset(void *ptr, int value, size_t size);
extern void ispcSetMallocHeapLimit(size_t value);
extern void ispcSetStackLimit(size_t value);
extern unsigned long long ispcGetMallocHeapLimit();
extern unsigned long long ispcGetStackLimit();
extern void * ispcMemcpy(void *dest, void *src, size_t num);

43
examples/nvcc_helpers.cu Normal file
View File

@@ -0,0 +1,43 @@
#ifndef _CUDA_
#error "Something went wrong..."
#endif
void ispc_malloc(void **ptr, const size_t size)
{
cudaMallocManaged(ptr, size);
}
void ispc_free(void *ptr)
{
cudaFree(ptr);
}
void ispc_memset(void *ptr, int value, size_t size)
{
cudaMemset(ptr, value, size);
}
void ispcSetMallocHeapLimit(size_t value)
{
cudaDeviceSetLimit(cudaLimitMallocHeapSize,value);
}
void ispcSetStackLimit(size_t value)
{
cudaDeviceSetLimit(cudaLimitStackSize,value);
}
unsigned long long ispcGetMallocHeapLimit()
{
size_t value;
cudaDeviceGetLimit(&value, cudaLimitMallocHeapSize);
return value;
}
unsigned long long ispcGetStackLimit()
{
size_t value;
cudaDeviceGetLimit(&value, cudaLimitStackSize);
return value;
}
void * ispcMemcpy(void *dest, void *src, size_t num)
{
cudaMemcpy(dest, src, num, cudaMemcpyDefault);
return dest;
}

20
nvptxcc
View File

@@ -1,20 +0,0 @@
#!/bin/sh
PATH=$ISPC_HOME/examples_ptx/ptxcc:$ISPC_HOME/examples_ptx/ptxgen:$PATH
PTXCC=ptxcc
ARGS=${@:2}
if [ "$NVVM" == "1" ];
then
LLVM32=$HOME/usr/local/llvm/bin-3.2
LLVMDIS=$LLVM32/bin/llvm-dis
PTXGEN=$ISPC_HOME/examples_ptx/ptxgen/ptxgen
$($LLVMDIS $1 -o $1.ll) && $($PTXGEN $1.ll > $1.ptx) && \
$($PTXCC $1.ptx -o $1.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples_ptx/nvcc_helpers.cu examples_ptx/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples_ptx/ -D_CUDA_ -lcudadevrt $ARGS);
else
$($PTXCC $1 -o $1.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples_ptx/nvcc_helpers.cu examples_ptx/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples_ptx/ -D_CUDA_ -lcudadevrt $ARGS);
fi

49
ptxtools/Makefile Normal file
View File

@@ -0,0 +1,49 @@
all: ptxcc ptxgen
CXX=clang++
CXXFLAGS += -O3
CXXFLAGS += -I/opt/local/include
LD=clang++
LDFLAGS += -L/opt/local/lib
FLEX=flex
BISON=bison
CUDATK=/usr/local/cuda
LIBDEVICE_MAJOR=1
LIBDEVICE_MINOR=0
ptxgrammar.cc : ptxgrammar.yy
$(BISON) -d -v -t ptxgrammar.yy -o ptxgrammar.cc
ptx.cc: ptx.ll ptxgrammar.cc
$(FLEX) -t ptx.ll > ptx.cc
%.o: %.cc
$(CXX) $(CXXFLAGS) -c $< -o $@
%.o: %.cpp
$(CXX) $(CXXFLAGS) -c $< -o $@
OBJ= ptxcc.o \
ptx.o \
ptxgrammar.o
ptxcc: $(OBJ)
$(LD) $(LDFLAGS) $^ -o $@
ptxgen: ptxgen.cpp
$(CXX) $(CXXFLAGS) -o $@ $< \
-L$(CUDATK)/nvvm/lib64 -lnvvm \
-I$(CUDATK)/nvvm/include \
-I$(CUDATK)/include \
-DLIBDEVICE_MAJOR_VERSION=$(LIBDEVICE_MAJOR) \
-DLIBDEVICE_MINOR_VERSION=$(LIBDEVICE_MINOR) \
-DLIBNVVM_HOME=$(CUDATK)/nvvm -Wl,-rpath,$(CUDATK)/nvvm/lib64
clean:
/bin/rm -f ptxgen ptxcc $(OBJ) ptxgrammar.hh ptxgrammar.cc ptx.cc ptxgrammar.output
$(OBJ): ptxgrammar.cc ptx.cc PTXParser.h PTXLexer.h

40
ptxtools/PTXLexer.h Normal file
View File

@@ -0,0 +1,40 @@
#pragma once
#include <cstring>
#include <cassert>
namespace parser
{
class PTXLexer;
class PTXParser;
}
#include "ptxgrammar.hh"
namespace parser
{
/*! \brief A wrapper around yyFlexLexer to allow for a local variable */
class PTXLexer : public ptxFlexLexer
{
public:
YYSTYPE* yylval;
int column;
int nextColumn;
public:
PTXLexer( std::istream* arg_yyin,
std::ostream* arg_yyout ) :
yyFlexLexer( arg_yyin, arg_yyout ), yylval( 0 ), column( 0 ),
nextColumn( 0 ) { }
int yylex();
int yylexPosition()
{
int token = yylex();
column = nextColumn;
nextColumn = column + strlen( YYText() );
return token;
}
};
}

254
ptxtools/PTXParser.h Normal file
View File

@@ -0,0 +1,254 @@
#pragma once
#undef yyFlexLexer
#define yyFlexLexer ptxFlexLexer
#include <FlexLexer.h>
#include "PTXLexer.h"
#include <vector>
#include <sstream>
#include <string>
namespace ptx
{
extern int yyparse( parser::PTXLexer&, parser::PTXParser& );
}
namespace parser
{
/*! \brief An implementation of the Parser interface for PTX */
class PTXParser
{
private:
typedef int token_t;
std::ostream &out;
std::string _identifier;
token_t _dataTypeId;
int _alignment;
bool isArgumentList, isReturnArgumentList;
struct argument_t
{
token_t type;
std::string name;
int dim;
argument_t(const token_t _type, const std::string &_name, const int _dim = 1) :
type(_type), name(_name), dim(_dim) {}
};
std::vector<argument_t> argumentList, returnArgumentList;
std::vector<int> arrayDimensionsList;
public:
PTXParser(std::ostream &_out) : out(_out)
{
isArgumentList = isReturnArgumentList = false;
_alignment = 1;
}
void printHeader()
{
std::stringstream s;
#if 0
s << "template<int N> struct __align__(N) b8_t { unsigned char _v[N]; __device__ b8_t() {}; __device__ b8_t (const int value) {}}; \n";
s << "template<int N> struct __align__(2*N) b16_t { unsigned short _v[N]; __device__ b16_t() {}; __device__ b16_t(const int value) {}}; \n";
#else
s << "template<int N> struct b8_t { unsigned char _v[N]; __device__ b8_t() {}; __device__ b8_t (const int value) {}}; \n";
s << "template<int N> struct b16_t { unsigned short _v[N]; __device__ b16_t() {}; __device__ b16_t(const int value) {}}; \n";
#endif
s << "struct b8d_t { unsigned char _v[1]; }; \n";
s << "struct b16d_t { unsigned short _v[1]; }; \n";
s << "typedef unsigned int b32_t; \n";
s << "typedef unsigned int u32_t; \n";
s << "typedef int s32_t; \n";
s << "typedef unsigned long long b64_t; \n";
s << "typedef unsigned long long u64_t; \n";
s << "typedef long long s64_t; \n";
s << "typedef float f32_t; \n";
s << "typedef double f64_t; \n";
s << " \n";
out << s.str();
}
#define LOC YYLTYPE& location
void identifier(const std::string &s) { _identifier = s; }
void dataTypeId(const token_t token) { _dataTypeId = token; }
void argumentListBegin(LOC) { isArgumentList = true; }
void argumentListEnd (LOC) { isArgumentList = false; }
void returnArgumentListBegin(LOC) { isReturnArgumentList = true; }
void returnArgumentListEnd (LOC) { isReturnArgumentList = false; }
void argumentDeclaration(LOC)
{
assert(arrayDimensionsList.size() <= 1);
const int dim = arrayDimensionsList.empty() ? 1 : arrayDimensionsList[0];
const argument_t arg(_dataTypeId, _identifier, dim);
if (isArgumentList)
argumentList.push_back(arg);
else if (isReturnArgumentList)
returnArgumentList.push_back(arg);
else
assert(0);
arrayDimensionsList.clear();
}
void alignment(const int value) { _alignment = value; }
void arrayDimensions(const int value)
{
arrayDimensionsList.push_back(value);
}
std::string printArgument(const argument_t arg, const bool printDataType = true)
{
std::stringstream s;
if (printDataType)
s << tokenToDataType(arg.type, arg.dim) << " ";
s << arg.name << " ";
return s.str();
}
std::string printArgumentList(const bool printDataType = true)
{
std::stringstream s;
if (argumentList.empty()) return s.str();
const int n = argumentList.size();
s << " " << printArgument(argumentList[0], printDataType);
for (int i = 1; i < n; i++)
s << ",\n " << printArgument(argumentList[i], printDataType);
return s.str();
}
void visibleEntryDeclaration(const std::string &calleeName, LOC)
{
std::stringstream s;
assert(returnArgumentList.empty());
s << "extern \"C\" \n";
s << "__global__ void " << calleeName << " (\n";
s << printArgumentList();
s << "\n ) { asm(\" // entry \"); }\n";
/* check if this is an "export" entry */
const int entryNameLength = calleeName.length();
const int hostNameLength = std::max(entryNameLength-9,0);
const std::string ___export(&calleeName.c_str()[hostNameLength]);
if (___export.compare("___export") == 0)
{
std::string hostCalleeName;
hostCalleeName.append(calleeName.c_str(), hostNameLength);
s << "/*** host interface ***/\n";
s << "extern \"C\" \n";
s << "__host__ void " << hostCalleeName << " (\n";
s << printArgumentList();
s << "\n )\n";
s << "{\n ";
// s << " cudaFuncSetCacheConfig (" << calleeName << ", ";
s << " cudaDeviceSetCacheConfig (";
#if 1
s << " cudaFuncCachePreferEqual ";
#elif 1
s << " cudaFuncCachePreferL1 ";
#else
s << " cudaFuncCachePreferShared ";
#endif
s << ");\n";
s << calleeName;
s << "<<<1,32>>>(\n";
s << printArgumentList(false);
s << ");\n";
s << " cudaDeviceSynchronize(); \n";
s << "}\n";
}
s << "\n";
argumentList.clear();
out << s.str();
}
void visibleFunctionDeclaration(const std::string &calleeName, LOC)
{
std::stringstream s;
assert(returnArgumentList.size() < 2);
s << "extern \"C\" \n";
s << "__device__ ";
if (returnArgumentList.empty())
s << " void ";
else
s << " " << tokenToDataType(returnArgumentList[0].type, returnArgumentList[0].dim);
s << calleeName << " (\n";
s << printArgumentList();
if (returnArgumentList.empty())
s << "\n ) { asm(\" // function \"); }\n\n";
else
{
s << "\n ) { asm(\" // function \"); return 0;} /* return value to disable warnings */\n\n";
// s << "\n ) { asm(\" // function \"); } /* this will generate warrning */\n\n";
}
argumentList.clear();
returnArgumentList.clear();
out << s.str();
}
void visibleInitializableDeclaration(const std::string &name, LOC)
{
assert(arrayDimensionsList.size() == 1);
std::stringstream s;
s << "extern \"C\" __device__ ";
if (_alignment > 0)
s << "__attribute__((aligned(" << _alignment << "))) ";
s << tokenToDataType(_dataTypeId, 0);
if (arrayDimensionsList[0] == 0)
s << name << ";\n\n";
else
s << name << "[" << arrayDimensionsList[0] << "] = {0};\n\n";
out << s.str();
arrayDimensionsList.clear();
}
#undef LOC
std::string tokenToDataType( token_t token , int dim)
{
std::stringstream s;
switch( token )
{
case TOKEN_B8:
if (dim > 0) s << "b8_t<"<<dim<<"> ";
else s << "b8d_t ";
break;
case TOKEN_U8: assert(0); s << "u8_t "; break;
case TOKEN_S8: assert(0); s << "s8_t "; break;
case TOKEN_B16:
if (dim > 0) s << "b16_t<"<<dim<<"> ";
else s << "b16d_t ";
break;
case TOKEN_U16: assert(0); s << "u16_t "; break;
case TOKEN_S16: assert(0); s << "s16_t "; break;
case TOKEN_B32: assert(dim <= 1); s << "b32_t "; break;
case TOKEN_U32: assert(dim <= 1); s << "u32_t "; break;
case TOKEN_S32: assert(dim <= 1); s << "s32_t "; break;
case TOKEN_B64: assert(dim <= 1); s << "b64_t "; break;
case TOKEN_U64: assert(dim <= 1); s << "u64_t "; break;
case TOKEN_S64: assert(dim <= 1); s << "s64_t "; break;
case TOKEN_F32: assert(dim <= 1); s << "f32_t "; break;
case TOKEN_F64: assert(dim <= 1); s << "f64_t "; break;
default: std::cerr << "token= " << token<< std::endl; assert(0);
}
return s.str();
}
};
}

19
ptxtools/alloy_ptxcc.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/sh
PTXCC=$ISPC_HOME/ptxtools/ptxcc
PTXGEN=$ISPC_HOME/ptxtools/ptxgen
ARGS=${@:2}
if [ "$NVVM" == "1" ];
then
LLVM32=$HOME/usr/local/llvm/bin-3.2
LLVMDIS=$LLVM32/bin/llvm-dis
$($LLVMDIS $1 -o $1.ll) && $($PTXGEN $1.ll -o $1.ptx) && \
$($PTXCC $1.ptx -o $1.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/nvcc_helpers.cu examples/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples/ -D_CUDA_ -lcudadevrt $ARGS);
else
$($PTXCC $1 -o $1.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/nvcc_helpers.cu examples/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples/ -D_CUDA_ -lcudadevrt $ARGS);
fi

1458
ptxtools/main.cpp Normal file

File diff suppressed because it is too large Load Diff

79
ptxtools/ptx.ll Normal file
View File

@@ -0,0 +1,79 @@
%option yylineno
%option noyywrap
%option yyclass="parser::PTXLexer"
%option prefix="ptx"
%option c++
%{
#include "PTXLexer.h"
#include <cassert>
#include <sstream>
#include <cstring>
#ifdef LLSETTOKEN
#error "TOKEN is defined"
#endif
#define LLSETTOKEN(tok) yylval->ivalue = tok; return tok;
%}
COMMENT ("//"[^\n]*)
TAB [\t]*
%%
{COMMENT} {nextColumn += strlen(yytext); /* lCppComment(&yylloc); */ }
".version" { return TOKEN_VERSION; }
".target" { return TOKEN_TARGET; }
".address_size" { return TOKEN_ADDRESS_SIZE; }
".func" { return TOKEN_FUNC; }
".entry" { return TOKEN_ENTRY; }
".align" { return TOKEN_ALIGN; }
".visible" { return TOKEN_VISIBLE; }
".global" { return TOKEN_GLOBAL; }
".param" { return TOKEN_PARAM; }
".b0" { LLSETTOKEN( TOKEN_B32);} /* fix for buggy llvm-ptx generator */
".b8" { LLSETTOKEN( TOKEN_B8);}
".b16" { LLSETTOKEN( TOKEN_B16);}
".b32" { LLSETTOKEN( TOKEN_B32);}
".b64" { LLSETTOKEN( TOKEN_B64);}
".u8" { LLSETTOKEN( TOKEN_U8);}
".u16" { LLSETTOKEN( TOKEN_U16);}
".u32" { LLSETTOKEN( TOKEN_U32);}
".u64" { LLSETTOKEN( TOKEN_U64);}
".s8" { LLSETTOKEN( TOKEN_S8);}
".s16" { LLSETTOKEN( TOKEN_S16);}
".s32" { LLSETTOKEN( TOKEN_S32);}
".s64" { LLSETTOKEN( TOKEN_S64);}
".f32" { LLSETTOKEN( TOKEN_F32);}
".f64" { LLSETTOKEN( TOKEN_F64);}
"[" { return '[';}
"]" { return ']';}
"(" { return '(';}
")" { return ')';}
"," { return ',';}
";" { return ';';}
"=" { return '=';}
[0-9]+\.[0-9]+ { yylval->fvalue = atof(yytext); return TOKEN_FLOAT; }
[0-9]+ { yylval->ivalue = atoi(yytext); return TOKEN_INT; }
[a-zA-Z0-9_]+ { strcpy(yylval->svalue, yytext); return TOKEN_STRING;}
\n {
// yylloc.last_line++;
// yylloc.last_column = 1;
nextColumn = 1;
}
. ;
%%
/** Handle a C++-style comment--eat everything up until the end of the line.
*/
#if 0
static void
lCppComment(SourcePos *pos) {
char c;
do {
c = yyinput();
} while (c != 0 && c != '\n');
if (c == '\n') {
pos->last_line++;
pos->last_column = 1;
}
}
#endif

275
ptxtools/ptxcc.cpp Normal file
View File

@@ -0,0 +1,275 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <cassert>
#include <algorithm>
#include <sys/time.h>
#include "PTXParser.h"
/*
* The C++ code below is based on the following bash-script:
#!/bin/sh
PTXSRC=$1__tmp_ptx.ptx
PTXCU=$1___tmp_ptx.cu
PTXSH=$1___tmp_ptx.sh
NVCCPARM=${@:2}
DEPTX=dePTX
NVCC=nvcc
$(cat $1 | sed 's/\.b0/\.b32/g' > $PTXSRC) &&
$DEPTX < $PTXSRC > $PTXCU &&
$NVCC -arch=sm_35 -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \
sed 's/\#\$//g'| \
awk '{ if ($1 == "LIBRARIES=") print $1$2; else if ($1 == "cicc") print "cp '$PTXSRC'", $NF; else print $0 }' > $PTXSH &&
sh $PTXSH
# rm $PTXCU $PTXSH
*
*/
static char lRandomAlNum()
{
const char charset[] =
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz";
const size_t max_index = (sizeof(charset) - 1);
return charset[ rand() % max_index ];
}
static std::string lRandomString(const size_t length)
{
timeval t1;
gettimeofday(&t1, NULL);
srand(t1.tv_usec * t1.tv_sec);
std::string str(length,0);
std::generate_n( str.begin(), length, lRandomAlNum);
return str;
}
static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128])
{
// Copy over the command line arguments (passed in)
for (int i = 0; i < Argc; ++i)
argv[i] = Argv[i];
argc = Argc;
}
const char *lGetExt (const char *fspec)
{
char *e = strrchr (fspec, '.');
return e;
}
static std::vector<std::string> lSplitString(const std::string &s, char delim)
{
std::vector<std::string> elems;
std::stringstream ss(s);
std::string item;
while (std::getline(ss, item, delim)) {
if (!item.empty())
elems.push_back(item);
}
return elems;
}
static void lUsage(const int ret)
{
fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n");
fprintf(stdout, " [--help]\t\t\t\t This help\n");
fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n");
fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35");
fprintf(stdout, " [-o <name>]\t\t\t\t Output file name\n");
fprintf(stdout, " [-Xnvcc=<arguments>]\t\t Arguments to pass through to \"nvcc\"\n");
fprintf(stdout, " \n");
exit(ret);
}
int main(int _argc, char * _argv[])
{
int argc;
char *argv[128];
lGetAllArgs(_argc, _argv, argc, argv);
std::string arch="sm_35";
std::string filePTX;
std::string fileOBJ;
std::string extString = ".ptx";
bool keepTemporaries = false;
bool verbose = false;
std::string nvccArguments;
for (int i = 1; i < argc; ++i)
{
if (!strcmp(argv[i], "--help"))
lUsage(0);
else if (!strncmp(argv[i], "--arch=", 7))
arch = std::string(argv[i]+7);
else if (!strncmp(argv[i], "--keep-temporaries", 11))
keepTemporaries = true;
else if (!strncmp(argv[i], "--verbose", 9))
verbose = true;
else if (!strncmp(argv[i], "-Xnvcc=", 7))
nvccArguments = std::string(argv[i]+7);
else if (!strcmp(argv[i], "-o"))
{
if (++i == argc)
{
fprintf(stderr, "No output file specified after -o option.\n");
lUsage(1);
}
fileOBJ = std::string(argv[i]);
}
else
{
const char * ext = strrchr(argv[i], '.');
if (ext == NULL)
{
fprintf(stderr, " Unknown argument: %s \n", argv[i]);
lUsage(1);
}
else if (strncmp(ext, extString.c_str(), 4))
{
fprintf(stderr, " Unkown extension of the input file: %s \n", ext);
lUsage(1);
}
else if (filePTX.empty())
{
filePTX = std::string(argv[i]);
if (fileOBJ.empty())
{
char * baseName = argv[i];
while (baseName != ext)
fileOBJ += std::string(baseName++,1);
}
fileOBJ += ".o";
}
}
}
#if 0
fprintf(stderr, " fileOBJ= %s\n", fileOBJ.c_str());
fprintf(stderr, " arch= %s\n", arch.c_str());
fprintf(stderr, " file= %s\n", filePTX.empty() ? "$stdin" : filePTX.c_str());
fprintf(stderr, " num_args= %d\n", (int)nvccArgumentList.size());
for (int i= 0; i < (int)nvccArgumentList.size(); i++)
fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str());
#endif
assert(arch == std::string("sm_35"));
if (filePTX.empty())
{
fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n");
exit(1);
}
// open a file handle to a particular file:
std::ifstream inputPTX(filePTX.c_str());
if (!inputPTX)
{
fprintf(stderr, "ptxcc: error: %s: No such file\n", filePTX.c_str());
exit(1);
}
std::string randomBaseName = std::string("/tmp/") + lRandomString(8) + "_" + lSplitString(lSplitString(filePTX,'/').back(),'.')[0];
if (verbose)
fprintf(stderr, "baseFileName= %s\n", randomBaseName.c_str());
std::string fileCU= randomBaseName + ".cu";
std::ofstream outputCU(fileCU.c_str());
assert(outputCU);
std::istream & input = inputPTX;
std::ostream & output = outputCU;
std::ostream & error = std::cerr;
parser::PTXLexer lexer(&input, &error);
parser::PTXParser state(output);
// parse through the input until there is no more:
//
do {
ptx::yyparse(lexer, state);
}
while (!input.eof());
inputPTX.close();
outputCU.close();
// process output from nvcc
//
/* nvcc -dc -arch=$arch -dryrun -argumentlist fileCU */
std::string fileSH= randomBaseName + ".sh";
std::string nvccExe("nvcc");
std::string nvccCmd;
nvccCmd += nvccExe + std::string(" ");
nvccCmd += "-dc ";
nvccCmd += std::string("-arch=") + arch + std::string(" ");
nvccCmd += "-dryrun ";
nvccCmd += nvccArguments + std::string(" ");
nvccCmd += std::string("-o ") + fileOBJ + std::string(" ");
nvccCmd += fileCU + std::string(" ");
nvccCmd += std::string("2> ") + fileSH;
if (verbose)
fprintf(stderr , "%s\n", nvccCmd.c_str());
const int nvccRet = std::system(nvccCmd.c_str());
if (nvccRet)
fprintf(stderr, "FAIL: %s\n", nvccCmd.c_str());
std::ifstream inputSH(fileSH.c_str());
assert(inputSH);
std::vector<std::string> nvccSteps;
while (!inputSH.eof())
{
nvccSteps.push_back(std::string());
std::getline(inputSH, nvccSteps.back());
if (nvccRet)
fprintf(stderr, " %s\n", nvccSteps.back().c_str());
}
inputSH.close();
if (nvccRet)
exit(-1);
for (int i = 0; i < (int)nvccSteps.size(); i++)
{
std::string cmd = nvccSteps[i];
for (int j = 0; j < (int)cmd.size()-1; j++)
if (cmd[j] == '#' && cmd[j+1] == '$')
cmd[j] = cmd[j+1] = ' ';
std::vector<std::string> splitCmd = lSplitString(cmd, ' ');
if (!splitCmd.empty())
{
if (splitCmd[0] == std::string("cicc"))
cmd = std::string(" cp ") + filePTX + std::string(" ") + splitCmd.back();
if (splitCmd[0] == std::string("LIBRARIES="))
cmd = "";
}
nvccSteps[i] = cmd;
if (verbose)
fprintf(stderr, "%3d: %s\n", i, cmd.c_str());
const int ret = std::system(cmd.c_str());
if (ret)
{
fprintf(stderr, " Something went wrong .. \n");
for (int j = 0; j < i; j++)
fprintf(stderr, "PASS: %s\n", nvccSteps[j].c_str());
fprintf(stderr, "FAIL: %s\n", nvccSteps[i].c_str());
exit(-1);
}
}
if (!keepTemporaries)
{
/* remove temporaries */
}
}

408
ptxtools/ptxgen.cpp Normal file
View File

@@ -0,0 +1,408 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <nvvm.h>
#include <sys/stat.h>
template<typename T>
static std::string lValueToString(const T& value)
{
std::ostringstream oss;
oss << value;
return oss.str();
}
typedef struct stat Stat;
#define PTXGENStatus int
enum {
PTXGEN_SUCCESS = 0x0000,
PTXGEN_FILE_IO_ERROR = 0x0001,
PTXGEN_BAD_ALLOC_ERROR = 0x0002,
PTXGEN_LIBNVVM_COMPILATION_ERROR = 0x0004,
PTXGEN_LIBNVVM_ERROR = 0x0008,
PTXGEN_INVALID_USAGE = 0x0010,
PTXGEN_LIBNVVM_HOME_UNDEFINED = 0x0020,
PTXGEN_LIBNVVM_VERIFICATION_ERROR = 0x0040
};
static PTXGENStatus getLibDeviceName(const int computeArch, std::string &libDeviceName)
{
const char *env = getenv("LIBNVVM_HOME");
#ifdef LIBNVVM_HOME
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
const std::string libnvvmPath(env ? env : TOSTRING(LIBNVVM_HOME));
#undef TOSTRING
#undef STRINGIFY
#else
const std::string libnvvmPath(env);
#endif
if (libnvvmPath.empty())
{
fprintf(stderr, "The environment variable LIBNVVM_HOME is undefined\n");
return PTXGEN_LIBNVVM_HOME_UNDEFINED;
}
/* Use libdevice for compute_20, if the target is not compute_20, compute_30,
* or compute_35. */
const std::string libdevice =
std::string("/libdevice/libdevice.compute_") +
lValueToString(computeArch)+ "." +
lValueToString(LIBDEVICE_MAJOR_VERSION) +
lValueToString(LIBDEVICE_MINOR_VERSION) +
".bc";
libDeviceName = libnvvmPath + libdevice;
return PTXGEN_SUCCESS;
}
static PTXGENStatus addFileToProgram(const std::string &filename, nvvmProgram prog)
{
char *buffer;
size_t size;
Stat fileStat;
/* Open the input file. */
FILE *f = fopen(filename.c_str(), "rb");
if (f == NULL) {
fprintf(stderr, "Failed to open %s\n", filename.c_str());
return PTXGEN_FILE_IO_ERROR;
}
/* Allocate buffer for the input. */
fstat(fileno(f), &fileStat);
buffer = (char *) malloc(fileStat.st_size);
if (buffer == NULL) {
fprintf(stderr, "Failed to allocate memory\n");
return PTXGEN_BAD_ALLOC_ERROR;
}
size = fread(buffer, 1, fileStat.st_size, f);
if (ferror(f)) {
fprintf(stderr, "Failed to read %s\n", filename.c_str());
fclose(f);
free(buffer);
return PTXGEN_FILE_IO_ERROR;
}
fclose(f);
if (nvvmAddModuleToProgram(prog, buffer, size, filename.c_str()) != NVVM_SUCCESS) {
fprintf(stderr,
"Failed to add the module %s to the compilation unit\n",
filename.c_str());
free(buffer);
return PTXGEN_LIBNVVM_ERROR;
}
free(buffer);
return PTXGEN_SUCCESS;
}
static PTXGENStatus generatePTX(
std::vector<std::string> nvvmOptions,
std::vector<std::string> nvvmFiles,
std::ostream &out,
const int computeArch)
{
nvvmProgram prog;
PTXGENStatus status;
/* Create the compiliation unit. */
if (nvvmCreateProgram(&prog) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to create the compilation unit\n");
return PTXGEN_LIBNVVM_ERROR;
}
/* Add libdevice. */
std::string libDeviceName;
status = getLibDeviceName(computeArch, libDeviceName);
if (status != PTXGEN_SUCCESS)
{
nvvmDestroyProgram(&prog);
return status;
}
status = addFileToProgram(libDeviceName, prog);
if (status != PTXGEN_SUCCESS)
{
fprintf(stderr, "Please double-check LIBNVVM_HOME environmental variable.\n");
nvvmDestroyProgram(&prog);
return status;
}
/* Add the module to the compilation unit. */
for (int i = 0; i < (int)nvvmFiles.size(); ++i)
{
status = addFileToProgram(nvvmFiles[i], prog);
if (status != PTXGEN_SUCCESS)
{
nvvmDestroyProgram(&prog);
return status;
}
}
const int numOptions = nvvmOptions.size();
std::vector<const char*> options(numOptions);
for (int i = 0; i < numOptions; i++)
options[i] = nvvmOptions[i].c_str();
/* Verify the compilation unit. */
if (nvvmVerifyProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to verify the compilation unit\n");
status |= PTXGEN_LIBNVVM_VERIFICATION_ERROR;
}
/* Print warnings and errors. */
{
size_t logSize;
if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to get the compilation log size\n");
status |= PTXGEN_LIBNVVM_ERROR;
}
else
{
std::string log(logSize,0);
if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to get the compilation log\n");
status |= PTXGEN_LIBNVVM_ERROR;
}
else
{
fprintf(stderr, "%s\n", log.c_str());
}
}
}
if (status & PTXGEN_LIBNVVM_VERIFICATION_ERROR)
{
nvvmDestroyProgram(&prog);
return status;
}
/* Compile the compilation unit. */
if (nvvmCompileProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to generate PTX from the compilation unit\n");
status |= PTXGEN_LIBNVVM_COMPILATION_ERROR;
}
else
{
size_t ptxSize;
if (nvvmGetCompiledResultSize(prog, &ptxSize) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to get the PTX output size\n");
status |= PTXGEN_LIBNVVM_ERROR;
}
else
{
std::string ptx(ptxSize,0);
if (nvvmGetCompiledResult(prog, &ptx[0]) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to get the PTX output\n");
status |= PTXGEN_LIBNVVM_ERROR;
}
else
{
out << ptx;
}
}
}
/* Print warnings and errors. */
{
size_t logSize;
if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to get the compilation log size\n");
status |= PTXGEN_LIBNVVM_ERROR;
}
else
{
std::string log(logSize,0);
if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to get the compilation log\n");
status |= PTXGEN_LIBNVVM_ERROR;
}
else
{
fprintf(stderr, "%s\n", log.c_str());
}
}
}
/* Release the resources. */
nvvmDestroyProgram(&prog);
return PTXGEN_SUCCESS;
}
static void showUsage()
{
fprintf(stderr,"Usage: ptxgen [OPTION]... [FILE]...\n"
" [FILE] could be a .bc file or a .ll file\n");
}
static void lUsage(const int ret)
{
fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n");
fprintf(stdout, " [--help]\t\t This help\n");
fprintf(stdout, " [--verbose]\t\t Be verbose\n");
fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35");
fprintf(stdout, " [-o <name>]\t\t Output file name\n");
fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n");
fprintf(stdout, " [--opt=]\t\t Optimization parameters \n");
fprintf(stdout, " \t\t\t 0 - disable optimizations \n");
fprintf(stdout, " \t\t\t 3 - defalt, enable optimizations \n");
fprintf(stdout, " [--ftz=]\t\t Flush-to-zero mode when performsing single-precision floating-point operations\n");
fprintf(stdout, " \t\t\t 0 - default, preserve denormal values\n");
fprintf(stdout, " \t\t\t 1 - flush denormal values to zero\n");
fprintf(stdout, " [--prec-sqrt=]\t Precision mode for single-precision floating-point square root\n");
fprintf(stdout, " \t\t\t 0 - use a faster approximation\n");
fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n");
fprintf(stdout, " [--prec-div=]\t Precision mode for single-precision floating-point division and reciprocals\n");
fprintf(stdout, " \t\t\t 0 - use a faster approximation\n");
fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n");
fprintf(stdout, " [--fma=]\t\t FMA contraction mode \n");
fprintf(stdout, " \t\t\t 0 - disable\n");
fprintf(stdout, " \t\t\t 1 - default, enable\n");
fprintf(stdout, " [--use_fast_math]\t Make use of fast maih. Implies --ftz=1 --prec-div=0 --prec-sqrt=0\n");
fprintf(stdout, " \n");
exit(ret);
}
int main(int argc, char *argv[])
{
int _opt = 3;
int _ftz = 0;
int _precSqrt = 1;
int _precDiv = 1;
int _fma = 1;
bool _useFastMath = false;
bool _debug = false;
bool _verbose = false;
std::string _arch = "sm_35";
std::string fileIR, filePTX;
for (int i = 1; i < argc; ++i)
{
if (!strcmp(argv[i], "--help"))
lUsage(0);
else if (!strncmp(argv[i], "--arch=", 7))
_arch = std::string(argv[i]+7);
else if (!strncmp(argv[i], "-g", 2))
_debug = true;
else if (!strncmp(argv[i], "--verbose", 9))
_verbose = true;
else if (!strncmp(argv[i], "--opt=", 6))
_opt = atoi(argv[i]+6);
else if (!strncmp(argv[i], "--ftz=", 6))
_ftz = atoi(argv[i]+6);
else if (!strncmp(argv[i], "--prec-sqrt=", 12))
_precSqrt = atoi(argv[i]+12);
else if (!strncmp(argv[i], "--prec-div=", 11))
_precDiv = atoi(argv[i]+11);
else if (!strncmp(argv[i], "--fma=", 6))
_fma = atoi(argv[i]+6);
else if (!strncmp(argv[i], "--use_fast_math", 15))
_useFastMath = true;
else if (!strcmp(argv[i], "-o"))
{
if (++i == argc)
{
fprintf(stderr, "No output file specified after -o option.\n");
lUsage(1);
}
filePTX = std::string(argv[i]);
}
else
{
const char * ext = strrchr(argv[i], '.');
if (ext == NULL)
{
fprintf(stderr, " Unknown argument: %s \n", argv[i]);
lUsage(1);
}
else if (strncmp(ext, ".ll", 3) && strncmp(ext, ".bc", 3))
{
fprintf(stderr, " Unkown extension of the input file: %s \n", ext);
lUsage(1);
}
else if (filePTX.empty())
{
fileIR = std::string(argv[i]);
if (filePTX.empty())
{
char * baseName = argv[i];
while (baseName != ext)
filePTX += std::string(baseName++,1);
}
filePTX += ".ptx";
}
}
}
if (fileIR.empty())
{
fprintf(stderr, "ptxgen fatal : No input file specified; use option --help for more information\n");
exit(1);
}
#if 0
fprintf(stderr, "fileIR= %s\n", fileIR.c_str());
fprintf(stderr, "filePTX= %s\n", filePTX.c_str());
fprintf(stderr, "arch= %s\n", _arch.c_str());
fprintf(stderr, "debug= %s\n", _debug ? "true" : "false");
fprintf(stderr, "verbose= %s\n", _verbose ? "true" : "false");
fprintf(stderr, "opt= %d\n", _opt);
fprintf(stderr, "ftz= %d\n", _ftz);
fprintf(stderr, "prec-sqrt= %d\n", _precSqrt);
fprintf(stderr, "prec-div= %d\n", _precDiv);
fprintf(stderr, "fma= %d\n", _fma);
fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? "true" : "false");
#endif
int computeArch = 35;
assert(_arch == std::string("sm_35"));
if (_useFastMath)
{
_ftz = 1;
_precSqrt = _precDiv = 0;
}
std::vector<std::string> nvvmOptions;
nvvmOptions.push_back("-arch=compute_35");
nvvmOptions.push_back("-ftz=" + lValueToString(_ftz));
nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt));
nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv));
nvvmOptions.push_back("-fma=" + lValueToString(_fma));
if (_debug)
nvvmOptions.push_back("-g");
std::vector<std::string> nvvmFiles;
nvvmFiles.push_back(fileIR);
std::ofstream outputPTX(filePTX.c_str());
assert(outputPTX);
const int ret = generatePTX(nvvmOptions, nvvmFiles, outputPTX, computeArch);
outputPTX.open(filePTX.c_str());
return ret;
}

1929
ptxtools/ptxgrammar.cpp Normal file

File diff suppressed because it is too large Load Diff

215
ptxtools/ptxgrammar.yy Normal file
View File

@@ -0,0 +1,215 @@
%locations
%{
#include <iostream>
#include "PTXParser.h"
#include "PTXLexer.h"
#include <cassert>
#include <cstring>
#include <sstream>
#include <cstdio>
#define YYERROR_VERBOSE 1
#ifdef REPORT_BASE
#undef REPORT_BASE
#endif
#define REPORT_BASE 0
namespace ptx
{
int yylex( YYSTYPE* token, YYLTYPE* location, parser::PTXLexer& lexer,
parser::PTXParser& state );
void yyerror( YYLTYPE* location, parser::PTXLexer& lexer,
parser::PTXParser& state, char const* message );
std::string yyTypeToString( int );
%}
%union
{
char svalue[1024];
double fvalue;
int ivalue;
unsigned int uvalue;
}
%parse-param {parser::PTXLexer& lexer}
%parse-param {parser::PTXParser& state}
%lex-param {parser::PTXLexer& lexer}
%lex-param {parser::PTXParser& state}
%pure-parser
// define the constant-string tokens:
%token TOKEN_VERSION TOKEN_TARGET TOKEN_ADDRESS_SIZE
%token TOKEN_VISIBLE TOKEN_FUNC TOKEN_ENTRY
%token TOKEN_PARAM TOKEN_ALIGN
%token TOKEN_GLOBAL
%token<ivalue> TOKEN_B8 TOKEN_B16 TOKEN_B32 TOKEN_B64
%token<ivalue> TOKEN_U8 TOKEN_U16 TOKEN_U32 TOKEN_U64
%token<ivalue> TOKEN_S8 TOKEN_S16 TOKEN_S32 TOKEN_S64
%token<ivalue> TOKEN_F32 TOKEN_F64
// define the "terminal symbol" token types I'm going to use (in CAPS
// by convention), and associate each with a field of the union:
%token <ivalue> TOKEN_INT
%token <fvalue> TOKEN_FLOAT
%token <svalue> TOKEN_STRING
%type<svalue> identifier
%type<ivalue> arrayDimensionSet
%type<ivalue> alignment
%start ptxsource
%%
// the first rule defined is the highest-level rule, which in our
// case is just the concept of a whole "snazzle file":
ptxsource:
header ptxbody;
header:
version target address_size
{
// std::cerr << "Done reading PTX \n" << std::endl;
state.printHeader();
};
version:
TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; };
target:
TOKEN_TARGET TOKEN_STRING { assert(std::string($2) == std::string("sm_35")); } //std::cerr << "Target " << $2 << std::endl; };
address_size:
TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };
dataTypeId :
TOKEN_U8 | TOKEN_U16 | TOKEN_U32 | TOKEN_U64
| TOKEN_S8 | TOKEN_S16 | TOKEN_S32 | TOKEN_S64
| TOKEN_B8 | TOKEN_B16 | TOKEN_B32 | TOKEN_B64
| TOKEN_F32 | TOKEN_F64;
dataType: dataTypeId { state.dataTypeId($<ivalue>1); }
anytoken:
TOKEN_ALIGN
| TOKEN_PARAM
| dataTypeId
| TOKEN_STRING | TOKEN_FLOAT | TOKEN_INT
| TOKEN_FUNC | TOKEN_ENTRY
| TOKEN_GLOBAL
| '['
| ']'
| '('
| ')'
| ','
| ';'
| '='
;
ptxbody:
ptxbody visibleFunctionDeclaration | visibleFunctionDeclaration
| ptxbody visibleEntryDeclaration| visibleEntryDeclaration
| ptxbody visibleInitializableDeclaration| visibleInitializableDeclaration
| ptxbody anytoken | anytoken;
arrayDimensionSet : '[' TOKEN_INT ']' { $$ = $2; state.arrayDimensions($<ivalue>2); }
// arrayDimensionSet : arrayDimensionSet '[' TOKEN_INT ']' { $$ = $2; }
// arrayDimensionSet : '[' ']' { $$ = 0; }
arrayDimensions : /* empty string */;
arrayDimensions : arrayDimensionSet;
identifier: TOKEN_STRING { strcpy($$, $1); state.identifier($1); }
parameter : TOKEN_PARAM;
alignment : TOKEN_ALIGN TOKEN_INT {$$ = $2; state.alignment($<ivalue>2);}
addressableVariablePrefix : dataType { state.alignment(0); }
addressableVariablePrefix : alignment dataType;
argumentDeclaration : parameter addressableVariablePrefix identifier arrayDimensions
{
state.argumentDeclaration(@1);
}
argumentListBegin : '(' { state.argumentListBegin(@1); };
argumentListEnd : ')' {state.argumentListEnd(@1); };
argumentListBody : argumentDeclaration;
argumentListBody : /* empty string */;
argumentListBody : argumentListBody ',' argumentDeclaration;
argumentList: argumentListBegin argumentListBody argumentListEnd;
visibleEntryDeclaration: TOKEN_VISIBLE TOKEN_ENTRY identifier argumentList
{
state.visibleEntryDeclaration($<svalue>3, @1);
};
returnArgumentListBegin : '(' { state.returnArgumentListBegin(@1); }
returnArgumentListEnd : ')' {state.returnArgumentListEnd(@1); }
returnArgumentList : returnArgumentListBegin argumentListBody returnArgumentListEnd;
optionalReturnArgumentList : returnArgumentList | /* empty string */;
visibleFunctionDeclaration: TOKEN_VISIBLE TOKEN_FUNC optionalReturnArgumentList identifier argumentList
{
state.visibleFunctionDeclaration($<svalue>4, @1);
};
visibleInitializableDeclaration :
TOKEN_VISIBLE TOKEN_GLOBAL addressableVariablePrefix identifier arrayDimensionSet
{ state.visibleInitializableDeclaration($<svalue>4,@1); }
| TOKEN_VISIBLE TOKEN_GLOBAL addressableVariablePrefix identifier ';'
{state.arrayDimensions(0); state.visibleInitializableDeclaration($<svalue>4,@1); }
| TOKEN_VISIBLE TOKEN_GLOBAL addressableVariablePrefix identifier '='
{state.arrayDimensions(0); state.visibleInitializableDeclaration($<svalue>4,@1); }
%%
int yylex( YYSTYPE* token, YYLTYPE* location, parser::PTXLexer& lexer,
parser::PTXParser& state )
{
lexer.yylval = token;
int tokenValue = lexer.yylexPosition();
location->first_line = lexer.lineno();
location->first_column = lexer.column;
#if 0
report( " Lexer (" << location->first_line << ","
<< location->first_column
<< "): " << parser::PTXLexer::toString( tokenValue ) << " \""
<< lexer.YYText() << "\"");
#endif
return tokenValue;
}
static std::string toString( YYLTYPE& location, parser::PTXParser& state )
{
std::stringstream stream;
stream
#if 0
<< state.fileName
#else
<< "ptx "
#endif
<< " (" << location.first_line << ", "
<< location.first_column << "): ";
return stream.str();
}
void yyerror( YYLTYPE* location, parser::PTXLexer& lexer,
parser::PTXParser& state, char const* message )
{
std::stringstream stream;
stream << toString( *location, state )
<< " " << message;
fprintf(stderr, "--Parser ERROR-- %s %s \n", toString(*location, state).c_str(), message);
exit(-1);
}
}

View File

@@ -258,7 +258,7 @@ def run_test(testname):
if should_fail: if should_fail:
cc_cmd += " -DEXPECT_FAILURE" cc_cmd += " -DEXPECT_FAILURE"
if is_nvptx_target: if is_nvptx_target:
nvptxcc_exe = "nvptxcc" nvptxcc_exe = "ptxtools/alloy_ptxcc.sh"
nvptxcc_exe_rel = add_prefix(nvptxcc_exe) nvptxcc_exe_rel = add_prefix(nvptxcc_exe)
cc_cmd = "%s %s -DTEST_SIG=%d -o %s" % \ cc_cmd = "%s %s -DTEST_SIG=%d -o %s" % \
(nvptxcc_exe_rel, obj_name, match, exe_name) (nvptxcc_exe_rel, obj_name, match, exe_name)
@@ -270,7 +270,7 @@ def run_test(testname):
if is_generic_target: if is_generic_target:
ispc_cmd += " --emit-c++ --c++-include-file=%s" % add_prefix(options.include_file) ispc_cmd += " --emit-c++ --c++-include-file=%s" % add_prefix(options.include_file)
if is_nvptx_target: if is_nvptx_target:
filename4ptx = filename+".ptx.parsed_ispc" filename4ptx = "/tmp/"+os.path.basename(filename)+".parsed.ispc"
grep_cmd = "grep -v 'export uniform int width' %s > %s " % \ grep_cmd = "grep -v 'export uniform int width' %s > %s " % \
(filename, filename4ptx) (filename, filename4ptx)
if options.verbose: if options.verbose: