compiles
This commit is contained in:
@@ -705,6 +705,19 @@ define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define double @__rsqrt_uniform_double(double) nounwind readonly alwaysinline
|
||||
{
|
||||
%ret1 = call double @llvm.sqrt.f64(double %0)
|
||||
%ret = fdiv double 1., %ret1
|
||||
ret double %ret
|
||||
}
|
||||
define double @__rcp_uniform_double(double) nounwind readonly alwaysinline
|
||||
{
|
||||
%ret = fdiv double 1., %0
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
|
||||
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone alwaysinline
|
||||
{
|
||||
%v = extractelement <1 x float> %0, i32 0
|
||||
@@ -712,6 +725,13 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
|
||||
%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
ret <WIDTH x float> %rv
|
||||
}
|
||||
define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone alwaysinline
|
||||
{
|
||||
%v = extractelement <1 x double> %0, i32 0
|
||||
%r = call double @__rcp_uniform_double(double %v)
|
||||
%rv = insertelement <1 x double> undef, double %r, i32 0
|
||||
ret <WIDTH x double> %rv
|
||||
}
|
||||
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline
|
||||
{
|
||||
%v = extractelement <1 x float> %0, i32 0
|
||||
@@ -719,6 +739,14 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
ret <WIDTH x float> %rv
|
||||
}
|
||||
define <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline
|
||||
{
|
||||
%v = extractelement <1 x double> %0, i32 0
|
||||
%r = call double @__rsqrt_uniform_double(double %v)
|
||||
%rv = insertelement <1 x double> undef, double %r, i32 0
|
||||
ret <WIDTH x double> %rv
|
||||
}
|
||||
|
||||
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline
|
||||
{
|
||||
%v = extractelement <1 x float> %0, i32 0
|
||||
@@ -726,7 +754,6 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
ret <WIDTH x float> %rv
|
||||
}
|
||||
|
||||
;; declare double @__sqrt_uniform_double(double) nounwind readnone
|
||||
define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline {
|
||||
%ret = call double @llvm.sqrt.f64(double %0)
|
||||
|
||||
25
examples/cuda_helpers.cuh
Normal file
25
examples/cuda_helpers.cuh
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#define programCount 32
|
||||
#define programIndex (threadIdx.x & 31)
|
||||
#define taskIndex0 (blockIdx.x*4 + (threadIdx.x >> 5))
|
||||
#define taskCount0 (gridDim.x*4)
|
||||
#define taskIndex1 (blockIdx.y)
|
||||
#define taskCount1 (gridDim.y)
|
||||
#define taskIndex2 (blockIdx.z)
|
||||
#define taskCount2 (gridDim.z)
|
||||
#define taskIndex (taskIndex0 + taskCount0*(taskIndex1 + taskCount1*taskIndex2))
|
||||
#define taskCount (taskCount0*taskCount1*taskCount2)
|
||||
#define warpIdx (threadIdx.x >> 5)
|
||||
#define launch(ntx,nty,ntz,func) if (programIndex==0) func<<<dim3(((ntx)+4-1)/4,nty,ntz),128>>>
|
||||
#define sync cudaDeviceSynchronize()
|
||||
#define cif if
|
||||
__device__ __forceinline__ static double __shfl(double x, int lane)
|
||||
{
|
||||
return __hiloint2double(
|
||||
__shfl_xor(__double2hiint(x), lane),
|
||||
__shfl_xor(__double2loint(x), lane));
|
||||
|
||||
}
|
||||
#define shuffle(x,y) __shfl(x,y)
|
||||
#define broadcast(x,y) __shfl(x,y)
|
||||
54
examples/ispc_malloc.cpp
Normal file
54
examples/ispc_malloc.cpp
Normal file
@@ -0,0 +1,54 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include "ispc_malloc.h"
|
||||
|
||||
#ifdef _CUDA_
|
||||
|
||||
void * operator new(size_t size) throw(std::bad_alloc)
|
||||
{
|
||||
void *ptr;
|
||||
ispc_malloc(&ptr, size);
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void *ptr) throw()
|
||||
{
|
||||
ispc_free(ptr);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void ispc_malloc(void **ptr, const size_t size)
|
||||
{
|
||||
*ptr = malloc(size);
|
||||
}
|
||||
void ispc_free(void *ptr)
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
void ispc_memset(void *ptr, int value, size_t size)
|
||||
{
|
||||
memset(ptr, value, size);
|
||||
}
|
||||
void ispcSetMallocHeapLimit(size_t value)
|
||||
{
|
||||
}
|
||||
void ispcSetStackLimit(size_t value)
|
||||
{
|
||||
}
|
||||
unsigned long long ispcGetMallocHeapLimit()
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
unsigned long long ispcGetStackLimit()
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
void * ispcMemcpy(void *dest, void *src, size_t num)
|
||||
{
|
||||
memcpy(dest, src, num);
|
||||
return dest;
|
||||
}
|
||||
|
||||
#endif
|
||||
10
examples/ispc_malloc.h
Normal file
10
examples/ispc_malloc.h
Normal file
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
extern void ispc_malloc(void **ptr, const size_t size);
|
||||
extern void ispc_free(void *ptr);
|
||||
extern void ispc_memset(void *ptr, int value, size_t size);
|
||||
extern void ispcSetMallocHeapLimit(size_t value);
|
||||
extern void ispcSetStackLimit(size_t value);
|
||||
extern unsigned long long ispcGetMallocHeapLimit();
|
||||
extern unsigned long long ispcGetStackLimit();
|
||||
extern void * ispcMemcpy(void *dest, void *src, size_t num);
|
||||
43
examples/nvcc_helpers.cu
Normal file
43
examples/nvcc_helpers.cu
Normal file
@@ -0,0 +1,43 @@
|
||||
#ifndef _CUDA_
|
||||
#error "Something went wrong..."
|
||||
#endif
|
||||
|
||||
void ispc_malloc(void **ptr, const size_t size)
|
||||
{
|
||||
cudaMallocManaged(ptr, size);
|
||||
}
|
||||
void ispc_free(void *ptr)
|
||||
{
|
||||
cudaFree(ptr);
|
||||
}
|
||||
void ispc_memset(void *ptr, int value, size_t size)
|
||||
{
|
||||
cudaMemset(ptr, value, size);
|
||||
}
|
||||
void ispcSetMallocHeapLimit(size_t value)
|
||||
{
|
||||
cudaDeviceSetLimit(cudaLimitMallocHeapSize,value);
|
||||
}
|
||||
void ispcSetStackLimit(size_t value)
|
||||
{
|
||||
cudaDeviceSetLimit(cudaLimitStackSize,value);
|
||||
}
|
||||
unsigned long long ispcGetMallocHeapLimit()
|
||||
{
|
||||
size_t value;
|
||||
cudaDeviceGetLimit(&value, cudaLimitMallocHeapSize);
|
||||
return value;
|
||||
}
|
||||
unsigned long long ispcGetStackLimit()
|
||||
{
|
||||
size_t value;
|
||||
cudaDeviceGetLimit(&value, cudaLimitStackSize);
|
||||
return value;
|
||||
}
|
||||
void * ispcMemcpy(void *dest, void *src, size_t num)
|
||||
{
|
||||
cudaMemcpy(dest, src, num, cudaMemcpyDefault);
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
20
nvptxcc
20
nvptxcc
@@ -1,20 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
PATH=$ISPC_HOME/examples_ptx/ptxcc:$ISPC_HOME/examples_ptx/ptxgen:$PATH
|
||||
PTXCC=ptxcc
|
||||
ARGS=${@:2}
|
||||
if [ "$NVVM" == "1" ];
|
||||
then
|
||||
LLVM32=$HOME/usr/local/llvm/bin-3.2
|
||||
LLVMDIS=$LLVM32/bin/llvm-dis
|
||||
PTXGEN=$ISPC_HOME/examples_ptx/ptxgen/ptxgen
|
||||
$($LLVMDIS $1 -o $1.ll) && $($PTXGEN $1.ll > $1.ptx) && \
|
||||
$($PTXCC $1.ptx -o $1.o -Xnvcc="-G") && \
|
||||
$(nvcc test_static_nvptx.cpp examples_ptx/nvcc_helpers.cu examples_ptx/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples_ptx/ -D_CUDA_ -lcudadevrt $ARGS);
|
||||
else
|
||||
$($PTXCC $1 -o $1.o -Xnvcc="-G") && \
|
||||
$(nvcc test_static_nvptx.cpp examples_ptx/nvcc_helpers.cu examples_ptx/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples_ptx/ -D_CUDA_ -lcudadevrt $ARGS);
|
||||
fi
|
||||
|
||||
|
||||
|
||||
49
ptxtools/Makefile
Normal file
49
ptxtools/Makefile
Normal file
@@ -0,0 +1,49 @@
|
||||
all: ptxcc ptxgen
|
||||
|
||||
CXX=clang++
|
||||
CXXFLAGS += -O3
|
||||
CXXFLAGS += -I/opt/local/include
|
||||
|
||||
LD=clang++
|
||||
LDFLAGS += -L/opt/local/lib
|
||||
|
||||
FLEX=flex
|
||||
BISON=bison
|
||||
|
||||
CUDATK=/usr/local/cuda
|
||||
LIBDEVICE_MAJOR=1
|
||||
LIBDEVICE_MINOR=0
|
||||
|
||||
ptxgrammar.cc : ptxgrammar.yy
|
||||
$(BISON) -d -v -t ptxgrammar.yy -o ptxgrammar.cc
|
||||
|
||||
ptx.cc: ptx.ll ptxgrammar.cc
|
||||
$(FLEX) -t ptx.ll > ptx.cc
|
||||
|
||||
%.o: %.cc
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
%.o: %.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
OBJ= ptxcc.o \
|
||||
ptx.o \
|
||||
ptxgrammar.o
|
||||
|
||||
ptxcc: $(OBJ)
|
||||
$(LD) $(LDFLAGS) $^ -o $@
|
||||
|
||||
ptxgen: ptxgen.cpp
|
||||
$(CXX) $(CXXFLAGS) -o $@ $< \
|
||||
-L$(CUDATK)/nvvm/lib64 -lnvvm \
|
||||
-I$(CUDATK)/nvvm/include \
|
||||
-I$(CUDATK)/include \
|
||||
-DLIBDEVICE_MAJOR_VERSION=$(LIBDEVICE_MAJOR) \
|
||||
-DLIBDEVICE_MINOR_VERSION=$(LIBDEVICE_MINOR) \
|
||||
-DLIBNVVM_HOME=$(CUDATK)/nvvm -Wl,-rpath,$(CUDATK)/nvvm/lib64
|
||||
|
||||
clean:
|
||||
/bin/rm -f ptxgen ptxcc $(OBJ) ptxgrammar.hh ptxgrammar.cc ptx.cc ptxgrammar.output
|
||||
|
||||
$(OBJ): ptxgrammar.cc ptx.cc PTXParser.h PTXLexer.h
|
||||
|
||||
40
ptxtools/PTXLexer.h
Normal file
40
ptxtools/PTXLexer.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
|
||||
namespace parser
|
||||
{
|
||||
class PTXLexer;
|
||||
class PTXParser;
|
||||
}
|
||||
|
||||
#include "ptxgrammar.hh"
|
||||
|
||||
namespace parser
|
||||
{
|
||||
/*! \brief A wrapper around yyFlexLexer to allow for a local variable */
|
||||
class PTXLexer : public ptxFlexLexer
|
||||
{
|
||||
public:
|
||||
YYSTYPE* yylval;
|
||||
int column;
|
||||
int nextColumn;
|
||||
|
||||
public:
|
||||
PTXLexer( std::istream* arg_yyin,
|
||||
std::ostream* arg_yyout ) :
|
||||
yyFlexLexer( arg_yyin, arg_yyout ), yylval( 0 ), column( 0 ),
|
||||
nextColumn( 0 ) { }
|
||||
|
||||
int yylex();
|
||||
int yylexPosition()
|
||||
{
|
||||
int token = yylex();
|
||||
column = nextColumn;
|
||||
nextColumn = column + strlen( YYText() );
|
||||
return token;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
254
ptxtools/PTXParser.h
Normal file
254
ptxtools/PTXParser.h
Normal file
@@ -0,0 +1,254 @@
|
||||
#pragma once
|
||||
|
||||
#undef yyFlexLexer
|
||||
#define yyFlexLexer ptxFlexLexer
|
||||
#include <FlexLexer.h>
|
||||
|
||||
|
||||
#include "PTXLexer.h"
|
||||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
namespace ptx
|
||||
{
|
||||
extern int yyparse( parser::PTXLexer&, parser::PTXParser& );
|
||||
}
|
||||
|
||||
namespace parser
|
||||
{
|
||||
/*! \brief An implementation of the Parser interface for PTX */
|
||||
class PTXParser
|
||||
{
|
||||
private:
|
||||
typedef int token_t;
|
||||
std::ostream &out;
|
||||
std::string _identifier;
|
||||
token_t _dataTypeId;
|
||||
int _alignment;
|
||||
|
||||
bool isArgumentList, isReturnArgumentList;
|
||||
struct argument_t
|
||||
{
|
||||
token_t type;
|
||||
std::string name;
|
||||
int dim;
|
||||
|
||||
argument_t(const token_t _type, const std::string &_name, const int _dim = 1) :
|
||||
type(_type), name(_name), dim(_dim) {}
|
||||
};
|
||||
std::vector<argument_t> argumentList, returnArgumentList;
|
||||
std::vector<int> arrayDimensionsList;
|
||||
|
||||
public:
|
||||
PTXParser(std::ostream &_out) : out(_out)
|
||||
{
|
||||
isArgumentList = isReturnArgumentList = false;
|
||||
_alignment = 1;
|
||||
}
|
||||
|
||||
void printHeader()
|
||||
{
|
||||
std::stringstream s;
|
||||
#if 0
|
||||
s << "template<int N> struct __align__(N) b8_t { unsigned char _v[N]; __device__ b8_t() {}; __device__ b8_t (const int value) {}}; \n";
|
||||
s << "template<int N> struct __align__(2*N) b16_t { unsigned short _v[N]; __device__ b16_t() {}; __device__ b16_t(const int value) {}}; \n";
|
||||
#else
|
||||
s << "template<int N> struct b8_t { unsigned char _v[N]; __device__ b8_t() {}; __device__ b8_t (const int value) {}}; \n";
|
||||
s << "template<int N> struct b16_t { unsigned short _v[N]; __device__ b16_t() {}; __device__ b16_t(const int value) {}}; \n";
|
||||
#endif
|
||||
s << "struct b8d_t { unsigned char _v[1]; }; \n";
|
||||
s << "struct b16d_t { unsigned short _v[1]; }; \n";
|
||||
|
||||
s << "typedef unsigned int b32_t; \n";
|
||||
s << "typedef unsigned int u32_t; \n";
|
||||
s << "typedef int s32_t; \n";
|
||||
|
||||
s << "typedef unsigned long long b64_t; \n";
|
||||
s << "typedef unsigned long long u64_t; \n";
|
||||
s << "typedef long long s64_t; \n";
|
||||
|
||||
s << "typedef float f32_t; \n";
|
||||
s << "typedef double f64_t; \n";
|
||||
s << " \n";
|
||||
out << s.str();
|
||||
}
|
||||
|
||||
#define LOC YYLTYPE& location
|
||||
|
||||
void identifier(const std::string &s) { _identifier = s; }
|
||||
void dataTypeId(const token_t token) { _dataTypeId = token; }
|
||||
void argumentListBegin(LOC) { isArgumentList = true; }
|
||||
void argumentListEnd (LOC) { isArgumentList = false; }
|
||||
void returnArgumentListBegin(LOC) { isReturnArgumentList = true; }
|
||||
void returnArgumentListEnd (LOC) { isReturnArgumentList = false; }
|
||||
void argumentDeclaration(LOC)
|
||||
{
|
||||
assert(arrayDimensionsList.size() <= 1);
|
||||
const int dim = arrayDimensionsList.empty() ? 1 : arrayDimensionsList[0];
|
||||
const argument_t arg(_dataTypeId, _identifier, dim);
|
||||
if (isArgumentList)
|
||||
argumentList.push_back(arg);
|
||||
else if (isReturnArgumentList)
|
||||
returnArgumentList.push_back(arg);
|
||||
else
|
||||
assert(0);
|
||||
arrayDimensionsList.clear();
|
||||
}
|
||||
void alignment(const int value) { _alignment = value; }
|
||||
|
||||
void arrayDimensions(const int value)
|
||||
{
|
||||
arrayDimensionsList.push_back(value);
|
||||
}
|
||||
|
||||
std::string printArgument(const argument_t arg, const bool printDataType = true)
|
||||
{
|
||||
std::stringstream s;
|
||||
if (printDataType)
|
||||
s << tokenToDataType(arg.type, arg.dim) << " ";
|
||||
s << arg.name << " ";
|
||||
return s.str();
|
||||
}
|
||||
|
||||
std::string printArgumentList(const bool printDataType = true)
|
||||
{
|
||||
std::stringstream s;
|
||||
if (argumentList.empty()) return s.str();
|
||||
const int n = argumentList.size();
|
||||
s << " " << printArgument(argumentList[0], printDataType);
|
||||
for (int i = 1; i < n; i++)
|
||||
s << ",\n " << printArgument(argumentList[i], printDataType);
|
||||
return s.str();
|
||||
}
|
||||
|
||||
void visibleEntryDeclaration(const std::string &calleeName, LOC)
|
||||
{
|
||||
std::stringstream s;
|
||||
assert(returnArgumentList.empty());
|
||||
s << "extern \"C\" \n";
|
||||
s << "__global__ void " << calleeName << " (\n";
|
||||
s << printArgumentList();
|
||||
s << "\n ) { asm(\" // entry \"); }\n";
|
||||
|
||||
|
||||
/* check if this is an "export" entry */
|
||||
const int entryNameLength = calleeName.length();
|
||||
const int hostNameLength = std::max(entryNameLength-9,0);
|
||||
const std::string ___export(&calleeName.c_str()[hostNameLength]);
|
||||
if (___export.compare("___export") == 0)
|
||||
{
|
||||
std::string hostCalleeName;
|
||||
hostCalleeName.append(calleeName.c_str(), hostNameLength);
|
||||
s << "/*** host interface ***/\n";
|
||||
s << "extern \"C\" \n";
|
||||
s << "__host__ void " << hostCalleeName << " (\n";
|
||||
s << printArgumentList();
|
||||
s << "\n )\n";
|
||||
s << "{\n ";
|
||||
// s << " cudaFuncSetCacheConfig (" << calleeName << ", ";
|
||||
s << " cudaDeviceSetCacheConfig (";
|
||||
#if 1
|
||||
s << " cudaFuncCachePreferEqual ";
|
||||
#elif 1
|
||||
s << " cudaFuncCachePreferL1 ";
|
||||
#else
|
||||
s << " cudaFuncCachePreferShared ";
|
||||
#endif
|
||||
s << ");\n";
|
||||
s << calleeName;
|
||||
s << "<<<1,32>>>(\n";
|
||||
s << printArgumentList(false);
|
||||
s << ");\n";
|
||||
s << " cudaDeviceSynchronize(); \n";
|
||||
s << "}\n";
|
||||
}
|
||||
s << "\n";
|
||||
argumentList.clear();
|
||||
|
||||
out << s.str();
|
||||
}
|
||||
|
||||
void visibleFunctionDeclaration(const std::string &calleeName, LOC)
|
||||
{
|
||||
std::stringstream s;
|
||||
assert(returnArgumentList.size() < 2);
|
||||
s << "extern \"C\" \n";
|
||||
s << "__device__ ";
|
||||
if (returnArgumentList.empty())
|
||||
s << " void ";
|
||||
else
|
||||
s << " " << tokenToDataType(returnArgumentList[0].type, returnArgumentList[0].dim);
|
||||
s << calleeName << " (\n";
|
||||
s << printArgumentList();
|
||||
|
||||
if (returnArgumentList.empty())
|
||||
s << "\n ) { asm(\" // function \"); }\n\n";
|
||||
else
|
||||
{
|
||||
s << "\n ) { asm(\" // function \"); return 0;} /* return value to disable warnings */\n\n";
|
||||
// s << "\n ) { asm(\" // function \"); } /* this will generate warrning */\n\n";
|
||||
}
|
||||
|
||||
argumentList.clear();
|
||||
returnArgumentList.clear();
|
||||
|
||||
out << s.str();
|
||||
}
|
||||
|
||||
void visibleInitializableDeclaration(const std::string &name, LOC)
|
||||
{
|
||||
assert(arrayDimensionsList.size() == 1);
|
||||
std::stringstream s;
|
||||
s << "extern \"C\" __device__ ";
|
||||
if (_alignment > 0)
|
||||
s << "__attribute__((aligned(" << _alignment << "))) ";
|
||||
s << tokenToDataType(_dataTypeId, 0);
|
||||
if (arrayDimensionsList[0] == 0)
|
||||
s << name << ";\n\n";
|
||||
else
|
||||
s << name << "[" << arrayDimensionsList[0] << "] = {0};\n\n";
|
||||
out << s.str();
|
||||
arrayDimensionsList.clear();
|
||||
}
|
||||
|
||||
#undef LOC
|
||||
|
||||
std::string tokenToDataType( token_t token , int dim)
|
||||
{
|
||||
std::stringstream s;
|
||||
switch( token )
|
||||
{
|
||||
case TOKEN_B8:
|
||||
if (dim > 0) s << "b8_t<"<<dim<<"> ";
|
||||
else s << "b8d_t ";
|
||||
break;
|
||||
case TOKEN_U8: assert(0); s << "u8_t "; break;
|
||||
case TOKEN_S8: assert(0); s << "s8_t "; break;
|
||||
|
||||
case TOKEN_B16:
|
||||
if (dim > 0) s << "b16_t<"<<dim<<"> ";
|
||||
else s << "b16d_t ";
|
||||
break;
|
||||
case TOKEN_U16: assert(0); s << "u16_t "; break;
|
||||
case TOKEN_S16: assert(0); s << "s16_t "; break;
|
||||
|
||||
case TOKEN_B32: assert(dim <= 1); s << "b32_t "; break;
|
||||
case TOKEN_U32: assert(dim <= 1); s << "u32_t "; break;
|
||||
case TOKEN_S32: assert(dim <= 1); s << "s32_t "; break;
|
||||
|
||||
case TOKEN_B64: assert(dim <= 1); s << "b64_t "; break;
|
||||
case TOKEN_U64: assert(dim <= 1); s << "u64_t "; break;
|
||||
case TOKEN_S64: assert(dim <= 1); s << "s64_t "; break;
|
||||
|
||||
case TOKEN_F32: assert(dim <= 1); s << "f32_t "; break;
|
||||
case TOKEN_F64: assert(dim <= 1); s << "f64_t "; break;
|
||||
default: std::cerr << "token= " << token<< std::endl; assert(0);
|
||||
}
|
||||
|
||||
return s.str();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
19
ptxtools/alloy_ptxcc.sh
Executable file
19
ptxtools/alloy_ptxcc.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/sh
|
||||
|
||||
PTXCC=$ISPC_HOME/ptxtools/ptxcc
|
||||
PTXGEN=$ISPC_HOME/ptxtools/ptxgen
|
||||
ARGS=${@:2}
|
||||
if [ "$NVVM" == "1" ];
|
||||
then
|
||||
LLVM32=$HOME/usr/local/llvm/bin-3.2
|
||||
LLVMDIS=$LLVM32/bin/llvm-dis
|
||||
$($LLVMDIS $1 -o $1.ll) && $($PTXGEN $1.ll -o $1.ptx) && \
|
||||
$($PTXCC $1.ptx -o $1.o -Xnvcc="-G") && \
|
||||
$(nvcc test_static_nvptx.cpp examples/nvcc_helpers.cu examples/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples/ -D_CUDA_ -lcudadevrt $ARGS);
|
||||
else
|
||||
$($PTXCC $1 -o $1.o -Xnvcc="-G") && \
|
||||
$(nvcc test_static_nvptx.cpp examples/nvcc_helpers.cu examples/ispc_malloc.cpp $1.o -arch=sm_35 -Iexamples/ -D_CUDA_ -lcudadevrt $ARGS);
|
||||
fi
|
||||
|
||||
|
||||
|
||||
1458
ptxtools/main.cpp
Normal file
1458
ptxtools/main.cpp
Normal file
File diff suppressed because it is too large
Load Diff
79
ptxtools/ptx.ll
Normal file
79
ptxtools/ptx.ll
Normal file
@@ -0,0 +1,79 @@
|
||||
%option yylineno
|
||||
%option noyywrap
|
||||
%option yyclass="parser::PTXLexer"
|
||||
%option prefix="ptx"
|
||||
%option c++
|
||||
|
||||
%{
|
||||
#include "PTXLexer.h"
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
#ifdef LLSETTOKEN
|
||||
#error "TOKEN is defined"
|
||||
#endif
|
||||
#define LLSETTOKEN(tok) yylval->ivalue = tok; return tok;
|
||||
%}
|
||||
|
||||
COMMENT ("//"[^\n]*)
|
||||
TAB [\t]*
|
||||
|
||||
%%
|
||||
{COMMENT} {nextColumn += strlen(yytext); /* lCppComment(&yylloc); */ }
|
||||
".version" { return TOKEN_VERSION; }
|
||||
".target" { return TOKEN_TARGET; }
|
||||
".address_size" { return TOKEN_ADDRESS_SIZE; }
|
||||
".func" { return TOKEN_FUNC; }
|
||||
".entry" { return TOKEN_ENTRY; }
|
||||
".align" { return TOKEN_ALIGN; }
|
||||
".visible" { return TOKEN_VISIBLE; }
|
||||
".global" { return TOKEN_GLOBAL; }
|
||||
".param" { return TOKEN_PARAM; }
|
||||
".b0" { LLSETTOKEN( TOKEN_B32);} /* fix for buggy llvm-ptx generator */
|
||||
".b8" { LLSETTOKEN( TOKEN_B8);}
|
||||
".b16" { LLSETTOKEN( TOKEN_B16);}
|
||||
".b32" { LLSETTOKEN( TOKEN_B32);}
|
||||
".b64" { LLSETTOKEN( TOKEN_B64);}
|
||||
".u8" { LLSETTOKEN( TOKEN_U8);}
|
||||
".u16" { LLSETTOKEN( TOKEN_U16);}
|
||||
".u32" { LLSETTOKEN( TOKEN_U32);}
|
||||
".u64" { LLSETTOKEN( TOKEN_U64);}
|
||||
".s8" { LLSETTOKEN( TOKEN_S8);}
|
||||
".s16" { LLSETTOKEN( TOKEN_S16);}
|
||||
".s32" { LLSETTOKEN( TOKEN_S32);}
|
||||
".s64" { LLSETTOKEN( TOKEN_S64);}
|
||||
".f32" { LLSETTOKEN( TOKEN_F32);}
|
||||
".f64" { LLSETTOKEN( TOKEN_F64);}
|
||||
"[" { return '[';}
|
||||
"]" { return ']';}
|
||||
"(" { return '(';}
|
||||
")" { return ')';}
|
||||
"," { return ',';}
|
||||
";" { return ';';}
|
||||
"=" { return '=';}
|
||||
[0-9]+\.[0-9]+ { yylval->fvalue = atof(yytext); return TOKEN_FLOAT; }
|
||||
[0-9]+ { yylval->ivalue = atoi(yytext); return TOKEN_INT; }
|
||||
[a-zA-Z0-9_]+ { strcpy(yylval->svalue, yytext); return TOKEN_STRING;}
|
||||
\n {
|
||||
// yylloc.last_line++;
|
||||
// yylloc.last_column = 1;
|
||||
nextColumn = 1;
|
||||
}
|
||||
. ;
|
||||
%%
|
||||
|
||||
/** Handle a C++-style comment--eat everything up until the end of the line.
|
||||
*/
|
||||
#if 0
|
||||
static void
|
||||
lCppComment(SourcePos *pos) {
|
||||
char c;
|
||||
do {
|
||||
c = yyinput();
|
||||
} while (c != 0 && c != '\n');
|
||||
if (c == '\n') {
|
||||
pos->last_line++;
|
||||
pos->last_column = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
275
ptxtools/ptxcc.cpp
Normal file
275
ptxtools/ptxcc.cpp
Normal file
@@ -0,0 +1,275 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <sys/time.h>
|
||||
#include "PTXParser.h"
|
||||
|
||||
|
||||
/*
|
||||
* The C++ code below is based on the following bash-script:
|
||||
#!/bin/sh
|
||||
|
||||
PTXSRC=$1__tmp_ptx.ptx
|
||||
PTXCU=$1___tmp_ptx.cu
|
||||
PTXSH=$1___tmp_ptx.sh
|
||||
|
||||
NVCCPARM=${@:2}
|
||||
|
||||
DEPTX=dePTX
|
||||
NVCC=nvcc
|
||||
|
||||
$(cat $1 | sed 's/\.b0/\.b32/g' > $PTXSRC) &&
|
||||
$DEPTX < $PTXSRC > $PTXCU &&
|
||||
$NVCC -arch=sm_35 -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \
|
||||
sed 's/\#\$//g'| \
|
||||
awk '{ if ($1 == "LIBRARIES=") print $1$2; else if ($1 == "cicc") print "cp '$PTXSRC'", $NF; else print $0 }' > $PTXSH &&
|
||||
sh $PTXSH
|
||||
|
||||
# rm $PTXCU $PTXSH
|
||||
*
|
||||
*/
|
||||
|
||||
static char lRandomAlNum()
|
||||
{
|
||||
const char charset[] =
|
||||
"0123456789"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz";
|
||||
const size_t max_index = (sizeof(charset) - 1);
|
||||
return charset[ rand() % max_index ];
|
||||
}
|
||||
|
||||
static std::string lRandomString(const size_t length)
|
||||
{
|
||||
timeval t1;
|
||||
gettimeofday(&t1, NULL);
|
||||
srand(t1.tv_usec * t1.tv_sec);
|
||||
std::string str(length,0);
|
||||
std::generate_n( str.begin(), length, lRandomAlNum);
|
||||
return str;
|
||||
}
|
||||
|
||||
static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128])
|
||||
{
|
||||
// Copy over the command line arguments (passed in)
|
||||
for (int i = 0; i < Argc; ++i)
|
||||
argv[i] = Argv[i];
|
||||
argc = Argc;
|
||||
}
|
||||
const char *lGetExt (const char *fspec)
|
||||
{
|
||||
char *e = strrchr (fspec, '.');
|
||||
return e;
|
||||
}
|
||||
|
||||
static std::vector<std::string> lSplitString(const std::string &s, char delim)
|
||||
{
|
||||
std::vector<std::string> elems;
|
||||
std::stringstream ss(s);
|
||||
std::string item;
|
||||
while (std::getline(ss, item, delim)) {
|
||||
if (!item.empty())
|
||||
elems.push_back(item);
|
||||
}
|
||||
return elems;
|
||||
}
|
||||
|
||||
static void lUsage(const int ret)
|
||||
{
|
||||
fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n");
|
||||
fprintf(stdout, " [--help]\t\t\t\t This help\n");
|
||||
fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n");
|
||||
fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35");
|
||||
fprintf(stdout, " [-o <name>]\t\t\t\t Output file name\n");
|
||||
fprintf(stdout, " [-Xnvcc=<arguments>]\t\t Arguments to pass through to \"nvcc\"\n");
|
||||
fprintf(stdout, " \n");
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
int main(int _argc, char * _argv[])
|
||||
{
|
||||
int argc;
|
||||
char *argv[128];
|
||||
lGetAllArgs(_argc, _argv, argc, argv);
|
||||
|
||||
std::string arch="sm_35";
|
||||
std::string filePTX;
|
||||
std::string fileOBJ;
|
||||
std::string extString = ".ptx";
|
||||
bool keepTemporaries = false;
|
||||
bool verbose = false;
|
||||
std::string nvccArguments;
|
||||
|
||||
for (int i = 1; i < argc; ++i)
|
||||
{
|
||||
if (!strcmp(argv[i], "--help"))
|
||||
lUsage(0);
|
||||
else if (!strncmp(argv[i], "--arch=", 7))
|
||||
arch = std::string(argv[i]+7);
|
||||
else if (!strncmp(argv[i], "--keep-temporaries", 11))
|
||||
keepTemporaries = true;
|
||||
else if (!strncmp(argv[i], "--verbose", 9))
|
||||
verbose = true;
|
||||
else if (!strncmp(argv[i], "-Xnvcc=", 7))
|
||||
nvccArguments = std::string(argv[i]+7);
|
||||
else if (!strcmp(argv[i], "-o"))
|
||||
{
|
||||
if (++i == argc)
|
||||
{
|
||||
fprintf(stderr, "No output file specified after -o option.\n");
|
||||
lUsage(1);
|
||||
}
|
||||
fileOBJ = std::string(argv[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * ext = strrchr(argv[i], '.');
|
||||
if (ext == NULL)
|
||||
{
|
||||
fprintf(stderr, " Unknown argument: %s \n", argv[i]);
|
||||
lUsage(1);
|
||||
}
|
||||
else if (strncmp(ext, extString.c_str(), 4))
|
||||
{
|
||||
fprintf(stderr, " Unkown extension of the input file: %s \n", ext);
|
||||
lUsage(1);
|
||||
}
|
||||
else if (filePTX.empty())
|
||||
{
|
||||
filePTX = std::string(argv[i]);
|
||||
if (fileOBJ.empty())
|
||||
{
|
||||
char * baseName = argv[i];
|
||||
while (baseName != ext)
|
||||
fileOBJ += std::string(baseName++,1);
|
||||
}
|
||||
fileOBJ += ".o";
|
||||
}
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
fprintf(stderr, " fileOBJ= %s\n", fileOBJ.c_str());
|
||||
fprintf(stderr, " arch= %s\n", arch.c_str());
|
||||
fprintf(stderr, " file= %s\n", filePTX.empty() ? "$stdin" : filePTX.c_str());
|
||||
fprintf(stderr, " num_args= %d\n", (int)nvccArgumentList.size());
|
||||
for (int i= 0; i < (int)nvccArgumentList.size(); i++)
|
||||
fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str());
|
||||
#endif
|
||||
assert(arch == std::string("sm_35"));
|
||||
if (filePTX.empty())
|
||||
{
|
||||
fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// open a file handle to a particular file:
|
||||
std::ifstream inputPTX(filePTX.c_str());
|
||||
if (!inputPTX)
|
||||
{
|
||||
fprintf(stderr, "ptxcc: error: %s: No such file\n", filePTX.c_str());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string randomBaseName = std::string("/tmp/") + lRandomString(8) + "_" + lSplitString(lSplitString(filePTX,'/').back(),'.')[0];
|
||||
if (verbose)
|
||||
fprintf(stderr, "baseFileName= %s\n", randomBaseName.c_str());
|
||||
|
||||
std::string fileCU= randomBaseName + ".cu";
|
||||
std::ofstream outputCU(fileCU.c_str());
|
||||
assert(outputCU);
|
||||
|
||||
std::istream & input = inputPTX;
|
||||
std::ostream & output = outputCU;
|
||||
std::ostream & error = std::cerr;
|
||||
parser::PTXLexer lexer(&input, &error);
|
||||
parser::PTXParser state(output);
|
||||
|
||||
// parse through the input until there is no more:
|
||||
//
|
||||
|
||||
do {
|
||||
ptx::yyparse(lexer, state);
|
||||
}
|
||||
while (!input.eof());
|
||||
|
||||
inputPTX.close();
|
||||
outputCU.close();
|
||||
|
||||
// process output from nvcc
|
||||
//
|
||||
/* nvcc -dc -arch=$arch -dryrun -argumentlist fileCU */
|
||||
|
||||
std::string fileSH= randomBaseName + ".sh";
|
||||
|
||||
std::string nvccExe("nvcc");
|
||||
std::string nvccCmd;
|
||||
nvccCmd += nvccExe + std::string(" ");
|
||||
nvccCmd += "-dc ";
|
||||
nvccCmd += std::string("-arch=") + arch + std::string(" ");
|
||||
nvccCmd += "-dryrun ";
|
||||
nvccCmd += nvccArguments + std::string(" ");
|
||||
nvccCmd += std::string("-o ") + fileOBJ + std::string(" ");
|
||||
nvccCmd += fileCU + std::string(" ");
|
||||
nvccCmd += std::string("2> ") + fileSH;
|
||||
if (verbose)
|
||||
fprintf(stderr , "%s\n", nvccCmd.c_str());
|
||||
const int nvccRet = std::system(nvccCmd.c_str());
|
||||
if (nvccRet)
|
||||
fprintf(stderr, "FAIL: %s\n", nvccCmd.c_str());
|
||||
|
||||
|
||||
std::ifstream inputSH(fileSH.c_str());
|
||||
assert(inputSH);
|
||||
std::vector<std::string> nvccSteps;
|
||||
while (!inputSH.eof())
|
||||
{
|
||||
nvccSteps.push_back(std::string());
|
||||
std::getline(inputSH, nvccSteps.back());
|
||||
if (nvccRet)
|
||||
fprintf(stderr, " %s\n", nvccSteps.back().c_str());
|
||||
}
|
||||
inputSH.close();
|
||||
if (nvccRet)
|
||||
exit(-1);
|
||||
|
||||
|
||||
for (int i = 0; i < (int)nvccSteps.size(); i++)
|
||||
{
|
||||
std::string cmd = nvccSteps[i];
|
||||
for (int j = 0; j < (int)cmd.size()-1; j++)
|
||||
if (cmd[j] == '#' && cmd[j+1] == '$')
|
||||
cmd[j] = cmd[j+1] = ' ';
|
||||
std::vector<std::string> splitCmd = lSplitString(cmd, ' ');
|
||||
|
||||
if (!splitCmd.empty())
|
||||
{
|
||||
if (splitCmd[0] == std::string("cicc"))
|
||||
cmd = std::string(" cp ") + filePTX + std::string(" ") + splitCmd.back();
|
||||
if (splitCmd[0] == std::string("LIBRARIES="))
|
||||
cmd = "";
|
||||
}
|
||||
nvccSteps[i] = cmd;
|
||||
if (verbose)
|
||||
fprintf(stderr, "%3d: %s\n", i, cmd.c_str());
|
||||
const int ret = std::system(cmd.c_str());
|
||||
if (ret)
|
||||
{
|
||||
fprintf(stderr, " Something went wrong .. \n");
|
||||
for (int j = 0; j < i; j++)
|
||||
fprintf(stderr, "PASS: %s\n", nvccSteps[j].c_str());
|
||||
fprintf(stderr, "FAIL: %s\n", nvccSteps[i].c_str());
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!keepTemporaries)
|
||||
{
|
||||
/* remove temporaries */
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
408
ptxtools/ptxgen.cpp
Normal file
408
ptxtools/ptxgen.cpp
Normal file
@@ -0,0 +1,408 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <nvvm.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
template<typename T>
|
||||
static std::string lValueToString(const T& value)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << value;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
typedef struct stat Stat;
|
||||
|
||||
|
||||
#define PTXGENStatus int
|
||||
enum {
|
||||
PTXGEN_SUCCESS = 0x0000,
|
||||
PTXGEN_FILE_IO_ERROR = 0x0001,
|
||||
PTXGEN_BAD_ALLOC_ERROR = 0x0002,
|
||||
PTXGEN_LIBNVVM_COMPILATION_ERROR = 0x0004,
|
||||
PTXGEN_LIBNVVM_ERROR = 0x0008,
|
||||
PTXGEN_INVALID_USAGE = 0x0010,
|
||||
PTXGEN_LIBNVVM_HOME_UNDEFINED = 0x0020,
|
||||
PTXGEN_LIBNVVM_VERIFICATION_ERROR = 0x0040
|
||||
};
|
||||
|
||||
static PTXGENStatus getLibDeviceName(const int computeArch, std::string &libDeviceName)
|
||||
{
|
||||
const char *env = getenv("LIBNVVM_HOME");
|
||||
#ifdef LIBNVVM_HOME
|
||||
#define STRINGIFY(x) #x
|
||||
#define TOSTRING(x) STRINGIFY(x)
|
||||
const std::string libnvvmPath(env ? env : TOSTRING(LIBNVVM_HOME));
|
||||
#undef TOSTRING
|
||||
#undef STRINGIFY
|
||||
#else
|
||||
const std::string libnvvmPath(env);
|
||||
#endif
|
||||
|
||||
if (libnvvmPath.empty())
|
||||
{
|
||||
fprintf(stderr, "The environment variable LIBNVVM_HOME is undefined\n");
|
||||
return PTXGEN_LIBNVVM_HOME_UNDEFINED;
|
||||
}
|
||||
|
||||
/* Use libdevice for compute_20, if the target is not compute_20, compute_30,
|
||||
* or compute_35. */
|
||||
const std::string libdevice =
|
||||
std::string("/libdevice/libdevice.compute_") +
|
||||
lValueToString(computeArch)+ "." +
|
||||
lValueToString(LIBDEVICE_MAJOR_VERSION) +
|
||||
lValueToString(LIBDEVICE_MINOR_VERSION) +
|
||||
".bc";
|
||||
|
||||
libDeviceName = libnvvmPath + libdevice;
|
||||
|
||||
return PTXGEN_SUCCESS;
|
||||
}
|
||||
|
||||
static PTXGENStatus addFileToProgram(const std::string &filename, nvvmProgram prog)
|
||||
{
|
||||
char *buffer;
|
||||
size_t size;
|
||||
Stat fileStat;
|
||||
|
||||
/* Open the input file. */
|
||||
FILE *f = fopen(filename.c_str(), "rb");
|
||||
if (f == NULL) {
|
||||
fprintf(stderr, "Failed to open %s\n", filename.c_str());
|
||||
return PTXGEN_FILE_IO_ERROR;
|
||||
}
|
||||
|
||||
/* Allocate buffer for the input. */
|
||||
fstat(fileno(f), &fileStat);
|
||||
buffer = (char *) malloc(fileStat.st_size);
|
||||
if (buffer == NULL) {
|
||||
fprintf(stderr, "Failed to allocate memory\n");
|
||||
return PTXGEN_BAD_ALLOC_ERROR;
|
||||
}
|
||||
size = fread(buffer, 1, fileStat.st_size, f);
|
||||
if (ferror(f)) {
|
||||
fprintf(stderr, "Failed to read %s\n", filename.c_str());
|
||||
fclose(f);
|
||||
free(buffer);
|
||||
return PTXGEN_FILE_IO_ERROR;
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
if (nvvmAddModuleToProgram(prog, buffer, size, filename.c_str()) != NVVM_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Failed to add the module %s to the compilation unit\n",
|
||||
filename.c_str());
|
||||
free(buffer);
|
||||
return PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
return PTXGEN_SUCCESS;
|
||||
}
|
||||
|
||||
static PTXGENStatus generatePTX(
|
||||
std::vector<std::string> nvvmOptions,
|
||||
std::vector<std::string> nvvmFiles,
|
||||
std::ostream &out,
|
||||
const int computeArch)
|
||||
{
|
||||
nvvmProgram prog;
|
||||
PTXGENStatus status;
|
||||
|
||||
/* Create the compiliation unit. */
|
||||
if (nvvmCreateProgram(&prog) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to create the compilation unit\n");
|
||||
return PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/* Add libdevice. */
|
||||
std::string libDeviceName;
|
||||
status = getLibDeviceName(computeArch, libDeviceName);
|
||||
if (status != PTXGEN_SUCCESS)
|
||||
{
|
||||
nvvmDestroyProgram(&prog);
|
||||
return status;
|
||||
}
|
||||
status = addFileToProgram(libDeviceName, prog);
|
||||
if (status != PTXGEN_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Please double-check LIBNVVM_HOME environmental variable.\n");
|
||||
nvvmDestroyProgram(&prog);
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Add the module to the compilation unit. */
|
||||
for (int i = 0; i < (int)nvvmFiles.size(); ++i)
|
||||
{
|
||||
status = addFileToProgram(nvvmFiles[i], prog);
|
||||
if (status != PTXGEN_SUCCESS)
|
||||
{
|
||||
nvvmDestroyProgram(&prog);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
const int numOptions = nvvmOptions.size();
|
||||
std::vector<const char*> options(numOptions);
|
||||
for (int i = 0; i < numOptions; i++)
|
||||
options[i] = nvvmOptions[i].c_str();
|
||||
|
||||
/* Verify the compilation unit. */
|
||||
if (nvvmVerifyProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to verify the compilation unit\n");
|
||||
status |= PTXGEN_LIBNVVM_VERIFICATION_ERROR;
|
||||
}
|
||||
|
||||
/* Print warnings and errors. */
|
||||
{
|
||||
size_t logSize;
|
||||
if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to get the compilation log size\n");
|
||||
status |= PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string log(logSize,0);
|
||||
if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to get the compilation log\n");
|
||||
status |= PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s\n", log.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (status & PTXGEN_LIBNVVM_VERIFICATION_ERROR)
|
||||
{
|
||||
nvvmDestroyProgram(&prog);
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Compile the compilation unit. */
|
||||
if (nvvmCompileProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to generate PTX from the compilation unit\n");
|
||||
status |= PTXGEN_LIBNVVM_COMPILATION_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t ptxSize;
|
||||
if (nvvmGetCompiledResultSize(prog, &ptxSize) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to get the PTX output size\n");
|
||||
status |= PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string ptx(ptxSize,0);
|
||||
if (nvvmGetCompiledResult(prog, &ptx[0]) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to get the PTX output\n");
|
||||
status |= PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
out << ptx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Print warnings and errors. */
|
||||
{
|
||||
size_t logSize;
|
||||
if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to get the compilation log size\n");
|
||||
status |= PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string log(logSize,0);
|
||||
if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "Failed to get the compilation log\n");
|
||||
status |= PTXGEN_LIBNVVM_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s\n", log.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Release the resources. */
|
||||
nvvmDestroyProgram(&prog);
|
||||
|
||||
return PTXGEN_SUCCESS;
|
||||
}
|
||||
|
||||
static void showUsage()
|
||||
{
|
||||
fprintf(stderr,"Usage: ptxgen [OPTION]... [FILE]...\n"
|
||||
" [FILE] could be a .bc file or a .ll file\n");
|
||||
}
|
||||
|
||||
static void lUsage(const int ret)
|
||||
{
|
||||
fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n");
|
||||
fprintf(stdout, " [--help]\t\t This help\n");
|
||||
fprintf(stdout, " [--verbose]\t\t Be verbose\n");
|
||||
fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35");
|
||||
fprintf(stdout, " [-o <name>]\t\t Output file name\n");
|
||||
fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n");
|
||||
fprintf(stdout, " [--opt=]\t\t Optimization parameters \n");
|
||||
fprintf(stdout, " \t\t\t 0 - disable optimizations \n");
|
||||
fprintf(stdout, " \t\t\t 3 - defalt, enable optimizations \n");
|
||||
fprintf(stdout, " [--ftz=]\t\t Flush-to-zero mode when performsing single-precision floating-point operations\n");
|
||||
fprintf(stdout, " \t\t\t 0 - default, preserve denormal values\n");
|
||||
fprintf(stdout, " \t\t\t 1 - flush denormal values to zero\n");
|
||||
fprintf(stdout, " [--prec-sqrt=]\t Precision mode for single-precision floating-point square root\n");
|
||||
fprintf(stdout, " \t\t\t 0 - use a faster approximation\n");
|
||||
fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n");
|
||||
fprintf(stdout, " [--prec-div=]\t Precision mode for single-precision floating-point division and reciprocals\n");
|
||||
fprintf(stdout, " \t\t\t 0 - use a faster approximation\n");
|
||||
fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n");
|
||||
fprintf(stdout, " [--fma=]\t\t FMA contraction mode \n");
|
||||
fprintf(stdout, " \t\t\t 0 - disable\n");
|
||||
fprintf(stdout, " \t\t\t 1 - default, enable\n");
|
||||
fprintf(stdout, " [--use_fast_math]\t Make use of fast maih. Implies --ftz=1 --prec-div=0 --prec-sqrt=0\n");
|
||||
fprintf(stdout, " \n");
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int _opt = 3;
|
||||
int _ftz = 0;
|
||||
int _precSqrt = 1;
|
||||
int _precDiv = 1;
|
||||
int _fma = 1;
|
||||
bool _useFastMath = false;
|
||||
bool _debug = false;
|
||||
bool _verbose = false;
|
||||
std::string _arch = "sm_35";
|
||||
std::string fileIR, filePTX;
|
||||
|
||||
for (int i = 1; i < argc; ++i)
|
||||
{
|
||||
if (!strcmp(argv[i], "--help"))
|
||||
lUsage(0);
|
||||
else if (!strncmp(argv[i], "--arch=", 7))
|
||||
_arch = std::string(argv[i]+7);
|
||||
else if (!strncmp(argv[i], "-g", 2))
|
||||
_debug = true;
|
||||
else if (!strncmp(argv[i], "--verbose", 9))
|
||||
_verbose = true;
|
||||
else if (!strncmp(argv[i], "--opt=", 6))
|
||||
_opt = atoi(argv[i]+6);
|
||||
else if (!strncmp(argv[i], "--ftz=", 6))
|
||||
_ftz = atoi(argv[i]+6);
|
||||
else if (!strncmp(argv[i], "--prec-sqrt=", 12))
|
||||
_precSqrt = atoi(argv[i]+12);
|
||||
else if (!strncmp(argv[i], "--prec-div=", 11))
|
||||
_precDiv = atoi(argv[i]+11);
|
||||
else if (!strncmp(argv[i], "--fma=", 6))
|
||||
_fma = atoi(argv[i]+6);
|
||||
else if (!strncmp(argv[i], "--use_fast_math", 15))
|
||||
_useFastMath = true;
|
||||
else if (!strcmp(argv[i], "-o"))
|
||||
{
|
||||
if (++i == argc)
|
||||
{
|
||||
fprintf(stderr, "No output file specified after -o option.\n");
|
||||
lUsage(1);
|
||||
}
|
||||
filePTX = std::string(argv[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * ext = strrchr(argv[i], '.');
|
||||
if (ext == NULL)
|
||||
{
|
||||
fprintf(stderr, " Unknown argument: %s \n", argv[i]);
|
||||
lUsage(1);
|
||||
}
|
||||
else if (strncmp(ext, ".ll", 3) && strncmp(ext, ".bc", 3))
|
||||
{
|
||||
fprintf(stderr, " Unkown extension of the input file: %s \n", ext);
|
||||
lUsage(1);
|
||||
}
|
||||
else if (filePTX.empty())
|
||||
{
|
||||
fileIR = std::string(argv[i]);
|
||||
if (filePTX.empty())
|
||||
{
|
||||
char * baseName = argv[i];
|
||||
while (baseName != ext)
|
||||
filePTX += std::string(baseName++,1);
|
||||
}
|
||||
filePTX += ".ptx";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fileIR.empty())
|
||||
{
|
||||
fprintf(stderr, "ptxgen fatal : No input file specified; use option --help for more information\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#if 0
|
||||
fprintf(stderr, "fileIR= %s\n", fileIR.c_str());
|
||||
fprintf(stderr, "filePTX= %s\n", filePTX.c_str());
|
||||
fprintf(stderr, "arch= %s\n", _arch.c_str());
|
||||
fprintf(stderr, "debug= %s\n", _debug ? "true" : "false");
|
||||
fprintf(stderr, "verbose= %s\n", _verbose ? "true" : "false");
|
||||
fprintf(stderr, "opt= %d\n", _opt);
|
||||
fprintf(stderr, "ftz= %d\n", _ftz);
|
||||
fprintf(stderr, "prec-sqrt= %d\n", _precSqrt);
|
||||
fprintf(stderr, "prec-div= %d\n", _precDiv);
|
||||
fprintf(stderr, "fma= %d\n", _fma);
|
||||
fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? "true" : "false");
|
||||
#endif
|
||||
|
||||
int computeArch = 35;
|
||||
assert(_arch == std::string("sm_35"));
|
||||
|
||||
if (_useFastMath)
|
||||
{
|
||||
_ftz = 1;
|
||||
_precSqrt = _precDiv = 0;
|
||||
}
|
||||
|
||||
std::vector<std::string> nvvmOptions;
|
||||
nvvmOptions.push_back("-arch=compute_35");
|
||||
nvvmOptions.push_back("-ftz=" + lValueToString(_ftz));
|
||||
nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt));
|
||||
nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv));
|
||||
nvvmOptions.push_back("-fma=" + lValueToString(_fma));
|
||||
if (_debug)
|
||||
nvvmOptions.push_back("-g");
|
||||
|
||||
std::vector<std::string> nvvmFiles;
|
||||
nvvmFiles.push_back(fileIR);
|
||||
|
||||
std::ofstream outputPTX(filePTX.c_str());
|
||||
assert(outputPTX);
|
||||
|
||||
const int ret = generatePTX(nvvmOptions, nvvmFiles, outputPTX, computeArch);
|
||||
outputPTX.open(filePTX.c_str());
|
||||
return ret;
|
||||
}
|
||||
|
||||
1929
ptxtools/ptxgrammar.cpp
Normal file
1929
ptxtools/ptxgrammar.cpp
Normal file
File diff suppressed because it is too large
Load Diff
215
ptxtools/ptxgrammar.yy
Normal file
215
ptxtools/ptxgrammar.yy
Normal file
@@ -0,0 +1,215 @@
|
||||
%locations
|
||||
|
||||
%{
|
||||
#include <iostream>
|
||||
#include "PTXParser.h"
|
||||
#include "PTXLexer.h"
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#include <cstdio>
|
||||
|
||||
#define YYERROR_VERBOSE 1
|
||||
|
||||
#ifdef REPORT_BASE
|
||||
#undef REPORT_BASE
|
||||
#endif
|
||||
|
||||
#define REPORT_BASE 0
|
||||
|
||||
namespace ptx
|
||||
{
|
||||
|
||||
int yylex( YYSTYPE* token, YYLTYPE* location, parser::PTXLexer& lexer,
|
||||
parser::PTXParser& state );
|
||||
void yyerror( YYLTYPE* location, parser::PTXLexer& lexer,
|
||||
parser::PTXParser& state, char const* message );
|
||||
|
||||
std::string yyTypeToString( int );
|
||||
|
||||
%}
|
||||
|
||||
%union
|
||||
{
|
||||
char svalue[1024];
|
||||
double fvalue;
|
||||
int ivalue;
|
||||
unsigned int uvalue;
|
||||
}
|
||||
|
||||
%parse-param {parser::PTXLexer& lexer}
|
||||
%parse-param {parser::PTXParser& state}
|
||||
%lex-param {parser::PTXLexer& lexer}
|
||||
%lex-param {parser::PTXParser& state}
|
||||
%pure-parser
|
||||
|
||||
// define the constant-string tokens:
|
||||
%token TOKEN_VERSION TOKEN_TARGET TOKEN_ADDRESS_SIZE
|
||||
%token TOKEN_VISIBLE TOKEN_FUNC TOKEN_ENTRY
|
||||
%token TOKEN_PARAM TOKEN_ALIGN
|
||||
%token TOKEN_GLOBAL
|
||||
%token<ivalue> TOKEN_B8 TOKEN_B16 TOKEN_B32 TOKEN_B64
|
||||
%token<ivalue> TOKEN_U8 TOKEN_U16 TOKEN_U32 TOKEN_U64
|
||||
%token<ivalue> TOKEN_S8 TOKEN_S16 TOKEN_S32 TOKEN_S64
|
||||
%token<ivalue> TOKEN_F32 TOKEN_F64
|
||||
|
||||
// define the "terminal symbol" token types I'm going to use (in CAPS
|
||||
// by convention), and associate each with a field of the union:
|
||||
%token <ivalue> TOKEN_INT
|
||||
%token <fvalue> TOKEN_FLOAT
|
||||
%token <svalue> TOKEN_STRING
|
||||
|
||||
%type<svalue> identifier
|
||||
%type<ivalue> arrayDimensionSet
|
||||
%type<ivalue> alignment
|
||||
|
||||
%start ptxsource
|
||||
|
||||
%%
|
||||
// the first rule defined is the highest-level rule, which in our
|
||||
// case is just the concept of a whole "snazzle file":
|
||||
ptxsource:
|
||||
header ptxbody;
|
||||
|
||||
header:
|
||||
version target address_size
|
||||
{
|
||||
// std::cerr << "Done reading PTX \n" << std::endl;
|
||||
state.printHeader();
|
||||
};
|
||||
|
||||
version:
|
||||
TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; };
|
||||
target:
|
||||
TOKEN_TARGET TOKEN_STRING { assert(std::string($2) == std::string("sm_35")); } //std::cerr << "Target " << $2 << std::endl; };
|
||||
address_size:
|
||||
TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };
|
||||
|
||||
|
||||
dataTypeId :
|
||||
TOKEN_U8 | TOKEN_U16 | TOKEN_U32 | TOKEN_U64
|
||||
| TOKEN_S8 | TOKEN_S16 | TOKEN_S32 | TOKEN_S64
|
||||
| TOKEN_B8 | TOKEN_B16 | TOKEN_B32 | TOKEN_B64
|
||||
| TOKEN_F32 | TOKEN_F64;
|
||||
|
||||
dataType: dataTypeId { state.dataTypeId($<ivalue>1); }
|
||||
|
||||
anytoken:
|
||||
TOKEN_ALIGN
|
||||
| TOKEN_PARAM
|
||||
| dataTypeId
|
||||
| TOKEN_STRING | TOKEN_FLOAT | TOKEN_INT
|
||||
| TOKEN_FUNC | TOKEN_ENTRY
|
||||
| TOKEN_GLOBAL
|
||||
| '['
|
||||
| ']'
|
||||
| '('
|
||||
| ')'
|
||||
| ','
|
||||
| ';'
|
||||
| '='
|
||||
;
|
||||
|
||||
ptxbody:
|
||||
ptxbody visibleFunctionDeclaration | visibleFunctionDeclaration
|
||||
| ptxbody visibleEntryDeclaration| visibleEntryDeclaration
|
||||
| ptxbody visibleInitializableDeclaration| visibleInitializableDeclaration
|
||||
| ptxbody anytoken | anytoken;
|
||||
|
||||
|
||||
|
||||
arrayDimensionSet : '[' TOKEN_INT ']' { $$ = $2; state.arrayDimensions($<ivalue>2); }
|
||||
// arrayDimensionSet : arrayDimensionSet '[' TOKEN_INT ']' { $$ = $2; }
|
||||
// arrayDimensionSet : '[' ']' { $$ = 0; }
|
||||
arrayDimensions : /* empty string */;
|
||||
arrayDimensions : arrayDimensionSet;
|
||||
|
||||
identifier: TOKEN_STRING { strcpy($$, $1); state.identifier($1); }
|
||||
parameter : TOKEN_PARAM;
|
||||
|
||||
alignment : TOKEN_ALIGN TOKEN_INT {$$ = $2; state.alignment($<ivalue>2);}
|
||||
addressableVariablePrefix : dataType { state.alignment(0); }
|
||||
addressableVariablePrefix : alignment dataType;
|
||||
|
||||
argumentDeclaration : parameter addressableVariablePrefix identifier arrayDimensions
|
||||
{
|
||||
state.argumentDeclaration(@1);
|
||||
}
|
||||
|
||||
|
||||
argumentListBegin : '(' { state.argumentListBegin(@1); };
|
||||
argumentListEnd : ')' {state.argumentListEnd(@1); };
|
||||
argumentListBody : argumentDeclaration;
|
||||
argumentListBody : /* empty string */;
|
||||
argumentListBody : argumentListBody ',' argumentDeclaration;
|
||||
argumentList: argumentListBegin argumentListBody argumentListEnd;
|
||||
|
||||
visibleEntryDeclaration: TOKEN_VISIBLE TOKEN_ENTRY identifier argumentList
|
||||
{
|
||||
state.visibleEntryDeclaration($<svalue>3, @1);
|
||||
};
|
||||
|
||||
returnArgumentListBegin : '(' { state.returnArgumentListBegin(@1); }
|
||||
returnArgumentListEnd : ')' {state.returnArgumentListEnd(@1); }
|
||||
returnArgumentList : returnArgumentListBegin argumentListBody returnArgumentListEnd;
|
||||
optionalReturnArgumentList : returnArgumentList | /* empty string */;
|
||||
visibleFunctionDeclaration: TOKEN_VISIBLE TOKEN_FUNC optionalReturnArgumentList identifier argumentList
|
||||
{
|
||||
state.visibleFunctionDeclaration($<svalue>4, @1);
|
||||
};
|
||||
|
||||
visibleInitializableDeclaration :
|
||||
TOKEN_VISIBLE TOKEN_GLOBAL addressableVariablePrefix identifier arrayDimensionSet
|
||||
{ state.visibleInitializableDeclaration($<svalue>4,@1); }
|
||||
| TOKEN_VISIBLE TOKEN_GLOBAL addressableVariablePrefix identifier ';'
|
||||
{state.arrayDimensions(0); state.visibleInitializableDeclaration($<svalue>4,@1); }
|
||||
| TOKEN_VISIBLE TOKEN_GLOBAL addressableVariablePrefix identifier '='
|
||||
{state.arrayDimensions(0); state.visibleInitializableDeclaration($<svalue>4,@1); }
|
||||
|
||||
|
||||
%%
|
||||
|
||||
int yylex( YYSTYPE* token, YYLTYPE* location, parser::PTXLexer& lexer,
|
||||
parser::PTXParser& state )
|
||||
{
|
||||
lexer.yylval = token;
|
||||
|
||||
int tokenValue = lexer.yylexPosition();
|
||||
location->first_line = lexer.lineno();
|
||||
location->first_column = lexer.column;
|
||||
|
||||
#if 0
|
||||
report( " Lexer (" << location->first_line << ","
|
||||
<< location->first_column
|
||||
<< "): " << parser::PTXLexer::toString( tokenValue ) << " \""
|
||||
<< lexer.YYText() << "\"");
|
||||
#endif
|
||||
|
||||
return tokenValue;
|
||||
}
|
||||
|
||||
static std::string toString( YYLTYPE& location, parser::PTXParser& state )
|
||||
{
|
||||
std::stringstream stream;
|
||||
stream
|
||||
#if 0
|
||||
<< state.fileName
|
||||
#else
|
||||
<< "ptx "
|
||||
#endif
|
||||
<< " (" << location.first_line << ", "
|
||||
<< location.first_column << "): ";
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
void yyerror( YYLTYPE* location, parser::PTXLexer& lexer,
|
||||
parser::PTXParser& state, char const* message )
|
||||
{
|
||||
std::stringstream stream;
|
||||
stream << toString( *location, state )
|
||||
<< " " << message;
|
||||
fprintf(stderr, "--Parser ERROR-- %s %s \n", toString(*location, state).c_str(), message);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -258,7 +258,7 @@ def run_test(testname):
|
||||
if should_fail:
|
||||
cc_cmd += " -DEXPECT_FAILURE"
|
||||
if is_nvptx_target:
|
||||
nvptxcc_exe = "nvptxcc"
|
||||
nvptxcc_exe = "ptxtools/alloy_ptxcc.sh"
|
||||
nvptxcc_exe_rel = add_prefix(nvptxcc_exe)
|
||||
cc_cmd = "%s %s -DTEST_SIG=%d -o %s" % \
|
||||
(nvptxcc_exe_rel, obj_name, match, exe_name)
|
||||
@@ -270,7 +270,7 @@ def run_test(testname):
|
||||
if is_generic_target:
|
||||
ispc_cmd += " --emit-c++ --c++-include-file=%s" % add_prefix(options.include_file)
|
||||
if is_nvptx_target:
|
||||
filename4ptx = filename+".ptx.parsed_ispc"
|
||||
filename4ptx = "/tmp/"+os.path.basename(filename)+".parsed.ispc"
|
||||
grep_cmd = "grep -v 'export uniform int width' %s > %s " % \
|
||||
(filename, filename4ptx)
|
||||
if options.verbose:
|
||||
|
||||
Reference in New Issue
Block a user