Merge pull request #974 from egaburov/sm37
added support for multiple GPU targets: tested with sm_35 (K20/K40) and sm_37 (K80)
This commit is contained in:
@@ -2,7 +2,15 @@ PROG=ao
|
|||||||
ISPC_SRC=ao.ispc
|
ISPC_SRC=ao.ispc
|
||||||
CU_SRC=ao.cu
|
CU_SRC=ao.cu
|
||||||
CXX_SRC=ao.cpp
|
CXX_SRC=ao.cpp
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=128
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
||||||
|
|
||||||
#LLVM_GPU=1
|
#LLVM_GPU=1
|
||||||
|
|||||||
@@ -1,20 +1,21 @@
|
|||||||
NVCC_SRC=../../util/nvcc_helpers.cu
|
NVCC_SRC=../../util/nvcc_helpers.cu
|
||||||
NVCC_OBJS=objs_ptx/nvcc_helpers_nvcc.o
|
NVCC_OBJS=objs_ptx/nvcc_helpers_nvcc.o
|
||||||
|
NVARCH ?= sm_35
|
||||||
#
|
#
|
||||||
CXX=g++ -ffast-math
|
CXX=g++ -ffast-math
|
||||||
CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_ptx/ -D_CUDA_ -I../../util -I../../
|
CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_ptx/ -D_CUDA_ -I../../util -I../../
|
||||||
#
|
#
|
||||||
NVCC=nvcc
|
NVCC=nvcc
|
||||||
NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/
|
NVCC_FLAGS+=-O3 -arch=$(NVARCH) -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/
|
||||||
ifdef PTXCC_REGMAX
|
ifdef PTXCC_REGMAX
|
||||||
NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX)
|
NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX)
|
||||||
endif
|
endif
|
||||||
NVCC_FLAGS+=--use_fast_math
|
NVCC_FLAGS+=--use_fast_math
|
||||||
#
|
#
|
||||||
LD=nvcc
|
LD=nvcc
|
||||||
LDFLAGS=-lcudart -lcudadevrt -arch=sm_35
|
LDFLAGS=-lcudart -lcudadevrt -arch=$(NVARCH)
|
||||||
#
|
#
|
||||||
PTXCC=$(ISPC_HOME)/ptxtools/ptxcc
|
PTXCC=$(ISPC_HOME)/ptxtools/ptxcc --arch=$(NVARCH)
|
||||||
PTXCC_FLAGS+= -Xptxas=-v
|
PTXCC_FLAGS+= -Xptxas=-v
|
||||||
ifdef PTXCC_REGMAX
|
ifdef PTXCC_REGMAX
|
||||||
PTXCC_FLAGS += -maxrregcount=$(PTXCC_REGMAX)
|
PTXCC_FLAGS += -maxrregcount=$(PTXCC_REGMAX)
|
||||||
@@ -41,13 +42,13 @@ CXX_SRC+=ispc_malloc.cpp
|
|||||||
CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o
|
CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o
|
||||||
|
|
||||||
PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen
|
PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen
|
||||||
PTXGEN += --use_fast_math
|
PTXGEN += --use_fast_math --arch=$(NVARCH)
|
||||||
|
|
||||||
#LLVM32=$(HOME)/usr/local/llvm/bin-3.2
|
#LLVM32=$(HOME)/usr/local/llvm/bin-3.2
|
||||||
#LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
#LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||||
|
|
||||||
LLC=$(LLVM_ROOT)/bin/llc
|
LLC=$(LLVM_ROOT)/bin/llc
|
||||||
LLC_FLAGS=-march=nvptx64 -mcpu=sm_35
|
LLC_FLAGS=-march=nvptx64 -mcpu=$(NVARCH)
|
||||||
|
|
||||||
# .SUFFIXES: .bc .o .cu .ll
|
# .SUFFIXES: .bc .o .cu .ll
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,14 @@ PROG=deferred_shading
|
|||||||
ISPC_SRC=kernels.ispc
|
ISPC_SRC=kernels.ispc
|
||||||
CU_SRC=kernels.cu
|
CU_SRC=kernels.cu
|
||||||
CXX_SRC=common.cpp main.cpp
|
CXX_SRC=common.cpp main.cpp
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=128
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
#LLVM_GPU=1
|
#LLVM_GPU=1
|
||||||
|
|||||||
@@ -2,10 +2,19 @@ PROG=mergeSort
|
|||||||
ISPC_SRC=mergeSort.ispc
|
ISPC_SRC=mergeSort.ispc
|
||||||
CU_SRC=mergeSort.cu
|
CU_SRC=mergeSort.cu
|
||||||
CXX_SRC=mergeSort.cpp mergeSort.cpp
|
CXX_SRC=mergeSort.cpp mergeSort.cpp
|
||||||
PTXCC_REGMAX=64
|
|
||||||
#PTXCC_FLAGS= -Xptxas=-O3
|
#PTXCC_FLAGS= -Xptxas=-O3
|
||||||
#NVCC_FLAGS=-Xptxas=-O0
|
#NVCC_FLAGS=-Xptxas=-O0
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=128
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
|
PTXCC_REGMAX=64
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
LLVM_GPU=1
|
LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|
||||||
|
|||||||
@@ -2,10 +2,18 @@ PROG=hermite4
|
|||||||
ISPC_SRC=hermite4.ispc
|
ISPC_SRC=hermite4.ispc
|
||||||
#CU_SRC=hermite4.cu
|
#CU_SRC=hermite4.cu
|
||||||
CXX_SRC=hermite4.cpp
|
CXX_SRC=hermite4.cpp
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=128
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
||||||
|
|
||||||
#LLVM_GPU=1
|
# LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|
||||||
include ../common_ptx.mk
|
include ../common_ptx.mk
|
||||||
|
|||||||
@@ -2,7 +2,16 @@ PROG=options
|
|||||||
ISPC_SRC=options.ispc
|
ISPC_SRC=options.ispc
|
||||||
CU_SRC=options.cu
|
CU_SRC=options.cu
|
||||||
CXX_SRC=options.cpp
|
CXX_SRC=options.cpp
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=128
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
PTXCC_REGMAX=128
|
PTXCC_REGMAX=128
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#LLVM_GPU=1
|
#LLVM_GPU=1
|
||||||
|
|||||||
@@ -4,7 +4,16 @@ ISPC_SRC=radixSort.ispc
|
|||||||
CU_SRC=radixSort.cu
|
CU_SRC=radixSort.cu
|
||||||
# NVCC_FLAGS=-Xptxas=-O1
|
# NVCC_FLAGS=-Xptxas=-O1
|
||||||
CXX_SRC=radixSort.cpp radixSort.cpp
|
CXX_SRC=radixSort.cpp radixSort.cpp
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=128
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
LLVM_GPU=1
|
LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|||||||
@@ -2,7 +2,16 @@ PROG=rt
|
|||||||
ISPC_SRC=rt.ispc
|
ISPC_SRC=rt.ispc
|
||||||
CU_SRC=rt.cu
|
CU_SRC=rt.cu
|
||||||
CXX_SRC=rt.cpp
|
CXX_SRC=rt.cpp
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
PTXCC_REGMAX=32
|
PTXCC_REGMAX=32
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=64
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#LLVM_GPU=1
|
#LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|||||||
@@ -2,7 +2,16 @@ PROG=volume
|
|||||||
ISPC_SRC=volume.ispc
|
ISPC_SRC=volume.ispc
|
||||||
CU_SRC=volume.cu
|
CU_SRC=volume.cu
|
||||||
CXX_SRC=volume.cpp
|
CXX_SRC=volume.cpp
|
||||||
|
|
||||||
|
#K80
|
||||||
|
#PTXCC_REGMAX=128
|
||||||
|
#NVARCH=sm_37
|
||||||
|
|
||||||
|
#K20/K40
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
NVARCH=sm_35
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#LLVM_GPU=1
|
#LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|||||||
40
ptxtools/GPUTargets.h
Normal file
40
ptxtools/GPUTargets.h
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
// -*- mode: c++ -*-
|
||||||
|
/*
|
||||||
|
Copyright (c) 2015, Evghenii Gaburov
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// List of the GPU target names ("sm_XX") that the ptx tools accept.
// Elsewhere in this change set the list is searched with std::find to
// validate the --arch option of ptxcc/ptxgen and the target named in a parsed
// PTX file, and *begin() is taken as the default architecture.
namespace GPUTargets
|
||||||
|
{
|
||||||
|
// `auto` with a braced list deduces std::initializer_list<const char*>.
// Order matters: the first entry is used as the default target by callers.
// NOTE(review): this header does not include <initializer_list> itself;
// confirm every includer brings it in before this point.
static const auto computeMode = {"sm_35", "sm_37"};
|
||||||
|
}
|
||||||
@@ -33,7 +33,7 @@
|
|||||||
all: ptxcc ptxgen
|
all: ptxcc ptxgen
|
||||||
|
|
||||||
CXX=clang++
|
CXX=clang++
|
||||||
CXXFLAGS += -O3 --std=c++11
|
CXXFLAGS += -O3 --std=c++11 -Wno-deprecated-register
|
||||||
CXXFLAGS += -I/opt/local/include
|
CXXFLAGS += -I/opt/local/include
|
||||||
|
|
||||||
LD=clang++
|
LD=clang++
|
||||||
@@ -77,5 +77,5 @@ ptxgen: ptxgen.cpp
|
|||||||
clean:
|
clean:
|
||||||
/bin/rm -f ptxgen ptxcc $(OBJ) ptxgrammar.hh ptxgrammar.cc ptx.cc ptxgrammar.output
|
/bin/rm -f ptxgen ptxcc $(OBJ) ptxgrammar.hh ptxgrammar.cc ptx.cc ptxgrammar.output
|
||||||
|
|
||||||
$(OBJ): ptxgrammar.cc ptx.cc PTXParser.h PTXLexer.h
|
$(OBJ): ptxgrammar.cc ptx.cc PTXParser.h PTXLexer.h GPUTargets.h
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
// -*- mode: c++ -*-
|
// -*- mode: c++ -*-
|
||||||
/*
|
/*
|
||||||
Copyright (c) 2014, Evghenii Gaburov
|
Copyright (c) 2014-2015, Evghenii Gaburov
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -43,6 +43,7 @@ met:
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include "PTXParser.h"
|
#include "PTXParser.h"
|
||||||
|
#include "GPUTargets.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -117,11 +118,15 @@ static std::vector<std::string> lSplitString(const std::string &s, char delim)
|
|||||||
static void lUsage(const int ret)
|
static void lUsage(const int ret)
|
||||||
{
|
{
|
||||||
fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n");
|
fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n");
|
||||||
fprintf(stdout, " [--help]\t\t\t\t This help\n");
|
fprintf(stdout, " [--help]\t\t\t This help\n");
|
||||||
fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n");
|
fprintf(stdout, " [--verbose]\t\t\t Be verbose\n");
|
||||||
fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35");
|
fprintf(stdout, " [--arch=]\t\t\t GPU target architectures:\n");
|
||||||
fprintf(stdout, " [-o <name>]\t\t\t\t Output file name\n");
|
fprintf(stdout, " \t\t\t\t ");
|
||||||
fprintf(stdout, " [-Xnvcc=<arguments>]\t\t Arguments to pass through to \"nvcc\"\n");
|
for (const auto& mode : GPUTargets::computeMode)
|
||||||
|
fprintf(stdout, "%s ", mode);
|
||||||
|
fprintf(stdout, "\n");
|
||||||
|
fprintf(stdout, " [-o <name>]\t\t\t Output file name\n");
|
||||||
|
fprintf(stdout, " [-Xnvcc=<arguments>]\t Arguments to pass through to \"nvcc\"\n");
|
||||||
fprintf(stdout, " \n");
|
fprintf(stdout, " \n");
|
||||||
exit(ret);
|
exit(ret);
|
||||||
}
|
}
|
||||||
@@ -132,7 +137,7 @@ int main(int _argc, char * _argv[])
|
|||||||
char *argv[128];
|
char *argv[128];
|
||||||
lGetAllArgs(_argc, _argv, argc, argv);
|
lGetAllArgs(_argc, _argv, argc, argv);
|
||||||
|
|
||||||
std::string arch="sm_35";
|
std::string arch = *GPUTargets::computeMode.begin();
|
||||||
std::string filePTX;
|
std::string filePTX;
|
||||||
std::string fileOBJ;
|
std::string fileOBJ;
|
||||||
std::string extString = ".ptx";
|
std::string extString = ".ptx";
|
||||||
@@ -195,7 +200,11 @@ int main(int _argc, char * _argv[])
|
|||||||
for (int i= 0; i < (int)nvccArgumentList.size(); i++)
|
for (int i= 0; i < (int)nvccArgumentList.size(); i++)
|
||||||
fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str());
|
fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str());
|
||||||
#endif
|
#endif
|
||||||
assert(arch == std::string("sm_35"));
|
if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), arch) == GPUTargets::computeMode.end())
|
||||||
|
{
|
||||||
|
fprintf(stderr, "ptxcc fatal : --arch=%s is not supported; use option --help for more information\n", arch.c_str());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
if (filePTX.empty())
|
if (filePTX.empty())
|
||||||
{
|
{
|
||||||
fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n");
|
fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
// -*- mode: c++ -*-
|
// -*- mode: c++ -*-
|
||||||
/*
|
/*
|
||||||
Copyright (c) 2014, Evghenii Gaburov
|
Copyright (c) 2014-2015, Evghenii Gaburov
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -43,6 +43,8 @@ met:
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "GPUTargets.h"
|
||||||
|
|
||||||
#include <nvvm.h>
|
#include <nvvm.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@@ -51,9 +53,7 @@ met:
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
static std::string lValueToString(const T& value)
|
static std::string lValueToString(const T& value)
|
||||||
{
|
{
|
||||||
std::ostringstream oss;
|
return std::to_string(value);
|
||||||
oss << value;
|
|
||||||
return oss.str();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Exception : public std::exception
|
struct Exception : public std::exception
|
||||||
@@ -76,7 +76,7 @@ struct NVVMProg
|
|||||||
nvvmProgram get() const {return prog; }
|
nvvmProgram get() const {return prog; }
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::string getLibDeviceName(const int computeArch)
|
static std::string getLibDeviceName(int computeArch)
|
||||||
{
|
{
|
||||||
const char *env = getenv("LIBNVVM_HOME");
|
const char *env = getenv("LIBNVVM_HOME");
|
||||||
#ifdef LIBNVVM_HOME
|
#ifdef LIBNVVM_HOME
|
||||||
@@ -97,6 +97,7 @@ static std::string getLibDeviceName(const int computeArch)
|
|||||||
|
|
||||||
/* Use libdevice for compute_20, if the target is not compute_20, compute_30,
|
/* Use libdevice for compute_20, if the target is not compute_20, compute_30,
|
||||||
* or compute_35. */
|
* or compute_35. */
|
||||||
|
if (computeArch == 37) computeArch = 35;
|
||||||
const std::string libdevice =
|
const std::string libdevice =
|
||||||
std::string("/libdevice/libdevice.compute_") +
|
std::string("/libdevice/libdevice.compute_") +
|
||||||
lValueToString(computeArch)+ "." +
|
lValueToString(computeArch)+ "." +
|
||||||
@@ -219,7 +220,11 @@ static void lUsage(const int ret)
|
|||||||
fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n");
|
fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n");
|
||||||
fprintf(stdout, " [--help]\t\t This help\n");
|
fprintf(stdout, " [--help]\t\t This help\n");
|
||||||
fprintf(stdout, " [--verbose]\t\t Be verbose\n");
|
fprintf(stdout, " [--verbose]\t\t Be verbose\n");
|
||||||
fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35");
|
fprintf(stdout, " [--arch=]\t\t GPU target architectures:\n");
|
||||||
|
fprintf(stdout, " \t\t\t ");
|
||||||
|
for (const auto& mode : GPUTargets::computeMode)
|
||||||
|
fprintf(stdout, "%s ", mode);
|
||||||
|
fprintf(stdout, "\n");
|
||||||
fprintf(stdout, " [-o <name>]\t\t Output file name\n");
|
fprintf(stdout, " [-o <name>]\t\t Output file name\n");
|
||||||
fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n");
|
fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n");
|
||||||
fprintf(stdout, " [--opt=]\t\t Optimization parameters \n");
|
fprintf(stdout, " [--opt=]\t\t Optimization parameters \n");
|
||||||
@@ -252,7 +257,7 @@ int main(int argc, char *argv[])
|
|||||||
bool _useFastMath = false;
|
bool _useFastMath = false;
|
||||||
bool _debug = false;
|
bool _debug = false;
|
||||||
bool _verbose = false;
|
bool _verbose = false;
|
||||||
std::string _arch = "sm_35";
|
std::string _arch = *GPUTargets::computeMode.begin();
|
||||||
std::string fileIR, filePTX;
|
std::string fileIR, filePTX;
|
||||||
|
|
||||||
for (int i = 1; i < argc; ++i)
|
for (int i = 1; i < argc; ++i)
|
||||||
@@ -333,8 +338,11 @@ int main(int argc, char *argv[])
|
|||||||
fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? "true" : "false");
|
fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? "true" : "false");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int computeArch = 35;
|
if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), _arch) == GPUTargets::computeMode.end())
|
||||||
assert(_arch == std::string("sm_35"));
|
{
|
||||||
|
fprintf(stderr, "ptxcc fatal : --arch=%s is not supported; use option --help for more information\n", _arch.c_str());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
if (_useFastMath)
|
if (_useFastMath)
|
||||||
{
|
{
|
||||||
@@ -342,8 +350,14 @@ int main(int argc, char *argv[])
|
|||||||
_precSqrt = _precDiv = 0;
|
_precSqrt = _precDiv = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* replace "sm" with "compute" */
|
||||||
|
assert(_arch[0] == 's' && _arch[1] == 'm' && _arch[2] == '_');
|
||||||
|
const std::string _mode = std::string("compute_") + &_arch[3];
|
||||||
|
const int computeArch = atoi(&_arch[3]);
|
||||||
|
|
||||||
|
|
||||||
std::vector<std::string> nvvmOptions;
|
std::vector<std::string> nvvmOptions;
|
||||||
nvvmOptions.push_back("-arch=compute_35");
|
nvvmOptions.push_back("-arch=" + _mode);
|
||||||
nvvmOptions.push_back("-ftz=" + lValueToString(_ftz));
|
nvvmOptions.push_back("-ftz=" + lValueToString(_ftz));
|
||||||
nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt));
|
nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt));
|
||||||
nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv));
|
nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv));
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2014, Evghenii Gaburov
|
Copyright (c) 2014-2015, Evghenii Gaburov
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -43,6 +43,8 @@ met:
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "GPUTargets.h"
|
||||||
|
|
||||||
#define YYERROR_VERBOSE 1
|
#define YYERROR_VERBOSE 1
|
||||||
|
|
||||||
@@ -116,7 +118,13 @@ header:
|
|||||||
version:
|
version:
|
||||||
TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; };
|
TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; };
|
||||||
target:
|
target:
|
||||||
TOKEN_TARGET TOKEN_STRING { assert(std::string($2) == std::string("sm_35")); } //std::cerr << "Target " << $2 << std::endl; };
|
TOKEN_TARGET TOKEN_STRING {
|
||||||
|
if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), std::string($2)) == GPUTargets::computeMode.end())
|
||||||
|
{
|
||||||
|
fprintf(stderr, "ptxcc fatal : Found wrong Target=\"%s\" in ptx file\n", $2);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
address_size:
|
address_size:
|
||||||
TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };
|
TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user