remove dependence on llvm-dis from 3.2

This commit is contained in:
evghenii
2014-07-08 15:11:13 +02:00
parent fe150c539f
commit 2dbb4d9890
4 changed files with 164 additions and 46 deletions

View File

@@ -124,23 +124,23 @@ define i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline
;; i32
define internal i32 @__shfl_i32_nvptx(i32, i32) nounwind readnone alwaysinline
{
%shfl = tail call i32 asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1) nounwind readnone alwaysinline
%shfl = tail call i32 asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1)
ret i32 %shfl
}
define internal i32 @__shfl_xor_i32_nvptx(i32, i32) nounwind readnone alwaysinline
{
%shfl = tail call i32 asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1) nounwind readnone alwaysinline
%shfl = tail call i32 asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1)
ret i32 %shfl
}
;; float
define internal float @__shfl_float_nvptx(float, i32) nounwind readnone alwaysinline
{
%shfl = tail call float asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1) nounwind readnone alwaysinline
%shfl = tail call float asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1)
ret float %shfl
}
define internal float @__shfl_xor_float_nvptx(float, i32) nounwind readnone alwaysinline
{
%shfl = tail call float asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1) nounwind readnone alwaysinline
%shfl = tail call float asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1)
ret float %shfl
}
@@ -148,12 +148,12 @@ define internal float @__shfl_xor_float_nvptx(float, i32) nounwind readnone alwa
;; float/double
define internal float @__fminf_nvptx(float,float) nounwind readnone alwaysinline
{
%min = tail call float asm sideeffect "min.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1) nounwind readnone alwaysinline
%min = tail call float asm sideeffect "min.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1)
ret float %min
}
define internal float @__fmaxf_nvptx(float,float) nounwind readnone alwaysinline
{
%max = tail call float asm sideeffect "max.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1) nounwind readnone alwaysinline
%max = tail call float asm sideeffect "max.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1)
ret float %max
}
@@ -245,12 +245,12 @@ define internal i32 @__ballot_nvptx(i1) nounwind readnone alwaysinline
"{ .reg .pred %p1;
setp.ne.u32 %p1, $1, 0;
vote.ballot.b32 $0, %p1;
}", "=r,r"(i32 %conv) nounwind readnone alwaysinline
}", "=r,r"(i32 %conv)
ret i32 %res
}
define internal i32 @__lanemask_lt_nvptx() nounwind readnone alwaysinline
{
%mask = tail call i32 asm sideeffect "mov.u32 $0, %lanemask_lt;", "=r"() nounwind readnone alwaysinline
%mask = tail call i32 asm sideeffect "mov.u32 $0, %lanemask_lt;", "=r"()
ret i32 %mask
}
@@ -418,7 +418,7 @@ define float @__half_to_float_uniform(i16 %v) nounwind readnone alwaysinline
"{ .reg .f16 tmp;
mov.b16 tmp, $1;
cvt.f32.f16 $0, tmp;
}", "=f,h"(i16 %v) nounwind readnone alwaysinline
}", "=f,h"(i16 %v)
ret float %res
}
define i16 @__float_to_half_uniform(float %v) nounwind readnone alwaysinline
@@ -429,7 +429,7 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone alwaysinline
"{ .reg .f16 tmp;
cvt.rn.f16.f32 tmp, $1;
mov.b16 $0, tmp;
}", "=h,f"(float %v) nounwind readnone alwaysinline
}", "=h,f"(float %v)
ret i16 %half
}
define <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone alwaysinline
@@ -473,7 +473,7 @@ define internal float @__round_uniform_float_ptx(float) nounwind readnone always
cvt.rzi.f32.f32 f9, f4;
BB2_2:
mov.f32 $0, f9;
}", "=f,f"(float %0) nounwind readnone alwaysinline
}", "=f,f"(float %0)
ret float %2
}
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
@@ -490,12 +490,12 @@ define float @__round_uniform_float(float) nounwind readonly alwaysinline {
}
define float @__floor_uniform_float(float) nounwind readnone alwaysinline
{
%2 = tail call float asm sideeffect "cvt.rmi.f32.f32 $0, $1;", "=f,f"(float %0) nounwind alwaysinline readnone
%2 = tail call float asm sideeffect "cvt.rmi.f32.f32 $0, $1;", "=f,f"(float %0)
ret float %2
}
define float @__ceil_uniform_float(float) nounwind readnone alwaysinline
{
%2 = tail call float asm sideeffect "cvt.rpi.f32.f32 $0, $1;", "=f,f"(float %0) nounwind alwaysinline readnone
%2 = tail call float asm sideeffect "cvt.rpi.f32.f32 $0, $1;", "=f,f"(float %0)
ret float %2
}
@@ -534,17 +534,17 @@ define double @__round_uniform_double(double) nounwind readnone alwaysinline
BB5_2:
mov.f64 $0, fd8;
}", "=d,d"(double %0) nounwind readnone alwaysinline
}", "=d,d"(double %0)
ret double %2
}
define double @__floor_uniform_double(double) nounwind readnone alwaysinline
{
%2 = tail call double asm sideeffect "cvt.rmi.f64.f64 $0, $1;", "=f,f"(double %0) nounwind alwaysinline readnone
%2 = tail call double asm sideeffect "cvt.rmi.f64.f64 $0, $1;", "=f,f"(double %0)
ret double %2
}
define double @__ceil_uniform_double(double) nounwind readnone alwaysinline
{
%2 = tail call double asm sideeffect "cvt.rpi.f64.f64 $0, $1;", "=f,f"(double %0) nounwind alwaysinline readnone
%2 = tail call double asm sideeffect "cvt.rpi.f64.f64 $0, $1;", "=f,f"(double %0)
ret double %2
}
@@ -763,7 +763,7 @@ declare i32 @llvm.ctpop.i32(i32) nounwind readnone
define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
%call = call i32 @llvm.ctpop.i32(i32 %0)
ret i32 %call
;; %res = tail call i32 asm sideeffect "popc.b32 $0, $1;", "=r,r"(i32 %0) nounwind readnone alwaysinline
;; %res = tail call i32 asm sideeffect "popc.b32 $0, $1;", "=r,r"(i32 %0)
;; ret i32 %res
}
@@ -1117,7 +1117,7 @@ define internal i32 @__shfl_reduce_and_step_i32_nvptx(i32, i32) nounwind readnon
shfl.bfly.b32 r0|p, $1, $2, 0;
@p and.b32 r0, r0, $3;
mov.u32 $0, r0;
}", "=r,r,r,r"(i32 %0, i32 %1, i32 %0) nounwind readnone alwaysinline
}", "=r,r,r,r"(i32 %0, i32 %1, i32 %0)
ret i32 %shfl
}
shfl64(__shfl_reduce_and_step, i64)
@@ -1298,7 +1298,7 @@ define internal i32 @__shfl_scan_add_step_i32(i32 %partial, i32 %up_offset) noun
shfl.up.b32 r0|p, $1, $2, 0;
@p add.u32 r0, r0, $3;
mov.u32 $0, r0;
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind readnone alwaysinline
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial)
ret i32 %result;
}
define <1 x i32> @__exclusive_scan_add_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
@@ -1325,7 +1325,7 @@ define internal i32 @__shfl_scan_or_step_i32(i32 %partial, i32 %up_offset) nounw
shfl.up.b32 r0|p, $1, $2, 0;
@p or.b32 r0, r0, $3;
mov.u32 $0, r0;
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind alwaysinline
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial)
ret i32 %result;
}
define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
@@ -1341,7 +1341,7 @@ define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone
shfl.up.b32 r0|p, $1, 1, 0;
@!p mov.u32 r0, 0;
mov.u32 $0, r0;
}","=r,r"(i32 %v1); alwaysinline nounwind
}","=r,r"(i32 %v1)
%s1 = tail call i32 @__shfl_scan_or_step_i32(i32 %v, i32 1);
%s2 = tail call i32 @__shfl_scan_or_step_i32(i32 %s1, i32 2);
@@ -1360,7 +1360,7 @@ define internal i32 @__shfl_scan_and_step_i32(i32 %partial, i32 %up_offset) noun
shfl.up.b32 r0|p, $1, $2, 0;
@p and.b32 r0, r0, $3;
mov.u32 $0, r0;
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) alwaysinline
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial)
ret i32 %result;
}
define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
@@ -1376,7 +1376,7 @@ define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnon
shfl.up.b32 r0|p, $1, 1, 0;
@!p mov.u32 r0, -1;
mov.u32 $0, r0;
}","=r,r"(i32 %v1); alwaysinline
}","=r,r"(i32 %v1)
%s1 = call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1);
%s2 = call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2);
@@ -1395,7 +1395,7 @@ define internal float @__shfl_scan_add_step_float(float %partial, i32 %up_offset
shfl.up.b32 f0|p, $1, $2, 0;
@p add.f32 f0, f0, $3;
mov.f32 $0, f0;
}", "=f,f,r,f"(float %partial, i32 %up_offset, float %partial) nounwind readnone alwaysinline
}", "=f,f,r,f"(float %partial, i32 %up_offset, float %partial)
ret float %result;
}
define <1 x float> @__exclusive_scan_add_float(<1 x float>, <1 x i1>) nounwind readnone alwaysinline
@@ -1427,7 +1427,7 @@ define internal double @__shfl_scan_add_step_double(double %partial, i32 %up_off
mov.b64 fd0, {r3,r4};
@p add.f64 fd0, fd0, $3;
mov.f64 $0, fd0;
}", "=d,d,r,d"(double %partial, i32 %up_offset, double %partial) nounwind readnone alwaysinline
}", "=d,d,r,d"(double %partial, i32 %up_offset, double %partial)
ret double %result;
}
define <1 x double> @__exclusive_scan_add_double(<1 x double>, <1 x i1>) nounwind readnone alwaysinline
@@ -1460,7 +1460,7 @@ define internal i64 @__shfl_scan_add_step_i64(i64 %partial, i32 %up_offset) noun
mov.b64 rl0, {r3,r4};
@p add.s64 rl0, rl0, $3;
mov.s64 $0, rl0;
}", "=l,l,r,l"(i64 %partial, i32 %up_offset, i64 %partial) nounwind readnone alwaysinline
}", "=l,l,r,l"(i64 %partial, i32 %up_offset, i64 %partial)
ret i64 %result;
}
define <1 x i64> @__exclusive_scan_add_i64(<1 x i64>, <1 x i1>) nounwind readnone alwaysinline

View File

@@ -21,14 +21,15 @@ ifdef PTXCC_REGMAX
endif
#
ISPC=ispc
ISPC=$(ISPC_HOME)/ispc
ISPC_FLAGS+=-O3 --math-lib=fast --target=nvptx --opt=fast-math
#
#
#
ISPC_LLVM_OBJS=$(ISPC_SRC:%.ispc=objs_ptx/%_llvm_ispc.o)
ISPC_NVVM_OBJS=$(ISPC_SRC:%.ispc=objs_ptx/%_nvvm_ispc.o)
ISPC_BCS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.bc)
#ISPC_BCS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.bc)
ISPC_LLS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.ll)
ISPC_LLVM_PTX=$(ISPC_SRC:%.ispc=objs_ptx/%_llvm_ispc.ptx)
ISPC_NVVM_PTX=$(ISPC_SRC:%.ispc=objs_ptx/%_nvvm_ispc.ptx)
ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.h)
@@ -42,13 +43,13 @@ CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o
PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen
PTXGEN += --use_fast_math
LLVM32=$(HOME)/usr/local/llvm/bin-3.2
LLVM32DIS=$(LLVM32)/bin/llvm-dis
#LLVM32=$(HOME)/usr/local/llvm/bin-3.2
#LLVM32DIS=$(LLVM32)/bin/llvm-dis
LLC=$(HOME)/usr/local/llvm/bin-trunk/bin/llc
LLC=$(LLVM_ROOT)/bin/llc
LLC_FLAGS=-march=nvptx64 -mcpu=sm_35
# .SUFFIXES: .bc .o .cu
# .SUFFIXES: .bc .o .cu .ll
ifdef LLVM_GPU
OBJSptx_llvm=$(ISPC_LLVM_OBJS) $(CXX_OBJS) $(NVCC_OBJS)
@@ -74,7 +75,7 @@ endif
all: dirs \
$(PROGptx_nvvm) \
$(PROGptx_llvm) \
$(PROGcu) $(ISPC_BC) $(ISPC_HEADERS) $(ISPC_NVVM_PTX) $(ISPC_LLVM_PTX)
$(PROGcu) $(ISPC_BCS) $(ISPC_LLS) $(ISPC_HEADERS) $(ISPC_NVVM_PTX) $(ISPC_LLVM_PTX)
dirs:
/bin/mkdir -p objs_ptx/
@@ -109,15 +110,21 @@ objs_ptx/%_nvcc.o: %.cu
$(NVCC) $(NVCC_FLAGS) -o $@ -c $<
# compile ISPC to LLVM BC
objs_ptx/%_ispc.h objs_ptx/%_ispc.bc: %.ispc
$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_ptx/$*_ispc.h -o objs_ptx/$*_ispc.bc $<
#objs_ptx/%_ispc.h objs_ptx/%_ispc.bc: %.ispc
# $(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_ptx/$*_ispc.h -o objs_ptx/$*_ispc.bc $<
objs_ptx/%_ispc.h objs_ptx/%_ispc.ll: %.ispc
$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_ptx/$*_ispc.h -o objs_ptx/$*_ispc.ll $<
# generate PTX from LLVM BC
objs_ptx/%_llvm_ispc.ptx: objs_ptx/%_ispc.bc
#objs_ptx/%_llvm_ispc.ptx: objs_ptx/%_ispc.bc
# $(LLC) $(LLC_FLAGS) -o $@ $<
objs_ptx/%_llvm_ispc.ptx: objs_ptx/%_ispc.ll
$(LLC) $(LLC_FLAGS) -o $@ $<
objs_ptx/%_nvvm_ispc.ptx: objs_ptx/%_ispc.bc
$(LLVM32DIS) $< -o objs_ptx/$*_ispc-ll32.ll
$(PTXGEN) objs_ptx/$*_ispc-ll32.ll -o $@
#objs_ptx/%_nvvm_ispc.ptx: objs_ptx/%_ispc.bc
# $(LLVM32DIS) $< -o objs_ptx/$*_ispc-ll32.ll
# $(PTXGEN) objs_ptx/$*_ispc-ll32.ll -o $@
objs_ptx/%_nvvm_ispc.ptx: objs_ptx/%_ispc.ll
$(PTXGEN) $< -o $@
# generate an object file from PTX
objs_ptx/%_ispc.o: objs_ptx/%_ispc.ptx

View File

@@ -58,6 +58,7 @@
#include <set>
#include <sstream>
#include <iostream>
#include <map>
#ifdef ISPC_IS_WINDOWS
#include <windows.h>
#include <io.h>
@@ -71,6 +72,7 @@
#include <llvm/Instructions.h>
#include <llvm/Intrinsics.h>
#include <llvm/DerivedTypes.h>
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#else
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
@@ -78,6 +80,7 @@
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/DerivedTypes.h>
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#endif
#include <llvm/PassManager.h>
#include <llvm/PassRegistry.h>
@@ -1034,8 +1037,14 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
fileType = "assembly";
break;
case Bitcode:
if (strcasecmp(suffix, "bc"))
fileType = "LLVM bitcode";
if (g->target->getISA() != Target::NVPTX)
{
if (strcasecmp(suffix, "bc"))
fileType = "LLVM bitcode";
}
else
if (strcasecmp(suffix, "ll"))
fileType = "LLVM assembly";
break;
case Object:
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
@@ -1104,6 +1113,73 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
return writeObjectFileOrAssembly(outputType, outFileName);
}
typedef std::vector<std::string> vecString_t;
static vecString_t
lSplitString(const std::string &s)
{
std::stringstream ss(s);
std::istream_iterator<std::string> begin(ss);
std::istream_iterator<std::string> end;
return vecString_t(begin,end);
}
static void
lFixAttributes(const vecString_t &src, vecString_t &dst)
{
dst.clear();
std::vector< std::pair<int,int> > attributePos;
typedef std::map<std::string, std::string> attributeMap_t;
attributeMap_t attributeMap;
for (vecString_t::const_iterator it = src.begin(); it != src.end(); it++)
{
const vecString_t words = lSplitString(*it);
if (!words.empty() && words[0] == "attributes" && words[1][0] == '#')
{
const int nWords = words.size();
assert(nWords > 3);
assert(words[2 ] == "=");
assert(words[3 ] == "{");
assert(words[nWords-1] == "}");
std::string attributes;
for (int w = 4; w < nWords-1; w++)
attributes += words[w] + " ";
attributeMap[words[1]] = attributes;
}
}
for (vecString_t::const_iterator it = src.begin(); it != src.end(); it++)
{
vecString_t words = lSplitString(*it);
if (!words.empty() && words[0] == "attributes")
continue;
std::string s;
std::map<std::string, std::string> attributeSet;
#if 1 /* this attributed cannot be used in function parametrers, so remove them */
attributeSet["readnone"] = " ";
attributeSet["readonly"] = " ";
attributeSet["readnone,"] = ",";
attributeSet["readonly,"] = ",";
#endif
for (vecString_t::iterator w = words.begin(); w != words.end(); w++)
{
if (attributeSet.find(*w) != attributeSet.end())
*w = attributeSet[*w];
if ((*w)[0] == '#')
{
attributeMap_t::iterator m = attributeMap.find(*w);
assert (m != attributeMap.end());
*w = attributeMap[*w];
}
s += *w + " ";
}
dst.push_back(s);
}
}
bool
Module::writeBitcode(llvm::Module *module, const char *outFileName) {
@@ -1128,12 +1204,44 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
}
llvm::raw_fd_ostream fos(fd, (fd != 1), false);
if (g->target->getISA() == Target::NVPTX)
if (g->target->getISA() != Target::NVPTX)
{
llvm::WriteBitcodeToFile(module, fos);
}
else
{
/* When using the "nvptx" target, emit patched/hacked assembly.
* NVPTX only accepts 3.2-style LLVM assembly, where attributes
* must be inlined rather than referenced through a "#<id>" attribute group.
* As soon as NVVM supports 3.3/3.4-style assembly this fix won't be needed.
*/
const std::string dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
module->setDataLayout(dl_string);
std::string s;
llvm::raw_string_ostream out(s);
llvm::OwningPtr<llvm::AssemblyAnnotationWriter> Annotator;
module->print(out, Annotator.get());
std::istringstream iss(s);
vecString_t input,output;
while (std::getline(iss,s))
input.push_back(s);
output = input;
#if !(defined(LLVM_3_1) || defined(LLVM_3_2))
/* do not fix attributes with LLVM 3.2, everything is fine there */
lFixAttributes(input,output);
#endif
for (vecString_t::iterator it = output.begin(); it != output.end(); it++)
{
*it += "\n";
fos << *it;
}
}
llvm::WriteBitcodeToFile(module, fos);
return true;
}

View File

@@ -7,9 +7,12 @@ TMPDIR=/tmp
fbname=`basename $1`
if [ "$NVVM" == "1" ];
then
LLVM32=$HOME/usr/local/llvm/bin-3.2
LLVMDIS=$LLVM32/bin/llvm-dis
$($LLVMDIS $1 -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
# LLVM32=$HOME/usr/local/llvm/bin-3.2
# LLVM34=$HOME/usr/local/llvm/bin-3.4
# LLVMAS=$LLVM34/bin/llvm-as
# LLVMDIS=$LLVM32/bin/llvm-dis
# $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
$($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \
$($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
else