remove dependence on llvm-dis from 3.2
This commit is contained in:
@@ -124,23 +124,23 @@ define i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline
|
||||
;; i32
|
||||
define internal i32 @__shfl_i32_nvptx(i32, i32) nounwind readnone alwaysinline
|
||||
{
|
||||
%shfl = tail call i32 asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1) nounwind readnone alwaysinline
|
||||
%shfl = tail call i32 asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1)
|
||||
ret i32 %shfl
|
||||
}
|
||||
define internal i32 @__shfl_xor_i32_nvptx(i32, i32) nounwind readnone alwaysinline
|
||||
{
|
||||
%shfl = tail call i32 asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1) nounwind readnone alwaysinline
|
||||
%shfl = tail call i32 asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1)
|
||||
ret i32 %shfl
|
||||
}
|
||||
;; float
|
||||
define internal float @__shfl_float_nvptx(float, i32) nounwind readnone alwaysinline
|
||||
{
|
||||
%shfl = tail call float asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1) nounwind readnone alwaysinline
|
||||
%shfl = tail call float asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1)
|
||||
ret float %shfl
|
||||
}
|
||||
define internal float @__shfl_xor_float_nvptx(float, i32) nounwind readnone alwaysinline
|
||||
{
|
||||
%shfl = tail call float asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1) nounwind readnone alwaysinline
|
||||
%shfl = tail call float asm sideeffect "shfl.bfly.b32 $0, $1, $2, 0x1f;", "=f,f,r"(float %0, i32 %1)
|
||||
ret float %shfl
|
||||
}
|
||||
|
||||
@@ -148,12 +148,12 @@ define internal float @__shfl_xor_float_nvptx(float, i32) nounwind readnone alwa
|
||||
;; float/double
|
||||
define internal float @__fminf_nvptx(float,float) nounwind readnone alwaysinline
|
||||
{
|
||||
%min = tail call float asm sideeffect "min.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1) nounwind readnone alwaysinline
|
||||
%min = tail call float asm sideeffect "min.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1)
|
||||
ret float %min
|
||||
}
|
||||
define internal float @__fmaxf_nvptx(float,float) nounwind readnone alwaysinline
|
||||
{
|
||||
%max = tail call float asm sideeffect "max.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1) nounwind readnone alwaysinline
|
||||
%max = tail call float asm sideeffect "max.f32 $0, $1, $2;", "=f,f,f"(float %0, float %1)
|
||||
ret float %max
|
||||
}
|
||||
|
||||
@@ -245,12 +245,12 @@ define internal i32 @__ballot_nvptx(i1) nounwind readnone alwaysinline
|
||||
"{ .reg .pred %p1;
|
||||
setp.ne.u32 %p1, $1, 0;
|
||||
vote.ballot.b32 $0, %p1;
|
||||
}", "=r,r"(i32 %conv) nounwind readnone alwaysinline
|
||||
}", "=r,r"(i32 %conv)
|
||||
ret i32 %res
|
||||
}
|
||||
define internal i32 @__lanemask_lt_nvptx() nounwind readnone alwaysinline
|
||||
{
|
||||
%mask = tail call i32 asm sideeffect "mov.u32 $0, %lanemask_lt;", "=r"() nounwind readnone alwaysinline
|
||||
%mask = tail call i32 asm sideeffect "mov.u32 $0, %lanemask_lt;", "=r"()
|
||||
ret i32 %mask
|
||||
}
|
||||
|
||||
@@ -418,7 +418,7 @@ define float @__half_to_float_uniform(i16 %v) nounwind readnone alwaysinline
|
||||
"{ .reg .f16 tmp;
|
||||
mov.b16 tmp, $1;
|
||||
cvt.f32.f16 $0, tmp;
|
||||
}", "=f,h"(i16 %v) nounwind readnone alwaysinline
|
||||
}", "=f,h"(i16 %v)
|
||||
ret float %res
|
||||
}
|
||||
define i16 @__float_to_half_uniform(float %v) nounwind readnone alwaysinline
|
||||
@@ -429,7 +429,7 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone alwaysinline
|
||||
"{ .reg .f16 tmp;
|
||||
cvt.rn.f16.f32 tmp, $1;
|
||||
mov.b16 $0, tmp;
|
||||
}", "=h,f"(float %v) nounwind readnone alwaysinline
|
||||
}", "=h,f"(float %v)
|
||||
ret i16 %half
|
||||
}
|
||||
define <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone alwaysinline
|
||||
@@ -473,7 +473,7 @@ define internal float @__round_uniform_float_ptx(float) nounwind readnone always
|
||||
cvt.rzi.f32.f32 f9, f4;
|
||||
BB2_2:
|
||||
mov.f32 $0, f9;
|
||||
}", "=f,f"(float %0) nounwind readnone alwaysinline
|
||||
}", "=f,f"(float %0)
|
||||
ret float %2
|
||||
}
|
||||
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
||||
@@ -490,12 +490,12 @@ define float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
||||
}
|
||||
define float @__floor_uniform_float(float) nounwind readnone alwaysinline
|
||||
{
|
||||
%2 = tail call float asm sideeffect "cvt.rmi.f32.f32 $0, $1;", "=f,f"(float %0) nounwind alwaysinline readnone
|
||||
%2 = tail call float asm sideeffect "cvt.rmi.f32.f32 $0, $1;", "=f,f"(float %0)
|
||||
ret float %2
|
||||
}
|
||||
define float @__ceil_uniform_float(float) nounwind readnone alwaysinline
|
||||
{
|
||||
%2 = tail call float asm sideeffect "cvt.rpi.f32.f32 $0, $1;", "=f,f"(float %0) nounwind alwaysinline readnone
|
||||
%2 = tail call float asm sideeffect "cvt.rpi.f32.f32 $0, $1;", "=f,f"(float %0)
|
||||
ret float %2
|
||||
}
|
||||
|
||||
@@ -534,17 +534,17 @@ define double @__round_uniform_double(double) nounwind readnone alwaysinline
|
||||
|
||||
BB5_2:
|
||||
mov.f64 $0, fd8;
|
||||
}", "=d,d"(double %0) nounwind readnone alwaysinline
|
||||
}", "=d,d"(double %0)
|
||||
ret double %2
|
||||
}
|
||||
define double @__floor_uniform_double(double) nounwind readnone alwaysinline
|
||||
{
|
||||
%2 = tail call double asm sideeffect "cvt.rmi.f64.f64 $0, $1;", "=f,f"(double %0) nounwind alwaysinline readnone
|
||||
%2 = tail call double asm sideeffect "cvt.rmi.f64.f64 $0, $1;", "=f,f"(double %0)
|
||||
ret double %2
|
||||
}
|
||||
define double @__ceil_uniform_double(double) nounwind readnone alwaysinline
|
||||
{
|
||||
%2 = tail call double asm sideeffect "cvt.rpi.f64.f64 $0, $1;", "=f,f"(double %0) nounwind alwaysinline readnone
|
||||
%2 = tail call double asm sideeffect "cvt.rpi.f64.f64 $0, $1;", "=f,f"(double %0)
|
||||
ret double %2
|
||||
}
|
||||
|
||||
@@ -763,7 +763,7 @@ declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
||||
%call = call i32 @llvm.ctpop.i32(i32 %0)
|
||||
ret i32 %call
|
||||
;; %res = tail call i32 asm sideeffect "popc.b32 $0, $1;", "=r,r"(i32 %0) nounwind readnone alwaysinline
|
||||
;; %res = tail call i32 asm sideeffect "popc.b32 $0, $1;", "=r,r"(i32 %0)
|
||||
;; ret i32 %res
|
||||
}
|
||||
|
||||
@@ -1117,7 +1117,7 @@ define internal i32 @__shfl_reduce_and_step_i32_nvptx(i32, i32) nounwind readnon
|
||||
shfl.bfly.b32 r0|p, $1, $2, 0;
|
||||
@p and.b32 r0, r0, $3;
|
||||
mov.u32 $0, r0;
|
||||
}", "=r,r,r,r"(i32 %0, i32 %1, i32 %0) nounwind readnone alwaysinline
|
||||
}", "=r,r,r,r"(i32 %0, i32 %1, i32 %0)
|
||||
ret i32 %shfl
|
||||
}
|
||||
shfl64(__shfl_reduce_and_step, i64)
|
||||
@@ -1298,7 +1298,7 @@ define internal i32 @__shfl_scan_add_step_i32(i32 %partial, i32 %up_offset) noun
|
||||
shfl.up.b32 r0|p, $1, $2, 0;
|
||||
@p add.u32 r0, r0, $3;
|
||||
mov.u32 $0, r0;
|
||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind readnone alwaysinline
|
||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial)
|
||||
ret i32 %result;
|
||||
}
|
||||
define <1 x i32> @__exclusive_scan_add_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
|
||||
@@ -1325,7 +1325,7 @@ define internal i32 @__shfl_scan_or_step_i32(i32 %partial, i32 %up_offset) nounw
|
||||
shfl.up.b32 r0|p, $1, $2, 0;
|
||||
@p or.b32 r0, r0, $3;
|
||||
mov.u32 $0, r0;
|
||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind alwaysinline
|
||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial)
|
||||
ret i32 %result;
|
||||
}
|
||||
define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
|
||||
@@ -1341,7 +1341,7 @@ define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone
|
||||
shfl.up.b32 r0|p, $1, 1, 0;
|
||||
@!p mov.u32 r0, 0;
|
||||
mov.u32 $0, r0;
|
||||
}","=r,r"(i32 %v1); alwaysinline nounwind
|
||||
}","=r,r"(i32 %v1)
|
||||
|
||||
%s1 = tail call i32 @__shfl_scan_or_step_i32(i32 %v, i32 1);
|
||||
%s2 = tail call i32 @__shfl_scan_or_step_i32(i32 %s1, i32 2);
|
||||
@@ -1360,7 +1360,7 @@ define internal i32 @__shfl_scan_and_step_i32(i32 %partial, i32 %up_offset) noun
|
||||
shfl.up.b32 r0|p, $1, $2, 0;
|
||||
@p and.b32 r0, r0, $3;
|
||||
mov.u32 $0, r0;
|
||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) alwaysinline
|
||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial)
|
||||
ret i32 %result;
|
||||
}
|
||||
define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
|
||||
@@ -1376,7 +1376,7 @@ define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnon
|
||||
shfl.up.b32 r0|p, $1, 1, 0;
|
||||
@!p mov.u32 r0, -1;
|
||||
mov.u32 $0, r0;
|
||||
}","=r,r"(i32 %v1); alwaysinline
|
||||
}","=r,r"(i32 %v1)
|
||||
|
||||
%s1 = call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1);
|
||||
%s2 = call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2);
|
||||
@@ -1395,7 +1395,7 @@ define internal float @__shfl_scan_add_step_float(float %partial, i32 %up_offset
|
||||
shfl.up.b32 f0|p, $1, $2, 0;
|
||||
@p add.f32 f0, f0, $3;
|
||||
mov.f32 $0, f0;
|
||||
}", "=f,f,r,f"(float %partial, i32 %up_offset, float %partial) nounwind readnone alwaysinline
|
||||
}", "=f,f,r,f"(float %partial, i32 %up_offset, float %partial)
|
||||
ret float %result;
|
||||
}
|
||||
define <1 x float> @__exclusive_scan_add_float(<1 x float>, <1 x i1>) nounwind readnone alwaysinline
|
||||
@@ -1427,7 +1427,7 @@ define internal double @__shfl_scan_add_step_double(double %partial, i32 %up_off
|
||||
mov.b64 fd0, {r3,r4};
|
||||
@p add.f64 fd0, fd0, $3;
|
||||
mov.f64 $0, fd0;
|
||||
}", "=d,d,r,d"(double %partial, i32 %up_offset, double %partial) nounwind readnone alwaysinline
|
||||
}", "=d,d,r,d"(double %partial, i32 %up_offset, double %partial)
|
||||
ret double %result;
|
||||
}
|
||||
define <1 x double> @__exclusive_scan_add_double(<1 x double>, <1 x i1>) nounwind readnone alwaysinline
|
||||
@@ -1460,7 +1460,7 @@ define internal i64 @__shfl_scan_add_step_i64(i64 %partial, i32 %up_offset) noun
|
||||
mov.b64 rl0, {r3,r4};
|
||||
@p add.s64 rl0, rl0, $3;
|
||||
mov.s64 $0, rl0;
|
||||
}", "=l,l,r,l"(i64 %partial, i32 %up_offset, i64 %partial) nounwind readnone alwaysinline
|
||||
}", "=l,l,r,l"(i64 %partial, i32 %up_offset, i64 %partial)
|
||||
ret i64 %result;
|
||||
}
|
||||
define <1 x i64> @__exclusive_scan_add_i64(<1 x i64>, <1 x i1>) nounwind readnone alwaysinline
|
||||
|
||||
@@ -21,14 +21,15 @@ ifdef PTXCC_REGMAX
|
||||
endif
|
||||
|
||||
#
|
||||
ISPC=ispc
|
||||
ISPC=$(ISPC_HOME)/ispc
|
||||
ISPC_FLAGS+=-O3 --math-lib=fast --target=nvptx --opt=fast-math
|
||||
#
|
||||
#
|
||||
#
|
||||
ISPC_LLVM_OBJS=$(ISPC_SRC:%.ispc=objs_ptx/%_llvm_ispc.o)
|
||||
ISPC_NVVM_OBJS=$(ISPC_SRC:%.ispc=objs_ptx/%_nvvm_ispc.o)
|
||||
ISPC_BCS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.bc)
|
||||
#ISPC_BCS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.bc)
|
||||
ISPC_LLS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.ll)
|
||||
ISPC_LLVM_PTX=$(ISPC_SRC:%.ispc=objs_ptx/%_llvm_ispc.ptx)
|
||||
ISPC_NVVM_PTX=$(ISPC_SRC:%.ispc=objs_ptx/%_nvvm_ispc.ptx)
|
||||
ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.h)
|
||||
@@ -42,13 +43,13 @@ CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o
|
||||
PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen
|
||||
PTXGEN += --use_fast_math
|
||||
|
||||
LLVM32=$(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
#LLVM32=$(HOME)/usr/local/llvm/bin-3.2
|
||||
#LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
LLC=$(HOME)/usr/local/llvm/bin-trunk/bin/llc
|
||||
LLC=$(LLVM_ROOT)/bin/llc
|
||||
LLC_FLAGS=-march=nvptx64 -mcpu=sm_35
|
||||
|
||||
# .SUFFIXES: .bc .o .cu
|
||||
# .SUFFIXES: .bc .o .cu .ll
|
||||
|
||||
ifdef LLVM_GPU
|
||||
OBJSptx_llvm=$(ISPC_LLVM_OBJS) $(CXX_OBJS) $(NVCC_OBJS)
|
||||
@@ -74,7 +75,7 @@ endif
|
||||
all: dirs \
|
||||
$(PROGptx_nvvm) \
|
||||
$(PROGptx_llvm) \
|
||||
$(PROGcu) $(ISPC_BC) $(ISPC_HEADERS) $(ISPC_NVVM_PTX) $(ISPC_LLVM_PTX)
|
||||
$(PROGcu) $(ISPC_BCS) $(ISPC_LLS) $(ISPC_HEADERS) $(ISPC_NVVM_PTX) $(ISPC_LLVM_PTX)
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs_ptx/
|
||||
@@ -109,15 +110,21 @@ objs_ptx/%_nvcc.o: %.cu
|
||||
$(NVCC) $(NVCC_FLAGS) -o $@ -c $<
|
||||
|
||||
# compile ISPC to LLVM BC
|
||||
objs_ptx/%_ispc.h objs_ptx/%_ispc.bc: %.ispc
|
||||
$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_ptx/$*_ispc.h -o objs_ptx/$*_ispc.bc $<
|
||||
#objs_ptx/%_ispc.h objs_ptx/%_ispc.bc: %.ispc
|
||||
# $(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_ptx/$*_ispc.h -o objs_ptx/$*_ispc.bc $<
|
||||
objs_ptx/%_ispc.h objs_ptx/%_ispc.ll: %.ispc
|
||||
$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_ptx/$*_ispc.h -o objs_ptx/$*_ispc.ll $<
|
||||
|
||||
# generate PTX from LLVM BC
|
||||
objs_ptx/%_llvm_ispc.ptx: objs_ptx/%_ispc.bc
|
||||
#objs_ptx/%_llvm_ispc.ptx: objs_ptx/%_ispc.bc
|
||||
# $(LLC) $(LLC_FLAGS) -o $@ $<
|
||||
objs_ptx/%_llvm_ispc.ptx: objs_ptx/%_ispc.ll
|
||||
$(LLC) $(LLC_FLAGS) -o $@ $<
|
||||
objs_ptx/%_nvvm_ispc.ptx: objs_ptx/%_ispc.bc
|
||||
$(LLVM32DIS) $< -o objs_ptx/$*_ispc-ll32.ll
|
||||
$(PTXGEN) objs_ptx/$*_ispc-ll32.ll -o $@
|
||||
#objs_ptx/%_nvvm_ispc.ptx: objs_ptx/%_ispc.bc
|
||||
# $(LLVM32DIS) $< -o objs_ptx/$*_ispc-ll32.ll
|
||||
# $(PTXGEN) objs_ptx/$*_ispc-ll32.ll -o $@
|
||||
objs_ptx/%_nvvm_ispc.ptx: objs_ptx/%_ispc.ll
|
||||
$(PTXGEN) $< -o $@
|
||||
|
||||
# generate an object file from PTX
|
||||
objs_ptx/%_ispc.o: objs_ptx/%_ispc.ptx
|
||||
|
||||
116
module.cpp
116
module.cpp
@@ -58,6 +58,7 @@
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <windows.h>
|
||||
#include <io.h>
|
||||
@@ -71,6 +72,7 @@
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/Intrinsics.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
|
||||
#else
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
@@ -78,6 +80,7 @@
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Intrinsics.h>
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
|
||||
#endif
|
||||
#include <llvm/PassManager.h>
|
||||
#include <llvm/PassRegistry.h>
|
||||
@@ -1034,8 +1037,14 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
|
||||
fileType = "assembly";
|
||||
break;
|
||||
case Bitcode:
|
||||
if (strcasecmp(suffix, "bc"))
|
||||
fileType = "LLVM bitcode";
|
||||
if (g->target->getISA() != Target::NVPTX)
|
||||
{
|
||||
if (strcasecmp(suffix, "bc"))
|
||||
fileType = "LLVM bitcode";
|
||||
}
|
||||
else
|
||||
if (strcasecmp(suffix, "ll"))
|
||||
fileType = "LLVM assembly";
|
||||
break;
|
||||
case Object:
|
||||
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
|
||||
@@ -1104,6 +1113,73 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
|
||||
return writeObjectFileOrAssembly(outputType, outFileName);
|
||||
}
|
||||
|
||||
typedef std::vector<std::string> vecString_t;

/** Break a string into its whitespace-delimited tokens.

    Tokens are extracted with the stream extraction operator, so any run
    of whitespace acts as a single separator and leading/trailing
    whitespace is ignored.  An empty or all-whitespace string yields an
    empty vector; no returned token is ever empty.
 */
static vecString_t
lSplitString(const std::string &s)
{
    vecString_t tokens;
    std::stringstream stream(s);
    std::string token;
    while (stream >> token)
        tokens.push_back(token);
    return tokens;
}
|
||||
|
||||
static void
|
||||
lFixAttributes(const vecString_t &src, vecString_t &dst)
|
||||
{
|
||||
dst.clear();
|
||||
|
||||
std::vector< std::pair<int,int> > attributePos;
|
||||
|
||||
typedef std::map<std::string, std::string> attributeMap_t;
|
||||
attributeMap_t attributeMap;
|
||||
|
||||
for (vecString_t::const_iterator it = src.begin(); it != src.end(); it++)
|
||||
{
|
||||
const vecString_t words = lSplitString(*it);
|
||||
if (!words.empty() && words[0] == "attributes" && words[1][0] == '#')
|
||||
{
|
||||
const int nWords = words.size();
|
||||
assert(nWords > 3);
|
||||
assert(words[2 ] == "=");
|
||||
assert(words[3 ] == "{");
|
||||
assert(words[nWords-1] == "}");
|
||||
std::string attributes;
|
||||
for (int w = 4; w < nWords-1; w++)
|
||||
attributes += words[w] + " ";
|
||||
attributeMap[words[1]] = attributes;
|
||||
}
|
||||
}
|
||||
for (vecString_t::const_iterator it = src.begin(); it != src.end(); it++)
|
||||
{
|
||||
vecString_t words = lSplitString(*it);
|
||||
if (!words.empty() && words[0] == "attributes")
|
||||
continue;
|
||||
std::string s;
|
||||
std::map<std::string, std::string> attributeSet;
|
||||
#if 1 /* this attributed cannot be used in function parametrers, so remove them */
|
||||
attributeSet["readnone"] = " ";
|
||||
attributeSet["readonly"] = " ";
|
||||
attributeSet["readnone,"] = ",";
|
||||
attributeSet["readonly,"] = ",";
|
||||
#endif
|
||||
|
||||
|
||||
for (vecString_t::iterator w = words.begin(); w != words.end(); w++)
|
||||
{
|
||||
if (attributeSet.find(*w) != attributeSet.end())
|
||||
*w = attributeSet[*w];
|
||||
|
||||
if ((*w)[0] == '#')
|
||||
{
|
||||
attributeMap_t::iterator m = attributeMap.find(*w);
|
||||
assert (m != attributeMap.end());
|
||||
*w = attributeMap[*w];
|
||||
}
|
||||
s += *w + " ";
|
||||
}
|
||||
dst.push_back(s);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Module::writeBitcode(llvm::Module *module, const char *outFileName) {
|
||||
@@ -1128,12 +1204,44 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
|
||||
}
|
||||
|
||||
llvm::raw_fd_ostream fos(fd, (fd != 1), false);
|
||||
if (g->target->getISA() == Target::NVPTX)
|
||||
if (g->target->getISA() != Target::NVPTX)
|
||||
{
|
||||
llvm::WriteBitcodeToFile(module, fos);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* when using "nvptx" target, emit patched/hacked assembly
|
||||
* NVPTX only accepts 3.2-style LLVM assembly, where attributes
|
||||
* must be inlined, rather than referenced by attribute group id (#N).
|
||||
* As soon as NVVM supports 3.3/3.4-style assembly this fix won't be needed
|
||||
*/
|
||||
const std::string dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
||||
module->setDataLayout(dl_string);
|
||||
|
||||
std::string s;
|
||||
llvm::raw_string_ostream out(s);
|
||||
llvm::OwningPtr<llvm::AssemblyAnnotationWriter> Annotator;
|
||||
module->print(out, Annotator.get());
|
||||
std::istringstream iss(s);
|
||||
|
||||
vecString_t input,output;
|
||||
while (std::getline(iss,s))
|
||||
input.push_back(s);
|
||||
output = input;
|
||||
|
||||
#if !(defined(LLVM_3_1) || defined(LLVM_3_2))
|
||||
/* do not fix attributes with LLVM 3.2, everything is fine there */
|
||||
lFixAttributes(input,output);
|
||||
#endif
|
||||
|
||||
for (vecString_t::iterator it = output.begin(); it != output.end(); it++)
|
||||
{
|
||||
*it += "\n";
|
||||
fos << *it;
|
||||
}
|
||||
}
|
||||
llvm::WriteBitcodeToFile(module, fos);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -7,9 +7,12 @@ TMPDIR=/tmp
|
||||
fbname=`basename $1`
|
||||
if [ "$NVVM" == "1" ];
|
||||
then
|
||||
LLVM32=$HOME/usr/local/llvm/bin-3.2
|
||||
LLVMDIS=$LLVM32/bin/llvm-dis
|
||||
$($LLVMDIS $1 -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
|
||||
# LLVM32=$HOME/usr/local/llvm/bin-3.2
|
||||
# LLVM34=$HOME/usr/local/llvm/bin-3.4
|
||||
# LLVMAS=$LLVM34/bin/llvm-as
|
||||
# LLVMDIS=$LLVM32/bin/llvm-dis
|
||||
# $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
|
||||
$($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \
|
||||
$($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
|
||||
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user