diff --git a/Makefile b/Makefile index 34839f86..182cff26 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ # If you have your own special version of llvm and/or clang, change # these variables to match. -LLVM_CONFIG=$(shell which /home/evghenii/usr/local/llvm/bin-trunk/bin/llvm-config) +LLVM_CONFIG=$(shell which /home/evghenii/usr/local/llvm/bin-3.2/bin/llvm-config) CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir) # Enable ARM by request diff --git a/builtins/target-nvptx64.ll b/builtins/target-nvptx64.ll index 8bceace7..fa719ba9 100644 --- a/builtins/target-nvptx64.ll +++ b/builtins/target-nvptx64.ll @@ -647,15 +647,15 @@ define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline define float @__reduce_min_float(<1 x float>) nounwind readnone { %value = extractelement <1 x float> %0, i32 0 %call = tail call float @__shfl_xor_float(float %value, i32 16) - %call1 = tail call float @__fminf(float %value, float %call) #4 + %call1 = tail call float @__fminf(float %value, float %call) %call.1 = tail call float @__shfl_xor_float(float %call1, i32 8) - %call1.1 = tail call float @__fminf(float %call1, float %call.1) #4 + %call1.1 = tail call float @__fminf(float %call1, float %call.1) %call.2 = tail call float @__shfl_xor_float(float %call1.1, i32 4) - %call1.2 = tail call float @__fminf(float %call1.1, float %call.2) #4 + %call1.2 = tail call float @__fminf(float %call1.1, float %call.2) %call.3 = tail call float @__shfl_xor_float(float %call1.2, i32 2) - %call1.3 = tail call float @__fminf(float %call1.2, float %call.3) #4 + %call1.3 = tail call float @__fminf(float %call1.2, float %call.3) %call.4 = tail call float @__shfl_xor_float(float %call1.3, i32 1) - %call1.4 = tail call float @__fminf(float %call1.3, float %call.4) #4 + %call1.4 = tail call float @__fminf(float %call1.3, float %call.4) ret float %call1.4 } diff --git a/builtins/util_ptx.m4 b/builtins/util_ptx.m4 index ca61f84e..d60a09dd 100644 --- a/builtins/util_ptx.m4 +++ b/builtins/util_ptx.m4 @@ -2917,7 +2917,7 @@ entry: br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry - %call2 = tail call noalias i8* @malloc(i64 %size) #3 + %call2 = tail call noalias i8* @malloc(i64 %size) %phitmp = ptrtoint i8* %call2 to i64 br label %if.end @@ -2946,7 +2946,7 @@ entry: br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry - tail call void @free(i8* %ptr) #3 + tail call void @free(i8* %ptr) br label %if.end if.end: ; preds = %if.then, %entry diff --git a/examples_cuda/aobench/ao1.ispc b/examples_cuda/aobench/ao1.ispc index 24659535..b01523d0 100644 --- a/examples_cuda/aobench/ao1.ispc +++ b/examples_cuda/aobench/ao1.ispc @@ -298,4 +298,5 @@ export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples, const uniform int ntilex = (w+TILEX-1)/TILEX; const uniform int ntiley = (h+TILEY-1)/TILEY; launch[ntilex,ntiley] ao_task(w, h, nsubsamples, image); + sync; } diff --git a/examples_cuda/deferred/Makefile_gpu b/examples_cuda/deferred/Makefile_gpu index 83b45531..6e7739fa 100644 --- a/examples_cuda/deferred/Makefile_gpu +++ b/examples_cuda/deferred/Makefile_gpu @@ -41,8 +41,9 @@ $(PROG): $(CXX_OBJ) kernel.ptx $(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc_nvptx64.bc -h `basename $< .ispc`_ispc.h $< --emit-llvm %.ptx: %.bc - $(LLVM32DIS) $< - $(PTXGEN) `basename $< .bc`.ll > $@ + $(PTXGEN) $< > $@ +# $(LLVM32DIS) $< +# $(PTXGEN) `basename $< .bc`.ll > $@ kernel.ptx: $(PTXSRC) cat $^ > kernel.ptx diff --git a/examples_cuda/options/Makefile_gpu b/examples_cuda/options/Makefile_gpu index 03c63d50..768da3d9 100644 --- a/examples_cuda/options/Makefile_gpu +++ b/examples_cuda/options/Makefile_gpu @@ -42,8 +42,9 @@ $(PROG): $(CXX_OBJ) kernel.ptx $(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc_nvptx64.bc -h `basename $< .ispc`_ispc.h $< --emit-llvm %.ptx: %.bc - $(LLVM32DIS) $< - $(PTXGEN) `basename $< .bc`.ll > $@ + $(PTXGEN) $< > $@ +# $(LLVM32DIS) $< +# $(PTXGEN) `basename $< .bc`.ll > $@ kernel.ptx: $(PTXSRC) cat $^ > kernel.ptx