diff --git a/Makefile b/Makefile index 34055496..72eb98b3 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,35 @@ +# +# Copyright (c) 2010-2013, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # # ispc Makefile # @@ -182,7 +214,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll) @echo Creating C++ source from builtins definition file $< - @m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) $< | python bitcode2cpp.py $< > $@ + @m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@ objs/builtins-c-32.cpp: builtins/builtins.c @echo Creating C++ source from builtins definition file $< diff --git a/builtins.cpp b/builtins.cpp index 53cab1f6..b8b5ac59 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -406,8 +406,10 @@ lSetInternalFunctions(llvm::Module *module) { "__count_trailing_zeros_i64", "__count_leading_zeros_i32", "__count_leading_zeros_i64", - "__delete_uniform", - "__delete_varying", + "__delete_uniform_32rt", + "__delete_uniform_64rt", + "__delete_varying_32rt", + "__delete_varying_64rt", "__do_assert_uniform", "__do_assert_varying", "__do_print", @@ -477,9 +479,11 @@ lSetInternalFunctions(llvm::Module *module) { "__min_varying_uint32", "__min_varying_uint64", "__movmsk", - "__new_uniform", - "__new_varying32", - "__new_varying64", + "__new_uniform_32rt", + "__new_uniform_64rt", + "__new_varying32_32rt", + "__new_varying32_64rt", + "__new_varying64_64rt", "__none", "__num_cores", "__packed_load_active", diff --git a/builtins/util.m4 b/builtins/util.m4 index 765b5587..36ebdd2a 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -2536,15 +2536,117 @@ ok: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; new/delete -declare i8 * @malloc(i64) +;; Set of functions for 32 bit runtime. 
+;; They are different for Windows and Unix (Linux/MacOS), +;; on Windows we have to use _aligned_malloc/_aligned_free, +;; while on Unix we use posix_memalign/free +;; +;; Note that this should be really two different libraries for 32 and 64 +;; environment and it should happen sooner or later + +ifelse(BUILD_OS, `UNIX', +` + +declare i32 @posix_memalign(i8**, i32, i32) + +define noalias i8 * @__new_uniform_32rt(i64 %size) { + %ptr = alloca i8* + %conv = trunc i64 %size to i32 + %call1 = call i32 @posix_memalign(i8** %ptr, i32 16, i32 %conv) + %ptr_val = load i8** %ptr + ret i8* %ptr_val +} + +define @__new_varying32_32rt( %size, %mask) { + %ret = alloca + store zeroinitializer, * %ret + %ret64 = bitcast * %ret to i64 * + + per_lane(WIDTH, %mask, ` + %sz_LANE_ID = extractelement %size, i32 LANE + %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE + %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** + %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 16, i32 %sz_LANE_ID)') + + %r = load * %ret + ret %r +} + +define void @__delete_uniform_32rt(i8 * %ptr) { + call void @free(i8 * %ptr) + ret void +} + +define void @__delete_varying_32rt( %ptr, %mask) { + per_lane(WIDTH, %mask, ` + %iptr_LANE_ID = extractelement %ptr, i32 LANE + %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 * + call void @free(i8 * %ptr_LANE_ID) + ') + ret void +} + +', +BUILD_OS, `WINDOWS', +` +declare i8* @_aligned_malloc(i32, i32) +declare void @_aligned_free(i8 *) + +define noalias i8 * @__new_uniform_32rt(i64 %size) { + %conv = trunc i64 %size to i32 + %ptr = tail call i8* @_aligned_malloc(i32 %conv, i32 16) + ret i8* %ptr +} + +define @__new_varying32_32rt( %size, %mask) { + %ret = alloca + store zeroinitializer, * %ret + %ret64 = bitcast * %ret to i64 * + + per_lane(WIDTH, %mask, ` + %sz_LANE_ID = extractelement %size, i32 LANE + %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i32 %sz_LANE_ID, i32 16) + %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 + %store_LANE_ID = 
getelementptr i64 * %ret64, i32 LANE + store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') + + %r = load * %ret + ret %r +} + +define void @__delete_uniform_32rt(i8 * %ptr) { + call void @_aligned_free(i8 * %ptr) + ret void +} + +define void @__delete_varying_32rt( %ptr, %mask) { + per_lane(WIDTH, %mask, ` + %iptr_LANE_ID = extractelement %ptr, i32 LANE + %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 * + call void @_aligned_free(i8 * %ptr_LANE_ID) + ') + ret void +} + +', +` +errprint(`BUILD_OS should be defined to either UNIX or WINDOWS +') +m4exit(`1') +') + +;; Set of functions for 64 bit runtime +;; We use the same standard malloc/free pair on all platforms (Windows/Linux/MacOS). + +declare noalias i8 * @malloc(i64) declare void @free(i8 *) -define i8 * @__new_uniform(i64 %size) { - %a = call i8 * @malloc(i64 %size) +define noalias i8 * @__new_uniform_64rt(i64 %size) { + %a = call noalias i8 * @malloc(i64 %size) ret i8 * %a } -define @__new_varying32( %size, %mask) { +define @__new_varying32_64rt( %size, %mask) { %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * @@ -2552,7 +2654,7 @@ define @__new_varying32( %size, %mask) per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64 - %ptr_LANE_ID = call i8 * @malloc(i64 %sz64_LANE_ID) + %ptr_LANE_ID = call noalias i8 * @malloc(i64 %sz64_LANE_ID) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') @@ -2561,14 +2663,14 @@ define @__new_varying32( %size, %mask) ret %r } -define @__new_varying64( %size, %mask) { +define @__new_varying64_64rt( %size, %mask) { %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE - %ptr_LANE_ID = call i8 * @malloc(i64 %sz_LANE_ID) + %ptr_LANE_ID = call noalias i8 * @malloc(i64 %sz_LANE_ID) %ptr_int_LANE_ID = 
ptrtoint i8 * %ptr_LANE_ID to i64 %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') @@ -2577,12 +2679,12 @@ define @__new_varying64( %size, %mask) ret %r } -define void @__delete_uniform(i8 * %ptr) { +define void @__delete_uniform_64rt(i8 * %ptr) { call void @free(i8 * %ptr) ret void } -define void @__delete_varying( %ptr, %mask) { +define void @__delete_varying_64rt( %ptr, %mask) { per_lane(WIDTH, %mask, ` %iptr_LANE_ID = extractelement %ptr, i32 LANE %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 * diff --git a/ctx.cpp b/ctx.cpp index 11e25492..4d207ee2 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -3285,6 +3285,20 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // pointer, so just emit the IR directly. llvm::Instruction *ci = llvm::CallInst::Create(func, argVals, name ? name : "", bblock); + + // Copy noalias attribute to call instruction, to enable better + // alias analysis. + // TODO: what other attributes need to be copied? + // TODO: do the same for the varying path. +#if defined (LLVM_3_3) + llvm::CallInst *cc = llvm::dyn_cast(ci); + if (cc && + cc->getCalledFunction() && + cc->getCalledFunction()->doesNotAlias(0)) { + cc->addAttribute(0, llvm::Attribute::NoAlias); + } +#endif + AddDebugPos(ci); return ci; } diff --git a/expr.cpp b/expr.cpp index 7808d2af..27f45299 100644 --- a/expr.cpp +++ b/expr.cpp @@ -8214,16 +8214,24 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const { // varying, and taking 32-bit or 64-bit allocation counts. 
llvm::Function *func; if (isVarying) { - if (do32Bit) - func = m->module->getFunction("__new_varying32"); - else - func = m->module->getFunction("__new_varying64"); + if (g->target->is32Bit()) { + func = m->module->getFunction("__new_varying32_32rt"); + } else if (g->opt.force32BitAddressing) { + func = m->module->getFunction("__new_varying32_64rt"); + } else { + func = m->module->getFunction("__new_varying64_64rt"); + } } else { + // FIXME: __new_uniform_32rt should take i32 if (allocSize->getType() != LLVMTypes::Int64Type) allocSize = ctx->SExtInst(allocSize, LLVMTypes::Int64Type, "alloc_size64"); - func = m->module->getFunction("__new_uniform"); + if (g->target->is32Bit()) { + func = m->module->getFunction("__new_uniform_32rt"); + } else { + func = m->module->getFunction("__new_uniform_64rt"); + } } AssertPos(pos, func != NULL); diff --git a/ispc.cpp b/ispc.cpp index 63b66c9c..daa3f5a8 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -477,6 +477,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : #endif // Set is32Bit + // This indicates whether we are compiling for a 32-bit platform + // and can assume a 32-bit runtime. + // FIXME: all generic targets are handled as 64-bit, which is incorrect. 
this->m_is32Bit = (getDataLayout()->getPointerSize() == 4); #if !defined(LLVM_3_1) && !defined(LLVM_3_2) diff --git a/ispc.vcxproj b/ispc.vcxproj index 3a0bf98b..41fd1569 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -115,10 +115,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp gen-bitcode-sse4.cpp builtins\util.m4;builtins\target-sse4-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp gen-bitcode-sse4.cpp builtins\util.m4;builtins\target-sse4-common.ll Building gen-bitcode-sse4.cpp @@ -128,10 +128,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp gen-bitcode-sse4-x2.cpp builtins\util.m4;builtins\target-sse4-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp gen-bitcode-sse4-x2.cpp builtins\util.m4;builtins\target-sse4-common.ll Building gen-bitcode-sse4-x2.cpp @@ -141,10 +141,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py 
builtins\target-sse2.ll > gen-bitcode-sse2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp gen-bitcode-sse2.cpp builtins\util.m4;builtins\target-sse2-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp gen-bitcode-sse2.cpp builtins\util.m4;builtins\target-sse2-common.ll Building gen-bitcode-sse2.cpp @@ -154,10 +154,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp gen-bitcode-sse2-x2.cpp builtins\util.m4;builtins\target-sse2-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp gen-bitcode-sse2-x2.cpp builtins\util.m4;builtins\target-sse2-common.ll Building gen-bitcode-sse2-x2.cpp @@ -167,10 +167,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp gen-bitcode-avx1.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - m4 -Ibuiltins/ 
-DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp gen-bitcode-avx1.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll Building gen-bitcode-avx1.cpp @@ -180,10 +180,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp gen-bitcode-avx1-x2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp gen-bitcode-avx1-x2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\targets-avx-x2.ll Building gen-bitcode-avx1-x2.cpp @@ -193,10 +193,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp gen-bitcode-avx11.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11.ll | python bitcode2cpp.py 
builtins\target-avx11.ll > gen-bitcode-avx11.cpp gen-bitcode-avx11.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll Building gen-bitcode-avx11.cpp @@ -206,10 +206,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp gen-bitcode-avx11-x2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp gen-bitcode-avx11-x2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\targets-avx-x2.ll Building gen-bitcode-avx11-x2.cpp @@ -219,10 +219,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp gen-bitcode-avx2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp gen-bitcode-avx2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll Building gen-bitcode-avx2.cpp @@ -232,10 +232,10 @@ Document - m4 -Ibuiltins/ 
-DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp gen-bitcode-avx2-x2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp gen-bitcode-avx2-x2.cpp builtins\util.m4;builtins\target-avx-common.ll;builtins\targets-avx-x2.ll Building gen-bitcode-avx2-x2.cpp @@ -245,10 +245,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp gen-bitcode-generic-1.cpp builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp gen-bitcode-generic-1.cpp builtins\util.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-1.cpp @@ -258,10 +258,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS 
builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp gen-bitcode-generic-4.cpp builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp gen-bitcode-generic-4.cpp builtins\util.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-4.cpp @@ -271,10 +271,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp gen-bitcode-generic-8.cpp builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp gen-bitcode-generic-8.cpp builtins\util.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-8.cpp @@ -284,10 +284,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp gen-bitcode-generic-16.cpp builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ 
-DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp gen-bitcode-generic-16.cpp builtins\util.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-16.cpp @@ -297,10 +297,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp gen-bitcode-generic-32.cpp builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp gen-bitcode-generic-32.cpp builtins\util.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-32.cpp @@ -310,10 +310,10 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp gen-bitcode-generic-64.cpp builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% 
-DBUILD_OS=WINDOWS builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp gen-bitcode-generic-64.cpp builtins\util.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-64.cpp diff --git a/main.cpp b/main.cpp index 4a970ac8..d58ff71d 100644 --- a/main.cpp +++ b/main.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -272,6 +272,7 @@ int main(int Argc, char *Argv[]) { g->cppArgs.push_back(argv[i]); else if (!strncmp(argv[i], "--addressing=", 13)) { if (atoi(argv[i] + 13) == 64) + // FIXME: this doesn't make sense on 32 bit platform. g->opt.force32BitAddressing = false; else if (atoi(argv[i] + 13) == 32) g->opt.force32BitAddressing = true; diff --git a/stmt.cpp b/stmt.cpp index 5ada5584..a95f8fbf 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -3234,7 +3234,12 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { // __delete_uniform() from the builtins expects. exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType, "ptr_to_void"); - llvm::Function *func = m->module->getFunction("__delete_uniform"); + llvm::Function *func; + if (g->target->is32Bit()) { + func = m->module->getFunction("__delete_uniform_32rt"); + } else { + func = m->module->getFunction("__delete_uniform_64rt"); + } AssertPos(pos, func != NULL); ctx->CallInst(func, NULL, exprValue, ""); @@ -3244,7 +3249,12 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const { // takes a vector of i64s (even for 32-bit targets). Therefore, we // only need to extend to 64-bit values on 32-bit targets before // calling it. 
- llvm::Function *func = m->module->getFunction("__delete_varying"); + llvm::Function *func; + if (g->target->is32Bit()) { + func = m->module->getFunction("__delete_varying_32rt"); + } else { + func = m->module->getFunction("__delete_varying_64rt"); + } AssertPos(pos, func != NULL); if (g->target->is32Bit()) exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,