diff --git a/Makefile b/Makefile index 72eb98b3..4d3063cd 100644 --- a/Makefile +++ b/Makefile @@ -118,9 +118,14 @@ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 \ generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 -BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ - builtins/dispatch.ll -BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ +# These files need to be compiled in two versions - 32 and 64 bits. +BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) +# These are files to be compiled in single version. +BUILTINS_SRC_COMMON=builtins/dispatch.ll +BUILTINS_OBJS_32=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-32bit.o))) +BUILTINS_OBJS_64=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-64bit.o))) +BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_COMMON:.ll=.o))) \ + $(BUILTINS_OBJS_32) $(BUILTINS_OBJS_64) \ builtins-c-32.cpp builtins-c-64.cpp BISON_SRC=parse.yy FLEX_SRC=lex.ll @@ -212,17 +217,25 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc @echo Compiling $< @$(CXX) $(CXXFLAGS) -o $@ -c $< -objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll) +objs/builtins-dispatch.cpp: builtins/dispatch.ll builtins/util.m4 $(wildcard builtins/*common.ll) @echo Creating C++ source from builtins definition file $< @m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@ +objs/builtins-%-32bit.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll) + @echo Creating C++ source from builtins definition file $< \(32 bit version\) + @m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=32 $< | python bitcode2cpp.py $< 32bit > $@ + +objs/builtins-%-64bit.cpp: builtins/%.ll builtins/util.m4 $(wildcard 
builtins/*common.ll) + @echo Creating C++ source from builtins definition file $< \(64 bit version\) + @m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=64 $< | python bitcode2cpp.py $< 64bit > $@ + objs/builtins-c-32.cpp: builtins/builtins.c @echo Creating C++ source from builtins definition file $< - @$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c-32 > $@ + @$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c 32 > $@ objs/builtins-c-64.cpp: builtins/builtins.c @echo Creating C++ source from builtins definition file $< - @$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c-64 > $@ + @$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c 64 > $@ objs/stdlib_generic_ispc.cpp: stdlib.ispc @echo Creating C++ source from $< for generic diff --git a/bitcode2cpp.py b/bitcode2cpp.py index aaeb1f29..10a28153 100755 --- a/bitcode2cpp.py +++ b/bitcode2cpp.py @@ -10,6 +10,8 @@ import os length=0 src=str(sys.argv[1]) +if (len(sys.argv) > 2): + runtime=str(sys.argv[2]) target = re.sub("builtins/target-", "", src) target = re.sub(r"builtins\\target-", "", target) @@ -29,8 +31,11 @@ except IOError: sys.stderr.write("Couldn't open " + src) sys.exit(1) +name = target +if (len(sys.argv) > 2): + name += "_" + runtime; width = 16; -sys.stdout.write("unsigned char builtins_bitcode_" + target + "[] = {\n") +sys.stdout.write("unsigned char builtins_bitcode_" + name + "[] = {\n") data = as_out.stdout.read() for i in range(0, len(data), 1): @@ -40,7 +45,7 @@ for i in range(0, len(data), 1): sys.stdout.write("\n") sys.stdout.write("0x00 };\n\n") -sys.stdout.write("int builtins_bitcode_" + target + "_length = " + str(i+1) + ";\n") +sys.stdout.write("int builtins_bitcode_" + name + "_length = " + str(i+1) + ";\n") as_out.wait() diff --git a/builtins.cpp b/builtins.cpp index 69559b51..68d955f8 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -776,169 +776,195 @@ 
lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) { void DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module, bool includeStdlibISPC) { + bool runtime32 = g->target->is32Bit(); + +#define EXPORT_MODULE(export_module) \ + extern unsigned char export_module[]; \ + extern int export_module##_length; \ + AddBitcodeToModule(export_module, export_module##_length, \ + module, symbolTable); + // Add the definitions from the compiled builtins-c.c file - if (g->target->is32Bit()) { - extern unsigned char builtins_bitcode_c_32[]; - extern int builtins_bitcode_c_32_length; - AddBitcodeToModule(builtins_bitcode_c_32, builtins_bitcode_c_32_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_c_32); } else { - extern unsigned char builtins_bitcode_c_64[]; - extern int builtins_bitcode_c_64_length; - AddBitcodeToModule(builtins_bitcode_c_64, builtins_bitcode_c_64_length, - module, symbolTable); + EXPORT_MODULE(builtins_bitcode_c_64); } // Next, add the target's custom implementations of the various needed // builtin functions (e.g. __masked_store_32(), etc). 
switch (g->target->getISA()) { - case Target::SSE2: - extern unsigned char builtins_bitcode_sse2[]; - extern int builtins_bitcode_sse2_length; - extern unsigned char builtins_bitcode_sse2_x2[]; - extern int builtins_bitcode_sse2_x2_length; + case Target::SSE2: { switch (g->target->getVectorWidth()) { case 4: - AddBitcodeToModule(builtins_bitcode_sse2, builtins_bitcode_sse2_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_sse2_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_sse2_64bit); + } break; case 8: - AddBitcodeToModule(builtins_bitcode_sse2_x2, builtins_bitcode_sse2_x2_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_sse2_x2_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_sse2_x2_64bit); + } break; default: FATAL("logic error in DefineStdlib"); } break; - case Target::SSE4: - extern unsigned char builtins_bitcode_sse4[]; - extern int builtins_bitcode_sse4_length; - extern unsigned char builtins_bitcode_sse4_x2[]; - extern int builtins_bitcode_sse4_x2_length; + } + case Target::SSE4: { switch (g->target->getVectorWidth()) { case 4: - AddBitcodeToModule(builtins_bitcode_sse4, - builtins_bitcode_sse4_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_sse4_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_sse4_64bit); + } break; case 8: - AddBitcodeToModule(builtins_bitcode_sse4_x2, - builtins_bitcode_sse4_x2_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit); + } break; default: FATAL("logic error in DefineStdlib"); } break; - case Target::AVX: + } + case Target::AVX: { switch (g->target->getVectorWidth()) { case 8: - extern unsigned char builtins_bitcode_avx1[]; - extern int builtins_bitcode_avx1_length; - AddBitcodeToModule(builtins_bitcode_avx1, - builtins_bitcode_avx1_length, - module, symbolTable); + if (runtime32) { + 
EXPORT_MODULE(builtins_bitcode_avx1_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx1_64bit); + } break; case 16: - extern unsigned char builtins_bitcode_avx1_x2[]; - extern int builtins_bitcode_avx1_x2_length; - AddBitcodeToModule(builtins_bitcode_avx1_x2, - builtins_bitcode_avx1_x2_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_avx1_x2_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx1_x2_64bit); + } break; default: FATAL("logic error in DefineStdlib"); } break; - case Target::AVX11: + } + case Target::AVX11: { switch (g->target->getVectorWidth()) { case 8: - extern unsigned char builtins_bitcode_avx11[]; - extern int builtins_bitcode_avx11_length; - AddBitcodeToModule(builtins_bitcode_avx11, - builtins_bitcode_avx11_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_avx11_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx11_64bit); + } break; case 16: - extern unsigned char builtins_bitcode_avx11_x2[]; - extern int builtins_bitcode_avx11_x2_length; - AddBitcodeToModule(builtins_bitcode_avx11_x2, - builtins_bitcode_avx11_x2_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_avx11_x2_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx11_x2_64bit); + } break; default: FATAL("logic error in DefineStdlib"); } break; - case Target::AVX2: + } + case Target::AVX2: { switch (g->target->getVectorWidth()) { case 8: - extern unsigned char builtins_bitcode_avx2[]; - extern int builtins_bitcode_avx2_length; - AddBitcodeToModule(builtins_bitcode_avx2, - builtins_bitcode_avx2_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_avx2_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx2_64bit); + } break; case 16: - extern unsigned char builtins_bitcode_avx2_x2[]; - extern int builtins_bitcode_avx2_x2_length; - AddBitcodeToModule(builtins_bitcode_avx2_x2, - builtins_bitcode_avx2_x2_length, - module, symbolTable); 
+            if (runtime32) {
+                EXPORT_MODULE(builtins_bitcode_avx2_x2_32bit);
+            }
+            else {
+                EXPORT_MODULE(builtins_bitcode_avx2_x2_64bit);
+            }
             break;
         default:
             FATAL("logic error in DefineStdlib");
         }
         break;
-    case Target::GENERIC:
+    }
+    case Target::GENERIC: {  // every generic width has separate _32bit and _64bit bitcode
         switch (g->target->getVectorWidth()) {
         case 4:
-            extern unsigned char builtins_bitcode_generic_4[];
-            extern int builtins_bitcode_generic_4_length;
-            AddBitcodeToModule(builtins_bitcode_generic_4,
-                               builtins_bitcode_generic_4_length,
-                               module, symbolTable);
+            if (runtime32) {
+                EXPORT_MODULE(builtins_bitcode_generic_4_32bit);
+            }
+            else {
+                EXPORT_MODULE(builtins_bitcode_generic_4_64bit);
+            }
             break;
         case 8:
-            extern unsigned char builtins_bitcode_generic_8[];
-            extern int builtins_bitcode_generic_8_length;
-            AddBitcodeToModule(builtins_bitcode_generic_8,
-                               builtins_bitcode_generic_8_length,
-                               module, symbolTable);
+            if (runtime32) {
+                EXPORT_MODULE(builtins_bitcode_generic_8_32bit);
+            }
+            else {
+                EXPORT_MODULE(builtins_bitcode_generic_8_64bit);
+            }
             break;
         case 16:
-            extern unsigned char builtins_bitcode_generic_16[];
-            extern int builtins_bitcode_generic_16_length;
-            AddBitcodeToModule(builtins_bitcode_generic_16,
-                               builtins_bitcode_generic_16_length,
-                               module, symbolTable);
+            if (runtime32) {
+                EXPORT_MODULE(builtins_bitcode_generic_16_32bit);
+            }
+            else {
+                EXPORT_MODULE(builtins_bitcode_generic_16_64bit);
+            }
             break;
         case 32:
-            extern unsigned char builtins_bitcode_generic_32[];
-            extern int builtins_bitcode_generic_32_length;
-            AddBitcodeToModule(builtins_bitcode_generic_32,
-                               builtins_bitcode_generic_32_length,
-                               module, symbolTable);
+            if (runtime32) {
+                EXPORT_MODULE(builtins_bitcode_generic_32_32bit);
+            }
+            else {
+                EXPORT_MODULE(builtins_bitcode_generic_32_64bit);
+            }
             break;
         case 64:
-            extern unsigned char builtins_bitcode_generic_64[];
-            extern int builtins_bitcode_generic_64_length;
-            AddBitcodeToModule(builtins_bitcode_generic_64,
-                               builtins_bitcode_generic_64_length,
-                               module, symbolTable);
+            if (runtime32)
{ + EXPORT_MODULE(builtins_bitcode_generic_64_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_generic_64_64bit); + } break; case 1: - extern unsigned char builtins_bitcode_generic_1[]; - extern int builtins_bitcode_generic_1_length; - AddBitcodeToModule(builtins_bitcode_generic_1, - builtins_bitcode_generic_1_length, - module, symbolTable); + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_generic_1_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_generic_1_64bit); + } break; default: FATAL("logic error in DefineStdlib"); } break; + } default: FATAL("logic error"); } @@ -970,6 +996,11 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(), module, symbolTable); + if (g->forceAlignment != -1) { + llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true); + alignment->setInitializer(LLVMInt32(g->forceAlignment)); + } + if (includeStdlibISPC) { // If the user wants the standard library to be included, parse the // serialized version of the stdlib.ispc file to get its diff --git a/builtins/util.m4 b/builtins/util.m4 index 6dda67b9..c19d4930 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -34,8 +34,6 @@ ;; builtins for various targets can use macros from this file to simplify ;; generating code for their implementations of those builtins. -declare i1 @__is_compile_time_constant_uniform_int32(i32) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; It is a bit of a pain to compute this in m4 for 32 and 64-wide targets... 
@@ -820,6 +818,8 @@ define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp, ;; count trailing zeros define(`ctlztz', ` +declare_count_zeros() + define i32 @__count_trailing_zeros_i32(i32) nounwind readnone alwaysinline { %c = call i32 @llvm.cttz.i32(i32 %0) ret i32 %c @@ -1548,6 +1548,7 @@ declare void @ISPCSync(i8*) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind declare i1 @__is_compile_time_constant_mask( %mask) +declare i1 @__is_compile_time_constant_uniform_int32(i32) declare i1 @__is_compile_time_constant_varying_int32() define void @__pause() nounwind readnone { @@ -2543,15 +2544,32 @@ ok: ;; Note that this should be really two different libraries for 32 and 64 ;; environment and it should happen sooner or later +ifelse(WIDTH, 1, `define(`ALIGNMENT', `16')', `define(`ALIGNMENT', `eval(WIDTH*4)')') + +@memory_alignment = internal constant i32 ALIGNMENT + ifelse(BUILD_OS, `UNIX', ` +ifelse(RUNTIME, `32', +` + +;; Unix 32 bit environment. +;; Use: posix_memalign and free +;; Define: +;; - __new_uniform_32rt +;; - __new_varying32_32rt +;; - __delete_uniform_32rt +;; - __delete_varying_32rt + declare i32 @posix_memalign(i8**, i32, i32) +declare void @free(i8 *) define noalias i8 * @__new_uniform_32rt(i64 %size) { %ptr = alloca i8* %conv = trunc i64 %size to i32 - %call1 = call i32 @posix_memalign(i8** %ptr, i32 16, i32 %conv) + %alignment = load i32* @memory_alignment + %call1 = call i32 @posix_memalign(i8** %ptr, i32 %alignment, i32 %conv) %ptr_val = load i8** %ptr ret i8* %ptr_val } @@ -2560,12 +2578,13 @@ define @__new_varying32_32rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * + %alignment = load i32* @memory_alignment per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** - %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 16, i32 %sz_LANE_ID)') + 
%call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 %alignment, i32 %sz_LANE_ID)') %r = load * %ret ret %r @@ -2585,15 +2604,108 @@ define void @__delete_varying_32rt( %ptr, %mask) { ret void } +', +RUNTIME, `64', +` + +;; Unix 64 bit environment. +;; Use: posix_memalign and free +;; Define: +;; - __new_uniform_64rt +;; - __new_varying32_64rt +;; - __new_varying64_64rt +;; - __delete_uniform_64rt +;; - __delete_varying_64rt + +declare i32 @posix_memalign(i8**, i64, i64) +declare void @free(i8 *) + +define noalias i8 * @__new_uniform_64rt(i64 %size) { + %ptr = alloca i8* + %alignment = load i32* @memory_alignment + %alignment64 = sext i32 %alignment to i64 + %call1 = call i32 @posix_memalign(i8** %ptr, i64 %alignment64, i64 %size) + %ptr_val = load i8** %ptr + ret i8* %ptr_val +} + +define @__new_varying32_64rt( %size, %mask) { + %ret = alloca + store zeroinitializer, * %ret + %ret64 = bitcast * %ret to i64 * + %alignment = load i32* @memory_alignment + %alignment64 = sext i32 %alignment to i64 + + per_lane(WIDTH, %mask, ` + %sz_LANE_ID = extractelement %size, i32 LANE + %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64 + %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE + %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** + %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 %alignment64, i64 %sz64_LANE_ID)') + + %r = load * %ret + ret %r +} + +define @__new_varying64_64rt( %size, %mask) { + %ret = alloca + store zeroinitializer, * %ret + %ret64 = bitcast * %ret to i64 * + %alignment = load i32* @memory_alignment + %alignment64 = sext i32 %alignment to i64 + + per_lane(WIDTH, %mask, ` + %sz64_LANE_ID = extractelement %size, i32 LANE + %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE + %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** + %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 %alignment64, i64 %sz64_LANE_ID)') + + %r = load * %ret + ret %r +} + +define void @__delete_uniform_64rt(i8 * %ptr) { + call void @free(i8 * 
%ptr) + ret void +} + +define void @__delete_varying_64rt( %ptr, %mask) { + per_lane(WIDTH, %mask, ` + %iptr_LANE_ID = extractelement %ptr, i32 LANE + %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 * + call void @free(i8 * %ptr_LANE_ID) + ') + ret void +} + +', ` +errprint(`RUNTIME should be defined to either 32 or 64 +') +m4exit(`1') +') + ', BUILD_OS, `WINDOWS', ` + +ifelse(RUNTIME, `32', +` + +;; Windows 32 bit environment. +;; Use: _aligned_malloc and _aligned_free +;; Define: +;; - __new_uniform_32rt +;; - __new_varying32_32rt +;; - __delete_uniform_32rt +;; - __delete_varying_32rt + declare i8* @_aligned_malloc(i32, i32) declare void @_aligned_free(i8 *) define noalias i8 * @__new_uniform_32rt(i64 %size) { %conv = trunc i64 %size to i32 - %ptr = tail call i8* @_aligned_malloc(i32 %conv, i32 16) + %alignment = load i32* @memory_alignment + %ptr = tail call i8* @_aligned_malloc(i32 %conv, i32 %alignment) ret i8* %ptr } @@ -2601,10 +2713,11 @@ define @__new_varying32_32rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * + %alignment = load i32* @memory_alignment per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE - %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i32 %sz_LANE_ID, i32 16) + %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i32 %sz_LANE_ID, i32 %alignment) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') @@ -2628,32 +2741,39 @@ define void @__delete_varying_32rt( %ptr, %mask) { } ', +RUNTIME, `64', ` -errprint(`BUILD_OS should be defined to either UNIX or WINDOWS -') -m4exit(`1') -') -;; Set of functions for 64 bit runtime -;; We use the same standard malloc/free pair on all platforms (Windows/Linux/MacOS). +;; Windows 64 bit environment. 
+;; Use: _aligned_malloc and _aligned_free +;; Define: +;; - __new_uniform_64rt +;; - __new_varying32_64rt +;; - __new_varying64_64rt +;; - __delete_uniform_64rt +;; - __delete_varying_64rt -declare noalias i8 * @malloc(i64) -declare void @free(i8 *) +declare i8* @_aligned_malloc(i64, i64) +declare void @_aligned_free(i8 *) define noalias i8 * @__new_uniform_64rt(i64 %size) { - %a = call noalias i8 * @malloc(i64 %size) - ret i8 * %a + %alignment = load i32* @memory_alignment + %alignment64 = sext i32 %alignment to i64 + %ptr = tail call i8* @_aligned_malloc(i64 %size, i64 %alignment64) + ret i8* %ptr } define @__new_varying32_64rt( %size, %mask) { %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * + %alignment = load i32* @memory_alignment + %alignment64 = sext i32 %alignment to i64 per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64 - %ptr_LANE_ID = call noalias i8 * @malloc(i64 %sz64_LANE_ID) + %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') @@ -2666,10 +2786,12 @@ define @__new_varying64_64rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * + %alignment = load i32* @memory_alignment + %alignment64 = sext i32 %alignment to i64 per_lane(WIDTH, %mask, ` - %sz_LANE_ID = extractelement %size, i32 LANE - %ptr_LANE_ID = call noalias i8 * @malloc(i64 %sz_LANE_ID) + %sz64_LANE_ID = extractelement %size, i32 LANE + %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') @@ -2679,7 +2801,7 @@ define @__new_varying64_64rt( %size, % } define void @__delete_uniform_64rt(i8 
* %ptr) { - call void @free(i8 * %ptr) + call void @_aligned_free(i8 * %ptr) ret void } @@ -2687,11 +2809,23 @@ define void @__delete_varying_64rt( %ptr, %mask) { per_lane(WIDTH, %mask, ` %iptr_LANE_ID = extractelement %ptr, i32 LANE %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 * - call void @free(i8 * %ptr_LANE_ID) + call void @_aligned_free(i8 * %ptr_LANE_ID) ') ret void } +', ` +errprint(`RUNTIME should be defined to either 32 or 64 +') +m4exit(`1') +') + +', +` +errprint(`BUILD_OS should be defined to either UNIX or WINDOWS +') +m4exit(`1') +') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; read hw clock @@ -3350,13 +3484,26 @@ done: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; reduce_equal -; count leading/trailing zeros +;; count leading/trailing zeros +;; This macro declares the set of count-trailing-zeros and count-leading-zeros functions. +;; It behaves like a static function: it expands only on its first invocation, +;; to avoid redefinition. 
+define(`declare_count_zeros', ` +ifelse(count_zeros_are_defined, true, `', +` declare i32 @llvm.ctlz.i32(i32) declare i64 @llvm.ctlz.i64(i64) declare i32 @llvm.cttz.i32(i32) declare i64 @llvm.cttz.i64(i64) +define(`count_zeros_are_defined', true) +') + +') + define(`reduce_equal_aux', ` +declare_count_zeros() + define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue, <$1 x MASK> %mask) nounwind alwaysinline { entry: diff --git a/cbackend.cpp b/cbackend.cpp index d335d4e9..d5772b2c 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -2388,8 +2388,14 @@ bool CWriter::doInitialization(llvm::Module &M) { I->getName() == "memset" || I->getName() == "memset_pattern16" || I->getName() == "puts" || I->getName() == "printf" || I->getName() == "putchar" || - I->getName() == "fflush" || I->getName() == "malloc" || - I->getName() == "free") + I->getName() == "fflush" || + // Memory allocation + I->getName() == "malloc" || + I->getName() == "posix_memalign" || + I->getName() == "free" || + I->getName() == "_aligned_malloc" || + I->getName() == "_aligned_free" + ) continue; // Don't redeclare ispc's own intrinsics @@ -3805,7 +3811,8 @@ void CWriter::visitCallInst(llvm::CallInst &I) { Callee = RF; } - if (Callee->getName() == "malloc") + if (Callee->getName() == "malloc" || + Callee->getName() == "_aligned_malloc") Out << "(uint8_t *)"; if (NeedsCast) { @@ -3844,7 +3851,13 @@ void CWriter::visitCallInst(llvm::CallInst &I) { for (; AI != AE; ++AI, ++ArgNo) { if (PrintedArg) Out << ", "; - if (ArgNo < NumDeclaredParams && + if (ArgNo == 0 && + Callee->getName() == "posix_memalign") { + // uint8_t** is incompatible with void** without explicit cast. + // Should we do this for any other functions? 
+ Out << "(void **)"; + } + else if (ArgNo < NumDeclaredParams && (*AI)->getType() != FTy->getParamType(ArgNo)) { Out << '('; printType(Out, FTy->getParamType(ArgNo), @@ -3865,10 +3878,12 @@ void CWriter::visitCallInst(llvm::CallInst &I) { #else llvm::Attribute::ByVal #endif - )) + )) { writeOperandDeref(*AI); - else + } + else { writeOperand(*AI); + } PrintedArg = true; } Out << ')'; diff --git a/ispc.cpp b/ispc.cpp index 40bf0020..dccbe423 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -747,6 +747,7 @@ Globals::Globals() { if (getcwd(currentDirectory, sizeof(currentDirectory)) == NULL) FATAL("Current directory path too long!"); #endif + forceAlignment = -1; } /////////////////////////////////////////////////////////////////////////// diff --git a/ispc.h b/ispc.h index 3582e2fc..784cb7cc 100644 --- a/ispc.h +++ b/ispc.h @@ -551,6 +551,10 @@ struct Globals { /** Additional user-provided directories to search when processing #include directives in the preprocessor. */ std::vector includePath; + + /** Indicates that alignment in memory allocation routines should be + forced to have given value. -1 value means natural alignment for the platforms. 
*/ + int forceAlignment; }; enum { diff --git a/ispc.vcxproj b/ispc.vcxproj index 41fd1569..2bc8377b 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -1,4 +1,4 @@ - + @@ -18,49 +18,59 @@ - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - 4146;4800;4996;4355;4624;4005;4003;4018 - 4146;4800;4996;4355;4624;4005;4003;4018 + + 4146;4800;4996;4355;4624;4005;4003;4018 - - 4146;4800;4996;4355;4624;4005;4065 - 4146;4800;4996;4355;4624;4005;4065 + + 4146;4800;4996;4355;4624;4005;4065 - %LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp; -%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp - Building builtins.c - %LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp; -%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp - Building builtins.c - gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp - gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp + %LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c 32 > $(Configuration)/gen-bitcode-c-32.cpp; +%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c 64 > $(Configuration)/gen-bitcode-c-64.cpp + Building builtins.c + $(Configuration)/gen-bitcode-c-32.cpp;$(Configuration)/gen-bitcode-c-64.cpp @@ -87,259 +97,324 @@ Document - %LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp; -%LLVM_INSTALL_DIR%\bin\clang -E -x c 
%(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp; + %LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > $(Configuration)/gen-stdlib-x86.cpp; +%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > $(Configuration)/gen-stdlib-generic.cpp; - gen-stdlib-generic.cpp;gen-stdlib-x86.cpp - %LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp; -%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp; - - gen-stdlib-generic.cpp;gen-stdlib-x86.cpp - Building gen-stdlib-{generic,x86}.cpp - Building gen-stdlib-{generic,x86}.cpp + $(Configuration)/gen-stdlib-generic.cpp;$(Configuration)/gen-stdlib-x86.cpp + Building gen-stdlib-{generic,x86}.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp - gen-bitcode-dispatch.cpp - builtins\util.m4 - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp - gen-bitcode-dispatch.cpp - builtins\util.m4 - Building gen-bitcode-dispatch.cpp - Building gen-bitcode-dispatch.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins/dispatch.ll | python bitcode2cpp.py dispatch.ll > $(Configuration)/gen-bitcode-dispatch.cpp + $(Configuration)/gen-bitcode-dispatch.cpp + builtins\util.m4 + Building gen-bitcode-dispatch.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp - gen-bitcode-sse4.cpp - builtins\util.m4;builtins\target-sse4-common.ll - m4 -Ibuiltins/ 
-DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp - gen-bitcode-sse4.cpp - builtins\util.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4.cpp - Building gen-bitcode-sse4.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 32bit > $(Configuration)/gen-bitcode-sse4-32bit.cpp + $(Configuration)/gen-bitcode-sse4-32bit.cpp + builtins\util.m4;builtins\target-sse4-common.ll + Building gen-bitcode-sse4-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 64bit > $(Configuration)/gen-bitcode-sse4-64bit.cpp + $(Configuration)/gen-bitcode-sse4-64bit.cpp + builtins\util.m4;builtins\target-sse4-common.ll + Building gen-bitcode-sse4-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp - gen-bitcode-sse4-x2.cpp - builtins\util.m4;builtins\target-sse4-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp - gen-bitcode-sse4-x2.cpp - builtins\util.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-x2.cpp - Building gen-bitcode-sse4-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 32bit > $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp + $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp + builtins\util.m4;builtins\target-sse4-common.ll + Building gen-bitcode-sse4-x2-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 
builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 64bit > $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp + $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp + builtins\util.m4;builtins\target-sse4-common.ll + Building gen-bitcode-sse4-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp - gen-bitcode-sse2.cpp - builtins\util.m4;builtins\target-sse2-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp - gen-bitcode-sse2.cpp - builtins\util.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2.cpp - Building gen-bitcode-sse2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 32bit > $(Configuration)/gen-bitcode-sse2-32bit.cpp + $(Configuration)/gen-bitcode-sse2-32bit.cpp + builtins\util.m4;builtins\target-sse2-common.ll + Building gen-bitcode-sse2-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 64bit > $(Configuration)/gen-bitcode-sse2-64bit.cpp + $(Configuration)/gen-bitcode-sse2-64bit.cpp + builtins\util.m4;builtins\target-sse2-common.ll + Building gen-bitcode-sse2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp - gen-bitcode-sse2-x2.cpp - builtins\util.m4;builtins\target-sse2-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp - gen-bitcode-sse2-x2.cpp - 
builtins\util.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-x2.cpp - Building gen-bitcode-sse2-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 32bit > $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp + $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp + builtins\util.m4;builtins\target-sse2-common.ll + Building gen-bitcode-sse2-x2-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 64bit > $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp + $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp + builtins\util.m4;builtins\target-sse2-common.ll + Building gen-bitcode-sse2-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp - gen-bitcode-avx1.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp - gen-bitcode-avx1.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx1.cpp - Building gen-bitcode-avx1.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 32bit > $(Configuration)/gen-bitcode-avx1-32bit.cpp + $(Configuration)/gen-bitcode-avx1-32bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx1-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 64bit > 
$(Configuration)/gen-bitcode-avx1-64bit.cpp + $(Configuration)/gen-bitcode-avx1-64bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx1-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp - gen-bitcode-avx1-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp - gen-bitcode-avx1-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\targets-avx-x2.ll - Building gen-bitcode-avx1-x2.cpp - Building gen-bitcode-avx1-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 32bit > $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp + $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx1-x2-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 64bit > $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx1-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp - gen-bitcode-avx11.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11.ll | python bitcode2cpp.py 
builtins\target-avx11.ll > gen-bitcode-avx11.cpp - gen-bitcode-avx11.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx11.cpp - Building gen-bitcode-avx11.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 32bit > $(Configuration)/gen-bitcode-avx11-32bit.cpp + $(Configuration)/gen-bitcode-avx11-32bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx11-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 64bit > $(Configuration)/gen-bitcode-avx11-64bit.cpp + $(Configuration)/gen-bitcode-avx11-64bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx11-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp - gen-bitcode-avx11-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp - gen-bitcode-avx11-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\targets-avx-x2.ll - Building gen-bitcode-avx11-x2.cpp - Building gen-bitcode-avx11-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 32bit > $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp + $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx11-x2-32bit.cpp + + + + + Document + m4 
-Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 64bit > $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx11-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp - gen-bitcode-avx2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp - gen-bitcode-avx2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx2.cpp - Building gen-bitcode-avx2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 32bit > $(Configuration)/gen-bitcode-avx2-32bit.cpp + $(Configuration)/gen-bitcode-avx2-32bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx2-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 64bit > $(Configuration)/gen-bitcode-avx2-64bit.cpp + $(Configuration)/gen-bitcode-avx2-64bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp - gen-bitcode-avx2-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - m4 -Ibuiltins/ 
-DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp - gen-bitcode-avx2-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll;builtins\targets-avx-x2.ll - Building gen-bitcode-avx2-x2.cpp - Building gen-bitcode-avx2-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 32bit > $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp + $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx2-x2-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 64bit > $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx2-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp - gen-bitcode-generic-1.cpp - builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp - gen-bitcode-generic-1.cpp - builtins\util.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-1.cpp - Building gen-bitcode-generic-1.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 32bit > $(Configuration)/gen-bitcode-generic-1-32bit.cpp + $(Configuration)/gen-bitcode-generic-1-32bit.cpp + 
builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-1-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 64bit > $(Configuration)/gen-bitcode-generic-1-64bit.cpp + $(Configuration)/gen-bitcode-generic-1-64bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-1-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp - gen-bitcode-generic-4.cpp - builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp - gen-bitcode-generic-4.cpp - builtins\util.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-4.cpp - Building gen-bitcode-generic-4.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 32bit > $(Configuration)/gen-bitcode-generic-4-32bit.cpp + $(Configuration)/gen-bitcode-generic-4-32bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-4-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 64bit > $(Configuration)/gen-bitcode-generic-4-64bit.cpp + $(Configuration)/gen-bitcode-generic-4-64bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-4-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > 
gen-bitcode-generic-8.cpp - gen-bitcode-generic-8.cpp - builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp - gen-bitcode-generic-8.cpp - builtins\util.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-8.cpp - Building gen-bitcode-generic-8.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 32bit > $(Configuration)/gen-bitcode-generic-8-32bit.cpp + $(Configuration)/gen-bitcode-generic-8-32bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-8-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 64bit > $(Configuration)/gen-bitcode-generic-8-64bit.cpp + $(Configuration)/gen-bitcode-generic-8-64bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-8-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp - gen-bitcode-generic-16.cpp - builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp - gen-bitcode-generic-16.cpp - builtins\util.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-16.cpp - Building gen-bitcode-generic-16.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 32bit > 
$(Configuration)/gen-bitcode-generic-16-32bit.cpp + $(Configuration)/gen-bitcode-generic-16-32bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-16-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 64bit > $(Configuration)/gen-bitcode-generic-16-64bit.cpp + $(Configuration)/gen-bitcode-generic-16-64bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-16-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp - gen-bitcode-generic-32.cpp - builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp - gen-bitcode-generic-32.cpp - builtins\util.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-32.cpp - Building gen-bitcode-generic-32.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 32bit > $(Configuration)/gen-bitcode-generic-32-32bit.cpp + $(Configuration)/gen-bitcode-generic-32-32bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-32-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 64bit > $(Configuration)/gen-bitcode-generic-32-64bit.cpp + $(Configuration)/gen-bitcode-generic-32-64bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-32-64bit.cpp Document - m4 -Ibuiltins/ 
-DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp - gen-bitcode-generic-64.cpp - builtins\util.m4;builtins\target-generic-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp - gen-bitcode-generic-64.cpp - builtins\util.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-64.cpp - Building gen-bitcode-generic-64.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 32bit > $(Configuration)/gen-bitcode-generic-64-32bit.cpp + $(Configuration)/gen-bitcode-generic-64-32bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-64-32bit.cpp - + Document - flex -t lex.ll > lex.cc - lex.cc - flex -t lex.ll > lex.cc - lex.cc - ispc.h;decl.h;parse.hh;sym.h - ispc.h;decl.h;parse.hh;sym.h + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 64bit > $(Configuration)/gen-bitcode-generic-64-64bit.cpp + $(Configuration)/gen-bitcode-generic-64-64bit.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-64-64bit.cpp + + + + + Document + flex -t lex.ll > $(Configuration)\lex.cc + $(Configuration)\lex.cc + ispc.h;decl.h;$(Configuration)\parse.hh;sym.h + Running flex on lex.ll Document - bison -d -v -t -o parse.cc parse.yy - parse.cc;parse.h - bison -d -v -t -o parse.cc parse.yy - parse.cc;parse.h - ispc.h;type.h;decl.h;expr.h;sym.h;stmt.h - ispc.h;type.h;decl.h;expr.h;sym.h;stmt.h - Running bison on parse.yy - Running bison on parse.yy + bison -d -v -t -o $(Configuration)/parse.cc parse.yy + 
$(Configuration)\parse.cc;$(Configuration)\parse.hh;$(Configuration)\parse.output + ispc.h;type.h;decl.h;expr.h;sym.h;stmt.h + Running bison on parse.yy @@ -356,7 +431,7 @@ Application false - true + false Unicode diff --git a/main.cpp b/main.cpp index cc005fde..80f77683 100644 --- a/main.cpp +++ b/main.cpp @@ -98,6 +98,7 @@ usage(int ret) { printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n"); printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n"); printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n"); + printf(" [--force-alignment=]\t\tForce alignment in memory allocations routine to be \n"); printf(" [-g]\t\t\t\tGenerate debugging information\n"); printf(" [--help]\t\t\t\tPrint help\n"); printf(" [--help-dev]\t\t\tPrint help for developer options\n"); @@ -393,6 +394,9 @@ int main(int Argc, char *Argv[]) { usage(1); } } + else if (!strncmp(argv[i], "--force-alignment=", 18)) { + g->forceAlignment = atoi(argv[i] + 18); + } else if (!strcmp(argv[i], "--woff") || !strcmp(argv[i], "-woff")) { g->disableWarnings = true; g->emitPerfWarnings = false;