Added nvptx64 target. Things to do:

1. Write builtins/target-nvptx64.ll; for now it is just a copy of target-generic-1.ll. 2. Add __global__ and __device__ scope. 3. Make the code work for a single CUDA thread. 4. Use tasks to work as a block grid, with programIndex as laneIdx and programCount as warpSize. 5. ... and more...
2013-07-28 14:31:43 +02:00
parent 663ebf7857
commit 67b549a937
6 changed files with 1035 additions and 27 deletions
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -656,7 +656,8 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
        // the values for an ARM target.  This maybe won't cause problems
        // in the generated code, since bulitins.c doesn't do anything too
        // complex w.r.t. struct layouts, etc.
-        if (g->target->getISA() != Target::NEON)
+        if (g->target->getISA() != Target::NEON &&
+            g->target->getISA() != Target::NVPTX64)
 #endif // !__arm__
        {
            Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
@@ -819,6 +820,17 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
    // Next, add the target's custom implementations of the various needed
    // builtin functions (e.g. __masked_store_32(), etc).
    switch (g->target->getISA()) {
+    case Target::NVPTX64: 
+      {
+        if (runtime32) {
+            fprintf(stderr, " please add 32-bit bulitins .. \n");
+            assert(0);
+        }
+        else {
+            EXPORT_MODULE(builtins_bitcode_nvptx64_64bit);
+        }
+        break;
+      };
    case Target::NEON: {
        if (runtime32) {
            EXPORT_MODULE(builtins_bitcode_neon_32bit);