Added bytecode parser according to c0vm-writeup.pdf

We can now parse a bytecode file to find int_pool, string_pool, function_pool, and native_pool.
2015-03-21 02:04:49 -04:00
parent c62c7ed8bf
commit 837efb7b2b
3 changed files with 112 additions and 4 deletions
--- a/src/byte-stream.js
+++ b/src/byte-stream.js
@@ -0,0 +1,39 @@
+var ByteStream = function (byte_array) {
+    console.log("Instance created.");
+    this.byte_array = byte_array;
+    this.index = 0;
+};
+
+ByteStream.prototype.get_u1 = function() {
+    var result = this.byte_array[this.index];
+    this.index += 1;
+    return result;
+}
+
+ByteStream.prototype.get_u2 = function() {
+    var high = this.get_u1();
+    var low = this.get_u1();
+    return (high << 8) + low;
+}
+
+ByteStream.prototype.get_u4 = function() {
+    var high = this.get_u2();
+    var low = this.get_u2();
+    return (high * 0x10000) + low;
+}
+
+ByteStream.prototype.get_i4 = function() {
+    var unsigned_val = this.get_u4();
+    var sign_mult = (unsigned_val & 0x80000000) ? -1 : 1;
+    return (unsigned_val & 0x7FFFFFFF) * sign_mult;
+}
+
+ByteStream.prototype.get_bytes = function(n) {
+    var result = [];
+    for (var i = 0; i < n; i++) {
+        result.push(this.get_u1());
+    }
+    return result;
+}
+
+exports.ByteStream = ByteStream;
--- a/src/bytecode-parser.js
+++ b/src/bytecode-parser.js
@@ -1,8 +1,9 @@
 fs = require("fs");
+byte_stream = require("./byte-stream");

 // This is a simple, kinda hacky bytecode parser for .bc0 files
 function getBytes(filename) {
-    data = fs.readFileSync(filename);
+    var data = fs.readFileSync(filename);
    
    if (data == null) {
        if (err["code"] === "ENOENT")
@@ -13,14 +14,14 @@ function getBytes(filename) {
    }

    // Data contains our file, but we want it as a string
-    string_data = data.toString();
+    var string_data = data.toString();

    // Strip all the comments for easier parsing
-    without_comments = string_data.replace(new RegExp("#.*", "gi"), "");
+    var without_comments = string_data.replace(new RegExp("#.*", "gi"), "");

    // Each byte should now be a pair of two hex digits.
    // Put all these in an array.
-    bytes = [];
+    var bytes = [];
    without_comments.replace(
        new RegExp("([0123456789ABCDEF][0123456789ABCDEF])", "gi"),
        function(next_byte) {
@@ -32,4 +33,69 @@ function getBytes(filename) {

 }

+var FunctionInfo = function (stream) {
+    this.num_args = stream.get_u2();
+    this.num_vars = stream.get_u2();
+    this.code_length = stream.get_u2();
+    this.code = stream.get_bytes(this.code_length);
+}
+
+var NativeInfo = function (stream) {
+    this.num_args = stream.get_u2();
+    this.function_table_index = stream.get_u2();
+}
+
+var Bc0File = function (filename) {
+    var file = getBytes(filename);
+    var stream = new byte_stream.ByteStream(file);
+    
+    var magic = stream.get_u4();
+    if (magic != 0xc0c0ffee) {
+        console.log("Error - file is not a c0 bytecode file");
+        return null;
+    }
+
+    // I don't know that we need this, but here it is
+    this.version_arch = stream.get_u2();
+
+    this.int_count = stream.get_u2();
+
+    this.int_pool = [];
+    for (var i = 0; i < this.int_count; i++) {
+        this.int_pool[i] = stream.get_i4();
+    }
+
+    this.string_count = stream.get_u2();
+    
+    this.string_pool = [];
+    var current_string = "";
+    for (var i = 0; i < this.string_count; i++) {
+        var c = stream.get_u1();
+        if (c == 0) {
+            this.string_pool.push(current_string);
+            current_string = "";
+        } else {
+            current_string += String.fromCharCode(c);
+        }
+    }
+
+    this.function_count = stream.get_u2();
+
+    this.function_pool = [];
+    for (var i = 0; i < this.function_count; i++) {
+        this.function_pool.push(new FunctionInfo(stream));
+    }
+
+    this.native_count = stream.get_u2();
+    this.native_pool = [];
+    for (var i = 0; i < this.native_count; i++) {
+        this.native_pool.push(new NativeInfo(stream));
+    }
+}
+
+function parse(filename) {
+    return new Bc0File(filename);
+}
+
 exports.getBytes = getBytes;
+exports.parse = parse;
--- a/src/index.js
+++ b/src/index.js
@@ -4,3 +4,6 @@ console.log("Reading in sample bytecode file:");
 console.log(parser.getBytes("../test/test.bc0"));
 console.log("That was the sample bytecode file" +
            " -- it probably took up your whole terminal screen.");
+var file = parser.parse("../test/test.bc0");
+console.log(file);
+console.log(file.function_pool[0].code);