Added bytecode parser according to c0vm-writeup.pdf

We can now parse a bytecode file to find int_pool, string_pool,
function_pool, and native_pool.
This commit is contained in:
Mitchell Plamann
2015-03-21 02:04:49 -04:00
parent c62c7ed8bf
commit 837efb7b2b
3 changed files with 112 additions and 4 deletions

39
src/byte-stream.js Executable file
View File

@@ -0,0 +1,39 @@
/**
 * Sequential reader over an array of byte values.
 *
 * Multi-byte reads are big-endian: the first byte consumed is the most
 * significant one. No bounds checking is performed; reading past the end of
 * `byte_array` yields `undefined` from get_u1 (and NaN / 0 from the wider
 * readers built on top of it).
 *
 * @param {number[]} byte_array - values to read from; assumed to be integers
 *     in 0..255 (not validated here -- confirm against the producer).
 */
var ByteStream = function (byte_array) {
  console.log("Instance created.");
  this.byte_array = byte_array;
  this.index = 0;
};

/**
 * Reads the byte at the current position and advances by one.
 *
 * @returns {number|undefined} the byte, or undefined once past the end.
 */
ByteStream.prototype.get_u1 = function () {
  var result = this.byte_array[this.index];
  this.index += 1;
  return result;
};

/**
 * Reads two bytes as an unsigned 16-bit big-endian integer.
 *
 * @returns {number} value in 0..0xFFFF for in-range byte inputs.
 */
ByteStream.prototype.get_u2 = function () {
  var high = this.get_u1();
  var low = this.get_u1();
  return (high << 8) + low;
};

/**
 * Reads four bytes as an unsigned 32-bit big-endian integer.
 *
 * @returns {number} value in 0..0xFFFFFFFF for in-range byte inputs.
 */
ByteStream.prototype.get_u4 = function () {
  var high = this.get_u2();
  var low = this.get_u2();
  // Multiply instead of `<< 16`: a shift would wrap into a negative int32
  // whenever the top bit of `high` is set.
  return (high * 0x10000) + low;
};

/**
 * Reads four bytes as a signed 32-bit big-endian two's-complement integer.
 *
 * @returns {number} value in -0x80000000..0x7FFFFFFF.
 */
ByteStream.prototype.get_i4 = function () {
  // `| 0` applies ToInt32, which reinterprets the unsigned 32-bit word as
  // two's complement (0xFFFFFFFF -> -1, 0x80000000 -> -2147483648).
  // Treating the top bit as a separate sign flag would decode those as
  // -2147483647 and -0 respectively.
  return this.get_u4() | 0;
};

/**
 * Reads the next `n` bytes.
 *
 * @param {number} n - how many bytes to consume.
 * @returns {Array} a new array holding `n` get_u1 results, in stream order.
 */
ByteStream.prototype.get_bytes = function (n) {
  var result = [];
  for (var i = 0; i < n; i++) {
    result.push(this.get_u1());
  }
  return result;
};
// Module interface: expose the ByteStream constructor to requiring modules.
exports.ByteStream = ByteStream;

View File

@@ -1,8 +1,9 @@
fs = require("fs");
byte_stream = require("./byte-stream");
// This is a simple, kinda hacky bytecode parser for .bc0 files
function getBytes(filename) {
data = fs.readFileSync(filename);
var data = fs.readFileSync(filename);
if (data == null) {
if (err["code"] === "ENOENT")
@@ -13,14 +14,14 @@ function getBytes(filename) {
}
// Data contains our file, but we want it as a string
string_data = data.toString();
var string_data = data.toString();
// Strip all the comments for easier parsing
without_comments = string_data.replace(new RegExp("#.*", "gi"), "");
var without_comments = string_data.replace(new RegExp("#.*", "gi"), "");
// Each byte should now be a pair of two hex digits.
// Put all these in an array.
bytes = [];
var bytes = [];
without_comments.replace(
new RegExp("([0123456789ABCDEF][0123456789ABCDEF])", "gi"),
function(next_byte) {
@@ -32,4 +33,69 @@ function getBytes(filename) {
}
/**
 * One function-pool record, consumed from the stream's current position.
 *
 * Layout as read here: three u2 header fields (argument count, variable
 * count, code length) followed by `code_length` raw code bytes.
 *
 * @param {Object} stream - reader providing get_u2() and get_bytes(n).
 */
var FunctionInfo = function (stream) {
  // Pull the fixed-size header first; the reads must stay in this order
  // because each call advances the stream.
  var arg_count = stream.get_u2();
  var var_count = stream.get_u2();
  var byte_count = stream.get_u2();

  this.num_args = arg_count;
  this.num_vars = var_count;
  this.code_length = byte_count;
  // The body length is whatever the header just announced.
  this.code = stream.get_bytes(byte_count);
};
/**
 * One native-pool record, consumed from the stream's current position.
 *
 * Layout as read here: a u2 argument count followed by a u2 index stored as
 * `function_table_index`.
 *
 * @param {Object} stream - reader providing get_u2().
 */
var NativeInfo = function (stream) {
  // Two consecutive u2 reads; order matters because each advances the stream.
  var arg_count = stream.get_u2();
  var table_index = stream.get_u2();

  this.num_args = arg_count;
  this.function_table_index = table_index;
};
/**
 * Parsed contents of a .bc0 bytecode file.
 *
 * Reads the file's bytes via getBytes(filename), then walks them in order:
 * magic number, version/arch word, int pool, string pool, function pool and
 * native pool. Each pool is exposed as `<name>_count` plus `<name>_pool`.
 *
 * @param {string} filename - path handed to getBytes().
 * @throws {Error} when the first four bytes are not 0xc0c0ffee.
 */
var Bc0File = function (filename) {
  var file = getBytes(filename);
  var stream = new byte_stream.ByteStream(file);
  var i;

  var magic = stream.get_u4();
  if (magic !== 0xc0c0ffee) {
    // A `return null` here would be ignored: `new` discards non-object
    // return values and hands back the half-built instance. Throwing is the
    // only way for a constructor to actually reject the input.
    throw new Error("Error - file is not a c0 bytecode file");
  }

  // I don't know that we need this, but here it is
  this.version_arch = stream.get_u2();

  // Int pool: int_count signed 32-bit values.
  this.int_count = stream.get_u2();
  this.int_pool = [];
  for (i = 0; i < this.int_count; i++) {
    this.int_pool.push(stream.get_i4());
  }

  // String pool: string_count is consumed as a number of BYTES, not a number
  // of strings. The bytes are split into strings at each 0 byte; any bytes
  // after the last 0 terminator are not added to string_pool.
  this.string_count = stream.get_u2();
  this.string_pool = [];
  var current_string = "";
  for (i = 0; i < this.string_count; i++) {
    var c = stream.get_u1();
    if (c === 0) {
      this.string_pool.push(current_string);
      current_string = "";
    } else {
      current_string += String.fromCharCode(c);
    }
  }

  // Function pool: function_count FunctionInfo records, read back to back.
  this.function_count = stream.get_u2();
  this.function_pool = [];
  for (i = 0; i < this.function_count; i++) {
    this.function_pool.push(new FunctionInfo(stream));
  }

  // Native pool: native_count NativeInfo records, read back to back.
  this.native_count = stream.get_u2();
  this.native_pool = [];
  for (i = 0; i < this.native_count; i++) {
    this.native_pool.push(new NativeInfo(stream));
  }
};
/**
 * Parses the bytecode file at `filename`.
 *
 * @param {string} filename - path forwarded unchanged to the Bc0File constructor.
 * @returns {Bc0File} the newly constructed Bc0File instance.
 */
function parse(filename) {
  var parsed = new Bc0File(filename);
  return parsed;
}
// Module interface: getBytes exposes the raw byte array of a file, parse
// returns a Bc0File built from it.
exports.getBytes = getBytes;
exports.parse = parse;

View File

@@ -4,3 +4,6 @@ console.log("Reading in sample bytecode file:");
// Manual smoke test. NOTE(review): `parser` is defined above this hunk --
// presumably the required parser module; confirm.
// First dump the raw bytes read from the sample file.
console.log(parser.getBytes("../test/test.bc0"));
console.log("That was the sample bytecode file" +
" -- it probably took up your whole terminal screen.");
// Then parse the same file and print the parsed object, followed by the
// code bytes of the first function_pool entry.
var file = parser.parse("../test/test.bc0");
console.log(file);
console.log(file.function_pool[0].code);