Fixed bugs in bin object code parsing and the assembler.

This commit is contained in:
Daan Vanden Bosch 2019-07-29 23:46:11 +02:00
parent 1da64b8632
commit 7d89c870cc
5 changed files with 156 additions and 118 deletions

2
.gitignore vendored
View File

@ -26,3 +26,5 @@
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# node error reports
report.*.json

View File

@ -124,11 +124,11 @@ export function parse_bin(cursor: Cursor, lenient: boolean = false): BinFile {
const label_offset_count = Math.floor((cursor.size - label_offset_table_offset) / 4);
cursor.seek_start(label_offset_table_offset);
const label_offsets = cursor.i32_array(label_offset_count);
const label_offset_table = cursor.i32_array(label_offset_count);
const offset_to_labels = new Map<number, number[]>();
for (let label = 0; label < label_offsets.length; label++) {
const offset = label_offsets[label];
for (let label = 0; label < label_offset_table.length; label++) {
const offset = label_offset_table[label];
if (offset !== -1) {
let labels = offset_to_labels.get(offset);
@ -173,7 +173,7 @@ export function parse_bin(cursor: Cursor, lenient: boolean = false): BinFile {
// Verify labels.
outer: for (let label = 0; label < label_offset_count; label++) {
if (label_offsets[label] !== -1) {
if (label_offset_table[label] !== -1) {
for (const segment of segments) {
if (segment.label === label) {
continue outer;
@ -181,7 +181,7 @@ export function parse_bin(cursor: Cursor, lenient: boolean = false): BinFile {
}
logger.warn(
`Label ${label} with offset ${label_offsets[label]} does not point to anything.`
`Label ${label} with offset ${label_offset_table[label]} does not point to anything.`
);
}
}
@ -265,104 +265,109 @@ function parse_object_code(
const labels: number[] | undefined = offset_to_labels.get(offset);
// Check whether we've encountered a data segment.
// If a single label that points to this segment is referred to from a data context we assume the segment is a data segment.
// If a label that points to this segment is referred to from a data context we assume the segment is a data segment.
if (labels && labels.some(label => data_labels.has(label))) {
for (const [label_offset, labels] of offset_to_labels.entries()) {
if (label_offset > offset) {
let last_label = -1;
let data_segment_size = cursor.size - offset;
// Get the next label's offset.
for (let i = offset + 1; i < cursor.size; i++) {
if (offset_to_labels.has(i)) {
// We create empty segments for all but the last label.
// The data will be in the last label's segment.
for (let i = 0; i < labels.length - 1; i++) {
for (let j = 0; j < labels.length - 1; j++) {
segments.push({
type: SegmentType.Data,
label: labels[i],
label: labels[j],
data: new ArrayBuffer(0),
});
}
segments.push({
type: SegmentType.Data,
label: labels[labels.length - 1],
data: cursor.array_buffer(label_offset - offset),
});
last_label = labels[labels.length - 1];
data_segment_size = i - offset;
break;
}
}
instructions = undefined;
continue;
}
segments.push({
type: SegmentType.Data,
label: last_label,
data: cursor.array_buffer(data_segment_size),
});
// Parse as instruction.
if (labels == undefined) {
if (instructions == undefined) {
logger.warn(`Unlabelled instructions at ${offset}.`);
instructions = undefined;
} else {
// Parse as instruction.
if (labels == undefined) {
if (instructions == undefined) {
logger.warn(`Unlabelled instructions at ${offset}.`);
instructions = [];
segments.push({
type: SegmentType.Instructions,
label: -1,
instructions,
});
}
} else {
for (let i = 0; i < labels.length - 1; i++) {
segments.push({
type: SegmentType.Instructions,
label: labels[i],
instructions: [],
});
}
instructions = [];
segments.push({
type: SegmentType.Instructions,
label: -1,
label: labels[labels.length - 1],
instructions,
});
}
} else {
for (let i = 0; i < labels.length - 1; i++) {
segments.push({
type: SegmentType.Instructions,
label: labels[i],
instructions: [],
});
// Parse the opcode.
const main_opcode = cursor.u8();
let opcode_index;
switch (main_opcode) {
case 0xf8:
case 0xf9:
opcode_index = (main_opcode << 8) | cursor.u8();
break;
default:
opcode_index = main_opcode;
break;
}
instructions = [];
let opcode = OPCODES[opcode_index];
segments.push({
type: SegmentType.Instructions,
label: labels[labels.length - 1],
instructions,
});
}
// Parse the arguments.
try {
const args = parse_instruction_arguments(cursor, opcode);
instructions.push(new Instruction(opcode, args));
// Parse the opcode.
const main_opcode = cursor.u8();
let opcode_index;
// Check whether we can deduce a data segment label.
for (let i = 0; i < opcode.params.length; i++) {
const param_type = opcode.params[i].type;
const arg_value = args[i].value;
switch (main_opcode) {
case 0xf8:
case 0xf9:
opcode_index = (main_opcode << 8) | cursor.u8();
break;
default:
opcode_index = main_opcode;
break;
}
let opcode = OPCODES[opcode_index];
// Parse the arguments.
try {
const args = parse_instruction_arguments(cursor, opcode);
instructions.push(new Instruction(opcode, args));
// Check whether we can deduce a data segment label.
for (let i = 0; i < opcode.params.length; i++) {
const param_type = opcode.params[i].type;
const arg_value = args[i].value;
if (param_type === Type.DLabel) {
data_labels.add(arg_value);
if (param_type === Type.DLabel) {
data_labels.add(arg_value);
}
}
} catch (e) {
if (lenient) {
logger.error(
`Exception occurred while parsing arguments for instruction ${opcode.mnemonic}.`,
e
);
instructions.push(new Instruction(opcode, []));
} else {
throw e;
}
}
} catch (e) {
if (lenient) {
logger.error(
`Exception occurred while parsing arguments for instruction ${opcode.mnemonic}.`,
e
);
instructions.push(new Instruction(opcode, []));
} else {
throw e;
}
}
}

View File

@ -76,13 +76,9 @@ class Assembler {
for (const line of this.assembly) {
this.tokens = this.lexer.tokenize_line(line);
if (this.tokens.length === 0) {
continue;
}
if (this.tokens.length > 0) {
const token = this.tokens.shift()!;
const token = this.tokens.shift()!;
if (this.code_section) {
switch (token.type) {
case TokenType.Label:
this.parse_label(token);
@ -93,8 +89,27 @@ class Assembler {
case TokenType.DataSection:
this.parse_data_section(token);
break;
case TokenType.Int:
if (this.code_section) {
this.add_error({
col: token.col,
length: token.len,
message: "Unexpected token.",
});
} else {
this.parse_bytes(token);
}
break;
case TokenType.Ident:
this.parse_instruction(token);
if (this.code_section) {
this.parse_instruction(token);
} else {
this.add_error({
col: token.col,
length: token.len,
message: "Unexpected token.",
});
}
break;
case TokenType.InvalidSection:
this.add_error({
@ -118,15 +133,6 @@ class Assembler {
});
break;
}
} else {
switch (token.type) {
case TokenType.Label:
this.parse_label(token);
break;
case TokenType.Int:
this.parse_bytes(token);
break;
}
}
this.line_no++;
@ -425,7 +431,7 @@ class Assembler {
this.add_error({
col: token.col,
length: token.len,
message: "Argument expected.",
message: "Expected an argument.",
});
} else {
if (param.type !== Type.U8Var && param.type !== Type.ILabelVar) {
@ -442,7 +448,7 @@ class Assembler {
this.add_error({
col,
length: token.col - col,
message: "Comma expected.",
message: "Expected a comma.",
});
}
@ -503,37 +509,37 @@ class Assembler {
switch (param.type) {
case Type.U8:
type_str = "unsigned 8-bit integer";
type_str = "an unsigned 8-bit integer";
break;
case Type.U16:
type_str = "unsigned 16-bit integer";
type_str = "an unsigned 16-bit integer";
break;
case Type.U32:
type_str = "unsigned 32-bit integer";
type_str = "an unsigned 32-bit integer";
break;
case Type.I32:
type_str = "signed 32-bit integer";
type_str = "a signed 32-bit integer";
break;
case Type.F32:
type_str = "float";
type_str = "a float";
break;
case Type.Register:
type_str = "register reference";
type_str = "a register reference";
break;
case Type.ILabel:
type_str = "instruction label";
type_str = "an instruction label";
break;
case Type.DLabel:
type_str = "data label";
type_str = "a data label";
break;
case Type.U8Var:
type_str = "unsigned 8-bit integer";
type_str = "an unsigned 8-bit integer";
break;
case Type.ILabelVar:
type_str = "instruction label";
type_str = "an instruction label";
break;
case Type.String:
type_str = "string";
type_str = "a string";
break;
}
@ -558,13 +564,13 @@ class Assembler {
this.add_error({
col,
length: len,
message: `${bit_size}-Bit unsigned integer can't be less than 0.`,
message: `Unsigned ${bit_size}-bit integer can't be less than 0.`,
});
} else if (value > max_value) {
this.add_error({
col,
length: len,
message: `${bit_size}-Bit unsigned integer can't be greater than ${max_value}.`,
message: `Unsigned ${bit_size}-bit integer can't be greater than ${max_value}.`,
});
}
@ -583,13 +589,13 @@ class Assembler {
this.add_error({
col,
length: len,
message: `${bit_size}-Bit signed integer can't be less than ${min_value}.`,
message: `Signed ${bit_size}-bit integer can't be less than ${min_value}.`,
});
} else if (value > max_value) {
this.add_error({
col,
length: len,
message: `${bit_size}-Bit signed integer can't be greater than ${max_value}.`,
message: `Signed ${bit_size}-bit integer can't be greater than ${max_value}.`,
});
}
@ -624,13 +630,13 @@ class Assembler {
this.add_error({
col: token.col,
length: token.len,
message: `8-Bit unsigned integer can't be less than 0.`,
message: "Unsigned 8-bit integer can't be less than 0.",
});
} else if (token.value > 255) {
this.add_error({
col: token.col,
length: token.len,
message: `8-Bit unsigned integer can't be greater than 255.`,
message: "Unsigned 8-bit integer can't be greater than 255.",
});
}
@ -647,7 +653,7 @@ class Assembler {
this.add_error({
col: token.col,
length: token.len,
message: "Unexpected token.",
message: "Expected an unsigned 8-bit integer.",
});
}

View File

@ -12,12 +12,22 @@ export function disassemble(object_code: Segment[], manual_stack: boolean = fals
if (segment.type === SegmentType.Data) {
if (code_block !== false) {
code_block = false;
lines.push(".data");
if (lines.length) {
lines.push("");
}
lines.push(".data", "");
}
} else {
if (code_block !== true) {
code_block = true;
lines.push(".code");
if (lines.length) {
lines.push("");
}
lines.push(".code", "");
}
}

View File

@ -13,10 +13,14 @@ const ASM_SYNTAX: languages.IMonarchLanguage = {
tokenizer: {
root: [
// Strings.
[/"([^"\\]|\\.)*$/, "string.invalid"], // Unterminated string.
[/"/, { token: "string.quote", bracket: "@open", next: "@string" }],
// Registers.
[/r\d+/, "predefined"],
[/\.[^\s]+|(^|\s+)bytes($|\s+)/, "keyword"],
[/\.[^\s]+/, "keyword"],
// Labels.
[/[^\s]+:/, "tag"],
@ -36,10 +40,6 @@ const ASM_SYNTAX: languages.IMonarchLanguage = {
// Delimiters.
[/,/, "delimiter"],
// Strings.
[/"([^"\\]|\\.)*$/, "string.invalid"], // Unterminated string.
[/"/, { token: "string.quote", bracket: "@open", next: "@string" }],
],
// comment: [
@ -66,6 +66,19 @@ const INSTRUCTION_SUGGESTIONS = OPCODES.filter(opcode => opcode != null).map(opc
} as any) as languages.CompletionItem;
});
/**
 * Completion items for the section keywords `.code` and `.data`.
 *
 * Each item is labelled with the full keyword (leading dot included) while the
 * inserted text is the bare keyword name without the dot.
 * NOTE(review): presumably the dot is omitted because the user has already
 * typed it when these suggestions are offered — confirm against the provider.
 */
const KEYWORD_SUGGESTIONS = ["code", "data"].map(keyword => ({
    label: `.${keyword}`,
    kind: languages.CompletionItemKind.Keyword,
    insertText: keyword,
})) as languages.CompletionItem[];
// Register the "psoasm" language id, then attach the Monarch tokenizer rules
// defined in ASM_SYNTAX to that id.
languages.register({ id: "psoasm" });
languages.setMonarchTokensProvider("psoasm", ASM_SYNTAX);
languages.registerCompletionItemProvider("psoasm", {
@ -78,6 +91,8 @@ languages.registerCompletionItemProvider("psoasm", {
});
const suggestions = /^\s*([a-z][a-z0-9_=<>!]*)?$/.test(value)
? INSTRUCTION_SUGGESTIONS
: /^\s*\.[a-z]+$/.test(value)
? KEYWORD_SUGGESTIONS
: [];
return {