Dynamically referenced object code segments are now parsed correctly.

This commit is contained in:
Daan Vanden Bosch 2019-08-05 16:56:39 +02:00
parent 1ba7d3b6a7
commit 2fc55cdc1a
7 changed files with 163 additions and 297 deletions

View File

@ -832,6 +832,16 @@ opcodes:
reg_tup: # TODO: determine type and access reg_tup: # TODO: determine type and access
- type: any - type: any
access: read access: read
- type: any
access: read
- type: any
access: read
- type: instruction_label
access: read
- type: any
access: read
- type: any
access: read
- code: 0x68 - code: 0x68
mnemonic: create_pipe mnemonic: create_pipe

View File

@ -33,6 +33,9 @@ import { Cursor } from "../../cursor/Cursor";
import { ResizableBufferCursor } from "../../cursor/ResizableBufferCursor"; import { ResizableBufferCursor } from "../../cursor/ResizableBufferCursor";
import { WritableCursor } from "../../cursor/WritableCursor"; import { WritableCursor } from "../../cursor/WritableCursor";
import { ResizableBuffer } from "../../ResizableBuffer"; import { ResizableBuffer } from "../../ResizableBuffer";
import { ControlFlowGraph } from "../../../scripting/data_flow_analysis/ControlFlowGraph";
import { register_values } from "../../../scripting/data_flow_analysis/register_values";
import { disassemble } from "../../../scripting/disassembly";
const logger = Logger.get("data_formats/parsing/quest/bin"); const logger = Logger.get("data_formats/parsing/quest/bin");
@ -231,17 +234,7 @@ function parse_object_code(
): Segment[] { ): Segment[] {
const offset_to_segment = new Map<number, Segment>(); const offset_to_segment = new Map<number, Segment>();
// Recursively parse segments from the entry points. find_and_parse_segments(cursor, label_holder, entry_labels, offset_to_segment, lenient);
for (const entry_label of entry_labels) {
parse_segment(
offset_to_segment,
label_holder,
cursor,
entry_label,
SegmentType.Instructions,
lenient
);
}
const segments: Segment[] = []; const segments: Segment[] = [];
@ -339,6 +332,75 @@ function parse_object_code(
return segments; return segments;
} }
/**
 * Parses the instruction segments reachable from `entry_labels`, including segments
 * whose labels are only handed to an instruction through a register.
 *
 * Works in passes. Each pass first calls `parse_segment` for every pending label, then
 * builds a control flow graph over all instruction segments parsed so far (ordered by
 * offset) and asks `register_values` which values can be held by every register that
 * is read as an instruction label through a register tuple parameter. Those values
 * become the pending labels of the next pass. The loop stops after the first pass that
 * does not add anything to `offset_to_segment`.
 *
 * @param cursor passed through unchanged to `parse_segment`.
 * @param label_holder passed through unchanged to `parse_segment`.
 * @param entry_labels labels parsed during the first pass. The array itself is never
 * modified.
 * @param offset_to_segment receives the parsed segments; its size is used to detect
 * whether a pass made progress.
 * @param lenient passed through unchanged to `parse_segment`.
 */
function find_and_parse_segments(
    cursor: Cursor,
    label_holder: LabelHolder,
    entry_labels: number[],
    offset_to_segment: Map<number, Segment>,
    lenient: boolean
) {
    // Value sets with more members than this are not used as entry points.
    // NOTE(review): presumably such large sets mean the analysis was too imprecise to
    // trust - confirm.
    const max_label_candidates = 10;

    // Labels to parse during the current pass.
    let pending_labels = entry_labels;
    let segment_count_before_pass: number;

    do {
        segment_count_before_pass = offset_to_segment.size;

        for (const label of pending_labels) {
            parse_segment(
                offset_to_segment,
                label_holder,
                cursor,
                label,
                SegmentType.Instructions,
                lenient
            );
        }

        // All instruction segments found so far, ordered by their offset.
        const instruction_segments = Array.from(offset_to_segment)
            .sort(([offset_a], [offset_b]) => offset_a - offset_b)
            .filter(([, segment]) => segment.type === SegmentType.Instructions)
            .map(([, segment]) => segment as InstructionSegment);

        const cfg = ControlFlowGraph.create(instruction_segments);

        // Collect the dynamically set entry points for the next pass.
        const discovered_labels: number[] = [];

        for (const { instructions } of instruction_segments) {
            for (const instruction of instructions) {
                const { opcode, args } = instruction;

                // Instructions of opcodes flagged as `stack` are not inspected.
                if (opcode.stack) {
                    continue;
                }

                // Only look at parameters that have a matching argument.
                const checked_count = Math.min(opcode.params.length, args.length);

                for (let param_idx = 0; param_idx < checked_count; param_idx++) {
                    const param_type = opcode.params[param_idx].type;

                    if (!(param_type instanceof RegTupRefType)) {
                        continue;
                    }

                    // The argument holds the first register of the tuple, element n
                    // of the tuple lives in register `first_register + n`.
                    const first_register = args[param_idx].value;

                    param_type.register_tuples.forEach((reg_tup, tuple_idx) => {
                        if (reg_tup.type !== TYPE_I_LABEL) {
                            return;
                        }

                        const label_values = register_values(
                            cfg,
                            instruction,
                            first_register + tuple_idx
                        );

                        if (label_values.size() <= max_label_candidates) {
                            discovered_labels.push(...label_values);
                        }
                    });
                }
            }
        }

        pending_labels = discovered_labels;
    } while (offset_to_segment.size > segment_count_before_pass);
}
function parse_segment( function parse_segment(
offset_to_segment: Map<number, Segment>, offset_to_segment: Map<number, Segment>,
label_holder: LabelHolder, label_holder: LabelHolder,
@ -459,7 +521,7 @@ function parse_instructions_segment(
} }
} }
// Recurse on label references. // Recurse on static label references.
const stack: Arg[] = []; const stack: Arg[] = [];
for (const instruction of instructions) { for (const instruction of instructions) {
@ -523,8 +585,17 @@ function parse_instructions_segment(
} }
// Recurse on label drop-through. // Recurse on label drop-through.
if (next_label != undefined && instructions.length) { if (next_label != undefined) {
const last_opcode = instructions[instructions.length - 1].opcode; // Find the first non-nop.
let last_opcode: Opcode | undefined;
for (let i = instructions.length - 1; i >= 0; i--) {
last_opcode = instructions[i].opcode;
if (last_opcode !== Opcode.NOP) {
break;
}
}
if (last_opcode !== Opcode.RET && last_opcode !== Opcode.JMP) { if (last_opcode !== Opcode.RET && last_opcode !== Opcode.JMP) {
parse_segment( parse_segment(

View File

@ -446,6 +446,7 @@ class Assembler {
} }
} else { } else {
// Stack arguments. // Stack arguments.
// TODO: take into account that stack arguments can come from registers (arg_pushr).
const stack_args: Arg[] = []; const stack_args: Arg[] = [];
if (!this.parse_args(opcode.params, stack_args)) { if (!this.parse_args(opcode.params, stack_args)) {

View File

@ -25,9 +25,11 @@ export class BasicBlock {
) {} ) {}
link_to(other: BasicBlock): void { link_to(other: BasicBlock): void {
if (!this.to.includes(other)) {
this.to.push(other); this.to.push(other);
other.from.push(this); other.from.push(this);
} }
}
} }
export class ControlFlowGraph { export class ControlFlowGraph {
@ -200,286 +202,47 @@ export class ControlFlowGraph {
/** /**
* Links returning blocks to their callers. * Links returning blocks to their callers.
*
* @param ret Block the caller should return to.
* @param caller Calling block.
*/ */
function link_returning_blocks( function link_returning_blocks(
label_blocks: Map<number, BasicBlock>, label_blocks: Map<number, BasicBlock>,
ret: BasicBlock, ret: BasicBlock,
block: BasicBlock caller: BasicBlock
): void { ): void {
for (const label of block.branch_labels) { for (const label of caller.branch_labels) {
const sub_block = label_blocks.get(label); const callee = label_blocks.get(label);
if (sub_block) { if (callee) {
if (sub_block.branch_type === BranchType.Return) { if (callee.branch_type === BranchType.Return) {
sub_block.link_to(ret); callee.link_to(ret);
}
link_returning_blocks(label_blocks, ret, sub_block);
}
}
}
/////////////////
// Crap: //
/////////////////
class DfState {
private registers: DataView;
constructor(other?: DfState) {
if (other) {
this.registers = new DataView(other.registers.buffer.slice(0));
} else { } else {
this.registers = new DataView(new ArrayBuffer(2 * 4 * 256)); link_returning_blocks_recurse(new Set(), ret, callee);
} }
} }
get_min(register: number): number {
return this.registers.getInt32(2 * register);
} }
get_max(register: number): number {
return this.registers.getInt32(2 * register + 1);
}
set(register: number, min: number, max: number): void {
this.registers.setInt32(2 * register, min);
this.registers.setInt32(2 * register + 1, max);
}
// getf(register: number): number {
// return this.registers.getFloat32(2 * register);
// }
// setf(register: number, value: number): void {
// this.registers.setFloat32(2 * register, value);
// this.registers.setFloat32(2 * register + 1, value);
// }
} }
/** /**
* @param segments mapping of labels to segments. * @param encountered For avoiding infinite loops.
*/ */
function data_flow( function link_returning_blocks_recurse(
label_holder: any, encountered: Set<BasicBlock>,
segments: Map<number, Segment>, ret: BasicBlock,
entry_label: number, block: BasicBlock
entry_state: DfState
): void { ): void {
const segment = segments.get(entry_label); if (encountered.has(block)) {
if (!segment || segment.type !== SegmentType.Instructions) return; return;
} else {
let out_states: DfState[] = [new DfState(entry_state)]; encountered.add(block);
for (const instruction of segment.instructions) {
const args = instruction.args;
for (const state of out_states) {
switch (instruction.opcode) {
case Opcode.LET:
case Opcode.FLET:
state.set(
args[0].value,
state.get_min(args[1].value),
state.get_max(args[1].value)
);
break;
case Opcode.LETI:
case Opcode.LETB:
case Opcode.LETW:
case Opcode.LETA:
case Opcode.SYNC_LETI:
case Opcode.SYNC_REGISTER:
state.set(args[0].value, args[1].value, args[1].value);
break;
case Opcode.LETO:
{
const info = label_holder.get_info(args[1].value);
state.set(args[0].value, info ? info.offset : 0, info ? info.offset : 0);
}
break;
case Opcode.SET:
state.set(args[0].value, 1, 1);
break;
case Opcode.CLEAR:
state.set(args[0].value, 0, 0);
break;
case Opcode.LETI:
case Opcode.LETB:
case Opcode.LETW:
case Opcode.LETA:
case Opcode.SYNC_LETI:
case Opcode.SYNC_REGISTER:
state.set(args[0].value, args[1].value, args[1].value);
break;
// case Opcode.fleti:
// state.setf(args[0].value, args[1].value);
// break;
case Opcode.REV:
{
const reg = args[0].value;
const max = state.get_min(reg) <= 0 && state.get_max(reg) >= 0 ? 1 : 0;
const min = state.get_min(reg) === 0 && state.get_max(reg) === 0 ? 1 : 0;
state.set(reg, min, max);
}
break;
// case Opcode.add:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) + state.get_min(args[1].value));
// }
// break;
// case Opcode.addi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) + args[1].value);
// }
// break;
// case Opcode.sub:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) - state.get_min(args[1].value));
// }
// break;
// case Opcode.subi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) - args[1].value);
// }
// break;
// case Opcode.mul:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) * state.get_min(args[1].value));
// }
// break;
// case Opcode.muli:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) * args[1].value);
// }
// break;
// case Opcode.div:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) / state.get_min(args[1].value));
// }
// break;
// case Opcode.divi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) / args[1].value);
// }
// break;
// case Opcode.and:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) & state.get_min(args[1].value));
// }
// break;
// case Opcode.andi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) & args[1].value);
// }
// break;
// case Opcode.or:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) | state.get_min(args[1].value));
// }
// break;
// case Opcode.ori:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) | args[1].value);
// }
// break;
// case Opcode.xor:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) ^ state.get_min(args[1].value));
// }
// break;
// case Opcode.xori:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) ^ args[1].value);
// }
// break;
// case Opcode.mod:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) % state.get_min(args[1].value));
// }
// break;
// case Opcode.modi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) % args[1].value);
// }
// break;
// case Opcode.shift_left:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) << state.get_min(args[1].value));
// }
// break;
// case Opcode.shift_right:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) >> state.get_min(args[1].value));
// }
// break;
// case Opcode.fadd:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) + state.getf(args[1].value));
// }
// break;
// case Opcode.faddi:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) + args[1].value);
// }
// break;
// case Opcode.fsub:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) - state.getf(args[1].value));
// }
// break;
// case Opcode.fsubi:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) - args[1].value);
// }
// break;
// case Opcode.fmul:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) * state.getf(args[1].value));
// }
// break;
// case Opcode.fmuli:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) * args[1].value);
// }
// break;
// case Opcode.fdiv:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) / state.getf(args[1].value));
// }
// break;
// case Opcode.fdivi:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) / args[1].value);
// }
// break;
} }
for (const to_block of block.to) {
if (to_block.branch_type === BranchType.Return) {
to_block.link_to(ret);
} else {
link_returning_blocks_recurse(encountered, ret, to_block);
} }
} }
} }

View File

@ -2,6 +2,9 @@ import { Instruction } from "../instructions";
import { Opcode, ParamAccess, RegTupRefType } from "../opcodes"; import { Opcode, ParamAccess, RegTupRefType } from "../opcodes";
import { BasicBlock, ControlFlowGraph } from "./ControlFlowGraph"; import { BasicBlock, ControlFlowGraph } from "./ControlFlowGraph";
import { ValueSet } from "./ValueSet"; import { ValueSet } from "./ValueSet";
import Logger from "js-logger";
const logger = Logger.get("scripting/data_flow_analysis");
export const MIN_REGISTER_VALUE = -Math.pow(2, 31); export const MIN_REGISTER_VALUE = -Math.pow(2, 31);
export const MAX_REGISTER_VALUE = Math.pow(2, 31) - 1; export const MAX_REGISTER_VALUE = Math.pow(2, 31) - 1;
@ -28,13 +31,18 @@ export function register_values(
inst_idx++; inst_idx++;
} }
return find_values(new Set(), block, inst_idx, register); return find_values(new Context(), new Set(), block, inst_idx, register);
} else { } else {
return new ValueSet(); return new ValueSet();
} }
} }
/**
 * Mutable state shared by all `find_values` calls that belong to a single
 * `register_values` query.
 */
class Context {
    /**
     * Number of `find_values` invocations so far. `find_values` increments this on
     * entry and gives up once it exceeds its iteration limit.
     */
    iterations: number;

    constructor() {
        this.iterations = 0;
    }
}
function find_values( function find_values(
ctx: Context,
path: Set<BasicBlock>, path: Set<BasicBlock>,
block: BasicBlock, block: BasicBlock,
end: number, end: number,
@ -42,6 +50,12 @@ function find_values(
): ValueSet { ): ValueSet {
let values = new ValueSet(); let values = new ValueSet();
if (++ctx.iterations > 1000) {
logger.warn("Too many iterations.");
values.set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
return values;
}
for (let i = block.start; i < end; i++) { for (let i = block.start; i < end; i++) {
const instruction = block.segment.instructions[i]; const instruction = block.segment.instructions[i];
const args = instruction.args; const args = instruction.args;
@ -49,7 +63,7 @@ function find_values(
switch (instruction.opcode) { switch (instruction.opcode) {
case Opcode.LET: case Opcode.LET:
if (args[0].value === register) { if (args[0].value === register) {
values = find_values(new Set(path), block, i, args[1].value); values = find_values(ctx, new Set(path), block, i, args[1].value);
} }
break; break;
case Opcode.LETI: case Opcode.LETI:
@ -72,7 +86,7 @@ function find_values(
break; break;
case Opcode.REV: case Opcode.REV:
if (args[0].value === register) { if (args[0].value === register) {
const prev_vals = find_values(new Set(path), block, i, register); const prev_vals = find_values(ctx, new Set(path), block, i, register);
const prev_size = prev_vals.size(); const prev_size = prev_vals.size();
if (prev_size === 0 || (prev_size === 1 && prev_vals.get(0) === 0)) { if (prev_size === 0 || (prev_size === 1 && prev_vals.get(0) === 0)) {
@ -86,25 +100,25 @@ function find_values(
break; break;
case Opcode.ADDI: case Opcode.ADDI:
if (args[0].value === register) { if (args[0].value === register) {
values = find_values(new Set(path), block, i, register); values = find_values(ctx, new Set(path), block, i, register);
values.scalar_add(args[1].value); values.scalar_add(args[1].value);
} }
break; break;
case Opcode.SUBI: case Opcode.SUBI:
if (args[0].value === register) { if (args[0].value === register) {
values = find_values(new Set(path), block, i, register); values = find_values(ctx, new Set(path), block, i, register);
values.scalar_sub(args[1].value); values.scalar_sub(args[1].value);
} }
break; break;
case Opcode.MULI: case Opcode.MULI:
if (args[0].value === register) { if (args[0].value === register) {
values = find_values(new Set(path), block, i, register); values = find_values(ctx, new Set(path), block, i, register);
values.scalar_mul(args[1].value); values.scalar_mul(args[1].value);
} }
break; break;
case Opcode.DIVI: case Opcode.DIVI:
if (args[0].value === register) { if (args[0].value === register) {
values = find_values(new Set(path), block, i, register); values = find_values(ctx, new Set(path), block, i, register);
values.scalar_div(args[1].value); values.scalar_div(args[1].value);
} }
break; break;
@ -122,9 +136,9 @@ function find_values(
case Opcode.GET_RANDOM: case Opcode.GET_RANDOM:
if (args[1].value === register) { if (args[1].value === register) {
// TODO: undefined values. // TODO: undefined values.
const min = find_values(new Set(path), block, i, args[0].value).min() || 0; const min = find_values(ctx, new Set(path), block, i, args[0].value).min() || 0;
const max = Math.max( const max = Math.max(
find_values(new Set(path), block, i, args[0].value + 1).max() || 0, find_values(ctx, new Set(path), block, i, args[0].value + 1).max() || 0,
min + 1 min + 1
); );
values.set_interval(min, max - 1); values.set_interval(min, max - 1);
@ -154,7 +168,7 @@ function find_values(
const reg_ref = args[j].value; const reg_ref = args[j].value;
let k = 0; let k = 0;
for (const reg_param of param.type.registers) { for (const reg_param of param.type.register_tuples) {
if ( if (
(reg_param.access === ParamAccess.Write || (reg_param.access === ParamAccess.Write ||
reg_param.access === ParamAccess.ReadWrite) && reg_param.access === ParamAccess.ReadWrite) &&
@ -183,7 +197,7 @@ function find_values(
break; break;
} }
values.union(find_values(new Set(path), from, from.end, register)); values.union(find_values(ctx, new Set(path), from, from.end, register));
} }
} }

View File

@ -73,11 +73,11 @@ class RegRefType extends RefType {}
* The only parameterized type. * The only parameterized type.
*/ */
export class RegTupRefType extends RefType { export class RegTupRefType extends RefType {
readonly registers: Param[]; readonly register_tuples: Param[];
constructor(...registers: Param[]) { constructor(...register_tuples: Param[]) {
super(); super();
this.registers = registers; this.register_tuples = register_tuples;
} }
} }
@ -1396,7 +1396,14 @@ export class Opcode {
undefined, undefined,
[ [
new Param( new Param(
new RegTupRefType(new Param(TYPE_ANY, undefined, ParamAccess.Read)), new RegTupRefType(
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_I_LABEL, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read)
),
undefined, undefined,
undefined undefined
), ),

View File

@ -8,7 +8,7 @@ const Dotenv = require("dotenv-webpack");
module.exports = merge(common, { module.exports = merge(common, {
mode: "development", mode: "development",
devtool: "eval-source-map", devtool: "inline-cheap-source-map",
module: { module: {
rules: [ rules: [
{ {