Dynamically referenced object code segments are now parsed correctly.

This commit is contained in:
Daan Vanden Bosch 2019-08-05 16:56:39 +02:00
parent 1ba7d3b6a7
commit 2fc55cdc1a
7 changed files with 163 additions and 297 deletions

View File

@ -832,6 +832,16 @@ opcodes:
reg_tup: # TODO: determine type and access
- type: any
access: read
- type: any
access: read
- type: any
access: read
- type: instruction_label
access: read
- type: any
access: read
- type: any
access: read
- code: 0x68
mnemonic: create_pipe

View File

@ -33,6 +33,9 @@ import { Cursor } from "../../cursor/Cursor";
import { ResizableBufferCursor } from "../../cursor/ResizableBufferCursor";
import { WritableCursor } from "../../cursor/WritableCursor";
import { ResizableBuffer } from "../../ResizableBuffer";
import { ControlFlowGraph } from "../../../scripting/data_flow_analysis/ControlFlowGraph";
import { register_values } from "../../../scripting/data_flow_analysis/register_values";
import { disassemble } from "../../../scripting/disassembly";
const logger = Logger.get("data_formats/parsing/quest/bin");
@ -231,17 +234,7 @@ function parse_object_code(
): Segment[] {
const offset_to_segment = new Map<number, Segment>();
// Recursively parse segments from the entry points.
for (const entry_label of entry_labels) {
parse_segment(
offset_to_segment,
label_holder,
cursor,
entry_label,
SegmentType.Instructions,
lenient
);
}
find_and_parse_segments(cursor, label_holder, entry_labels, offset_to_segment, lenient);
const segments: Segment[] = [];
@ -339,6 +332,75 @@ function parse_object_code(
return segments;
}
/**
 * Parses instruction segments starting from the given entry labels and keeps
 * going until a pass adds no new entries to `offset_to_segment`.
 *
 * Each pass does two things:
 * 1. Calls `parse_segment` for every label in the current work list.
 * 2. Builds a control flow graph over all instruction segments found so far and
 *    uses `register_values` to find labels that are passed via registers, i.e.
 *    register tuple members whose declared type is `TYPE_I_LABEL`. Those labels
 *    form the work list of the next pass.
 *
 * @param cursor Passed through to `parse_segment`.
 * @param label_holder Passed through to `parse_segment`.
 * @param entry_labels Labels to parse during the first pass. The caller's array
 * is not modified.
 * @param offset_to_segment Map that receives the parsed segments; its size is
 * used to detect whether a pass made progress.
 * @param lenient Passed through to `parse_segment`.
 */
function find_and_parse_segments(
    cursor: Cursor,
    label_holder: LabelHolder,
    entry_labels: number[],
    offset_to_segment: Map<number, Segment>,
    lenient: boolean
) {
    let pending_labels = entry_labels;
    let segment_count_before: number;

    do {
        segment_count_before = offset_to_segment.size;

        // Parse everything reachable from the labels known at this point.
        for (const label of pending_labels) {
            parse_segment(
                offset_to_segment,
                label_holder,
                cursor,
                label,
                SegmentType.Instructions,
                lenient
            );
        }

        // Collect the instruction segments, ordered by ascending map key.
        const instruction_entries = Array.from(offset_to_segment).filter(
            ([, segment]) => segment.type === SegmentType.Instructions
        );
        instruction_entries.sort(([key_a], [key_b]) => key_a - key_b);
        const ordered_segments = instruction_entries.map(
            ([, segment]) => segment as InstructionSegment
        );

        const cfg = ControlFlowGraph.create(ordered_segments);
        const discovered_labels: number[] = [];

        // Look for labels that are referenced through registers.
        for (const segment of ordered_segments) {
            for (const instruction of segment.instructions) {
                // Instructions whose opcode has `stack` set are not inspected here.
                if (instruction.opcode.stack) {
                    continue;
                }

                const params = instruction.opcode.params;
                const args = instruction.args;
                const count = Math.min(params.length, args.length);

                for (let param_idx = 0; param_idx < count; param_idx++) {
                    const param_type = params[param_idx].type;
                    const arg = args[param_idx];

                    if (!(param_type instanceof RegTupRefType)) {
                        continue;
                    }

                    param_type.register_tuples.forEach((member, tuple_idx) => {
                        if (member.type !== TYPE_I_LABEL) {
                            return;
                        }

                        // The tuple member at index `tuple_idx` is looked up in
                        // the register `arg.value + tuple_idx`.
                        const candidates = register_values(
                            cfg,
                            instruction,
                            arg.value + tuple_idx
                        );

                        // Value sets with more than 10 members are ignored.
                        // NOTE(review): presumably because such a set is too
                        // imprecise to be a real list of labels — confirm.
                        if (candidates.size() <= 10) {
                            discovered_labels.push(...candidates);
                        }
                    });
                }
            }
        }

        pending_labels = discovered_labels;
    } while (offset_to_segment.size > segment_count_before);
}
function parse_segment(
offset_to_segment: Map<number, Segment>,
label_holder: LabelHolder,
@ -459,7 +521,7 @@ function parse_instructions_segment(
}
}
// Recurse on label references.
// Recurse on static label references.
const stack: Arg[] = [];
for (const instruction of instructions) {
@ -523,8 +585,17 @@ function parse_instructions_segment(
}
// Recurse on label drop-through.
if (next_label != undefined && instructions.length) {
const last_opcode = instructions[instructions.length - 1].opcode;
if (next_label != undefined) {
// Find the last non-nop opcode by scanning backwards from the end.
let last_opcode: Opcode | undefined;
for (let i = instructions.length - 1; i >= 0; i--) {
last_opcode = instructions[i].opcode;
if (last_opcode !== Opcode.NOP) {
break;
}
}
if (last_opcode !== Opcode.RET && last_opcode !== Opcode.JMP) {
parse_segment(

View File

@ -446,6 +446,7 @@ class Assembler {
}
} else {
// Stack arguments.
// TODO: take into account that stack arguments can come from registers (arg_pushr).
const stack_args: Arg[] = [];
if (!this.parse_args(opcode.params, stack_args)) {

View File

@ -25,8 +25,10 @@ export class BasicBlock {
) {}
link_to(other: BasicBlock): void {
this.to.push(other);
other.from.push(this);
if (!this.to.includes(other)) {
this.to.push(other);
other.from.push(this);
}
}
}
@ -200,286 +202,47 @@ export class ControlFlowGraph {
/**
* Links returning blocks to their callers.
*
* @param ret Block the caller should return to.
* @param caller Calling block.
*/
function link_returning_blocks(
label_blocks: Map<number, BasicBlock>,
ret: BasicBlock,
block: BasicBlock
caller: BasicBlock
): void {
for (const label of block.branch_labels) {
const sub_block = label_blocks.get(label);
for (const label of caller.branch_labels) {
const callee = label_blocks.get(label);
if (sub_block) {
if (sub_block.branch_type === BranchType.Return) {
sub_block.link_to(ret);
if (callee) {
if (callee.branch_type === BranchType.Return) {
callee.link_to(ret);
} else {
link_returning_blocks_recurse(new Set(), ret, callee);
}
link_returning_blocks(label_blocks, ret, sub_block);
}
}
}
/////////////////
// Crap: //
/////////////////
class DfState {
private registers: DataView;
constructor(other?: DfState) {
if (other) {
this.registers = new DataView(other.registers.buffer.slice(0));
} else {
this.registers = new DataView(new ArrayBuffer(2 * 4 * 256));
}
}
get_min(register: number): number {
return this.registers.getInt32(2 * register);
}
get_max(register: number): number {
return this.registers.getInt32(2 * register + 1);
}
set(register: number, min: number, max: number): void {
this.registers.setInt32(2 * register, min);
this.registers.setInt32(2 * register + 1, max);
}
// getf(register: number): number {
// return this.registers.getFloat32(2 * register);
// }
// setf(register: number, value: number): void {
// this.registers.setFloat32(2 * register, value);
// this.registers.setFloat32(2 * register + 1, value);
// }
}
/**
* @param segments mapping of labels to segments.
* @param encountered For avoiding infinite loops.
*/
function data_flow(
label_holder: any,
segments: Map<number, Segment>,
entry_label: number,
entry_state: DfState
function link_returning_blocks_recurse(
encountered: Set<BasicBlock>,
ret: BasicBlock,
block: BasicBlock
): void {
const segment = segments.get(entry_label);
if (!segment || segment.type !== SegmentType.Instructions) return;
if (encountered.has(block)) {
return;
} else {
encountered.add(block);
}
let out_states: DfState[] = [new DfState(entry_state)];
for (const instruction of segment.instructions) {
const args = instruction.args;
for (const state of out_states) {
switch (instruction.opcode) {
case Opcode.LET:
case Opcode.FLET:
state.set(
args[0].value,
state.get_min(args[1].value),
state.get_max(args[1].value)
);
break;
case Opcode.LETI:
case Opcode.LETB:
case Opcode.LETW:
case Opcode.LETA:
case Opcode.SYNC_LETI:
case Opcode.SYNC_REGISTER:
state.set(args[0].value, args[1].value, args[1].value);
break;
case Opcode.LETO:
{
const info = label_holder.get_info(args[1].value);
state.set(args[0].value, info ? info.offset : 0, info ? info.offset : 0);
}
break;
case Opcode.SET:
state.set(args[0].value, 1, 1);
break;
case Opcode.CLEAR:
state.set(args[0].value, 0, 0);
break;
case Opcode.LETI:
case Opcode.LETB:
case Opcode.LETW:
case Opcode.LETA:
case Opcode.SYNC_LETI:
case Opcode.SYNC_REGISTER:
state.set(args[0].value, args[1].value, args[1].value);
break;
// case Opcode.fleti:
// state.setf(args[0].value, args[1].value);
// break;
case Opcode.REV:
{
const reg = args[0].value;
const max = state.get_min(reg) <= 0 && state.get_max(reg) >= 0 ? 1 : 0;
const min = state.get_min(reg) === 0 && state.get_max(reg) === 0 ? 1 : 0;
state.set(reg, min, max);
}
break;
// case Opcode.add:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) + state.get_min(args[1].value));
// }
// break;
// case Opcode.addi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) + args[1].value);
// }
// break;
// case Opcode.sub:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) - state.get_min(args[1].value));
// }
// break;
// case Opcode.subi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) - args[1].value);
// }
// break;
// case Opcode.mul:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) * state.get_min(args[1].value));
// }
// break;
// case Opcode.muli:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) * args[1].value);
// }
// break;
// case Opcode.div:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) / state.get_min(args[1].value));
// }
// break;
// case Opcode.divi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) / args[1].value);
// }
// break;
// case Opcode.and:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) & state.get_min(args[1].value));
// }
// break;
// case Opcode.andi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) & args[1].value);
// }
// break;
// case Opcode.or:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) | state.get_min(args[1].value));
// }
// break;
// case Opcode.ori:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) | args[1].value);
// }
// break;
// case Opcode.xor:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) ^ state.get_min(args[1].value));
// }
// break;
// case Opcode.xori:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) ^ args[1].value);
// }
// break;
// case Opcode.mod:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) % state.get_min(args[1].value));
// }
// break;
// case Opcode.modi:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) % args[1].value);
// }
// break;
// case Opcode.shift_left:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) << state.get_min(args[1].value));
// }
// break;
// case Opcode.shift_right:
// {
// const reg = args[0].value;
// state.set(reg, state.get_min(reg) >> state.get_min(args[1].value));
// }
// break;
// case Opcode.fadd:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) + state.getf(args[1].value));
// }
// break;
// case Opcode.faddi:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) + args[1].value);
// }
// break;
// case Opcode.fsub:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) - state.getf(args[1].value));
// }
// break;
// case Opcode.fsubi:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) - args[1].value);
// }
// break;
// case Opcode.fmul:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) * state.getf(args[1].value));
// }
// break;
// case Opcode.fmuli:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) * args[1].value);
// }
// break;
// case Opcode.fdiv:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) / state.getf(args[1].value));
// }
// break;
// case Opcode.fdivi:
// {
// const reg = args[0].value;
// state.setf(reg, state.getf(reg) / args[1].value);
// }
// break;
}
for (const to_block of block.to) {
if (to_block.branch_type === BranchType.Return) {
to_block.link_to(ret);
} else {
link_returning_blocks_recurse(encountered, ret, to_block);
}
}
}

View File

@ -2,6 +2,9 @@ import { Instruction } from "../instructions";
import { Opcode, ParamAccess, RegTupRefType } from "../opcodes";
import { BasicBlock, ControlFlowGraph } from "./ControlFlowGraph";
import { ValueSet } from "./ValueSet";
import Logger from "js-logger";
const logger = Logger.get("scripting/data_flow_analysis");
export const MIN_REGISTER_VALUE = -Math.pow(2, 31);
export const MAX_REGISTER_VALUE = Math.pow(2, 31) - 1;
@ -28,13 +31,18 @@ export function register_values(
inst_idx++;
}
return find_values(new Set(), block, inst_idx, register);
return find_values(new Context(), new Set(), block, inst_idx, register);
} else {
return new ValueSet();
}
}
/**
 * Mutable state shared by all `find_values` calls that belong to a single
 * `register_values` query.
 */
class Context {
    /**
     * Number of `find_values` invocations so far. `find_values` increments it
     * on entry and stops analysing once it exceeds its iteration limit.
     */
    iterations: number;

    constructor() {
        this.iterations = 0;
    }
}
function find_values(
ctx: Context,
path: Set<BasicBlock>,
block: BasicBlock,
end: number,
@ -42,6 +50,12 @@ function find_values(
): ValueSet {
let values = new ValueSet();
if (++ctx.iterations > 1000) {
logger.warn("Too many iterations.");
values.set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
return values;
}
for (let i = block.start; i < end; i++) {
const instruction = block.segment.instructions[i];
const args = instruction.args;
@ -49,7 +63,7 @@ function find_values(
switch (instruction.opcode) {
case Opcode.LET:
if (args[0].value === register) {
values = find_values(new Set(path), block, i, args[1].value);
values = find_values(ctx, new Set(path), block, i, args[1].value);
}
break;
case Opcode.LETI:
@ -72,7 +86,7 @@ function find_values(
break;
case Opcode.REV:
if (args[0].value === register) {
const prev_vals = find_values(new Set(path), block, i, register);
const prev_vals = find_values(ctx, new Set(path), block, i, register);
const prev_size = prev_vals.size();
if (prev_size === 0 || (prev_size === 1 && prev_vals.get(0) === 0)) {
@ -86,25 +100,25 @@ function find_values(
break;
case Opcode.ADDI:
if (args[0].value === register) {
values = find_values(new Set(path), block, i, register);
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_add(args[1].value);
}
break;
case Opcode.SUBI:
if (args[0].value === register) {
values = find_values(new Set(path), block, i, register);
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_sub(args[1].value);
}
break;
case Opcode.MULI:
if (args[0].value === register) {
values = find_values(new Set(path), block, i, register);
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_mul(args[1].value);
}
break;
case Opcode.DIVI:
if (args[0].value === register) {
values = find_values(new Set(path), block, i, register);
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_div(args[1].value);
}
break;
@ -122,9 +136,9 @@ function find_values(
case Opcode.GET_RANDOM:
if (args[1].value === register) {
// TODO: undefined values.
const min = find_values(new Set(path), block, i, args[0].value).min() || 0;
const min = find_values(ctx, new Set(path), block, i, args[0].value).min() || 0;
const max = Math.max(
find_values(new Set(path), block, i, args[0].value + 1).max() || 0,
find_values(ctx, new Set(path), block, i, args[0].value + 1).max() || 0,
min + 1
);
values.set_interval(min, max - 1);
@ -154,7 +168,7 @@ function find_values(
const reg_ref = args[j].value;
let k = 0;
for (const reg_param of param.type.registers) {
for (const reg_param of param.type.register_tuples) {
if (
(reg_param.access === ParamAccess.Write ||
reg_param.access === ParamAccess.ReadWrite) &&
@ -183,7 +197,7 @@ function find_values(
break;
}
values.union(find_values(new Set(path), from, from.end, register));
values.union(find_values(ctx, new Set(path), from, from.end, register));
}
}

View File

@ -73,11 +73,11 @@ class RegRefType extends RefType {}
* The only parameterized type.
*/
export class RegTupRefType extends RefType {
readonly registers: Param[];
readonly register_tuples: Param[];
constructor(...registers: Param[]) {
constructor(...register_tuples: Param[]) {
super();
this.registers = registers;
this.register_tuples = register_tuples;
}
}
@ -1396,7 +1396,14 @@ export class Opcode {
undefined,
[
new Param(
new RegTupRefType(new Param(TYPE_ANY, undefined, ParamAccess.Read)),
new RegTupRefType(
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_I_LABEL, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read),
new Param(TYPE_ANY, undefined, ParamAccess.Read)
),
undefined,
undefined
),

View File

@ -8,7 +8,7 @@ const Dotenv = require("dotenv-webpack");
module.exports = merge(common, {
mode: "development",
devtool: "eval-source-map",
devtool: "inline-cheap-source-map",
module: {
rules: [
{