Fixed bugs in DFA code. All dynamic label references are now detected unless they're computed in a way which is too complex to analyse at the moment.

This commit is contained in:
Daan Vanden Bosch 2019-08-06 01:14:57 +02:00
parent 9284cf4a8a
commit 054b1c99fb
6 changed files with 307 additions and 181 deletions

View File

@ -1,7 +1,7 @@
import Logger from "js-logger";
import { Endianness } from "../..";
import { ControlFlowGraph } from "../../../scripting/data_flow_analysis/ControlFlowGraph";
import { register_values } from "../../../scripting/data_flow_analysis/register_values";
import { register_value } from "../../../scripting/data_flow_analysis/register_value";
import {
Arg,
DataSegment,
@ -35,6 +35,9 @@ import { Cursor } from "../../cursor/Cursor";
import { ResizableBufferCursor } from "../../cursor/ResizableBufferCursor";
import { WritableCursor } from "../../cursor/WritableCursor";
import { ResizableBuffer } from "../../ResizableBuffer";
import { stack_value } from "../../../scripting/data_flow_analysis/stack_value";
// TODO: correctly deal with stack floats (they're pushed with arg_pushl)
const logger = Logger.get("data_formats/parsing/quest/bin");
@ -233,7 +236,13 @@ function parse_object_code(
): Segment[] {
const offset_to_segment = new Map<number, Segment>();
find_and_parse_segments(cursor, label_holder, entry_labels, offset_to_segment, lenient);
find_and_parse_segments(
cursor,
label_holder,
entry_labels.reduce((m, l) => m.set(l, SegmentType.Instructions), new Map()),
offset_to_segment,
lenient
);
const segments: Segment[] = [];
@ -334,28 +343,21 @@ function parse_object_code(
function find_and_parse_segments(
cursor: Cursor,
label_holder: LabelHolder,
entry_labels: number[],
labels: Map<number, SegmentType>,
offset_to_segment: Map<number, Segment>,
lenient: boolean
) {
let start_segment_count: number;
// Iteratively parse segments from entry points.
// Iteratively parse segments from label references.
do {
start_segment_count = offset_to_segment.size;
for (const entry_label of entry_labels) {
parse_segment(
offset_to_segment,
label_holder,
cursor,
entry_label,
SegmentType.Instructions,
lenient
);
for (const [label, type] of labels) {
parse_segment(offset_to_segment, label_holder, cursor, label, type, lenient);
}
// Determine dynamically set entry points.
// Find label references.
const sorted_segments = [...offset_to_segment.entries()]
.filter(([, s]) => s.type === SegmentType.Instructions)
.sort(([a], [b]) => a - b)
@ -363,36 +365,62 @@ function find_and_parse_segments(
const cfg = ControlFlowGraph.create(sorted_segments);
entry_labels = [];
labels = new Map();
for (const segment of sorted_segments) {
for (const instruction of segment.instructions) {
if (instruction.opcode.stack) {
continue;
}
const len = Math.min(instruction.opcode.params.length, instruction.args.length);
for (let i = 0; i < len; i++) {
for (let i = 0; i < instruction.opcode.params.length; i++) {
const param = instruction.opcode.params[i];
const arg = instruction.args[i];
if (param.type instanceof RegTupRefType) {
for (let j = 0; j < param.type.register_tuples.length; j++) {
const reg_tup = param.type.register_tuples[j];
switch (param.type) {
case TYPE_I_LABEL:
get_arg_label_values(
cfg,
labels,
instruction,
i,
SegmentType.Instructions
);
break;
case TYPE_I_LABEL_VAR:
// Never on the stack.
// Eat all remaining arguments.
for (; i < instruction.args.length; i++) {
labels.set(instruction.args[i].value, SegmentType.Instructions);
}
if (reg_tup.type === TYPE_I_LABEL) {
const label_values = register_values(
cfg,
instruction,
arg.value + j
);
break;
case TYPE_D_LABEL:
get_arg_label_values(cfg, labels, instruction, i, SegmentType.Data);
break;
case TYPE_S_LABEL:
get_arg_label_values(cfg, labels, instruction, i, SegmentType.String);
break;
default:
if (param.type instanceof RegTupRefType) {
// Never on the stack.
const arg = instruction.args[i];
if (label_values.size() <= 10) {
entry_labels.push(...label_values);
for (let j = 0; j < param.type.register_tuples.length; j++) {
const reg_tup = param.type.register_tuples[j];
if (reg_tup.type === TYPE_I_LABEL) {
const label_values = register_value(
cfg,
instruction,
arg.value + j
);
if (label_values.size() <= 10) {
for (const label of label_values) {
labels.set(label, SegmentType.Instructions);
}
}
}
}
}
}
break;
}
}
}
@ -400,6 +428,45 @@ function find_and_parse_segments(
} while (offset_to_segment.size > start_segment_count);
}
/**
 * Records the label(s) referenced by a single parameter of `instruction` in `labels`.
 *
 * When the opcode pops its arguments from the stack, the candidate labels are computed with
 * `stack_value`; otherwise the immediate argument at `param_idx` is used. A label that is
 * already present in `labels` only has its segment type replaced when `segment_type` has a
 * strictly higher `SEGMENT_PRIORITY` than the recorded type.
 *
 * This function returns nothing; its result is the mutation of `labels`.
 *
 * @param cfg - control flow graph, passed through to `stack_value`.
 * @param labels - map from label to segment type, updated in place.
 * @param instruction - the instruction whose parameter is inspected.
 * @param param_idx - index of the parameter in `instruction.opcode.params`.
 * @param segment_type - the segment type implied by the parameter's type.
 */
function get_arg_label_values(
    cfg: ControlFlowGraph,
    labels: Map<number, SegmentType>,
    instruction: Instruction,
    param_idx: number,
    segment_type: SegmentType
): void {
    // Single place where the "keep the highest priority type" rule is applied.
    const add_label = (label: number): void => {
        const old_type = labels.get(label);

        if (
            old_type == undefined ||
            SEGMENT_PRIORITY[segment_type] > SEGMENT_PRIORITY[old_type]
        ) {
            labels.set(label, segment_type);
        }
    };

    if (instruction.opcode.stack === StackInteraction.Pop) {
        // Parameter i of n sits n - i - 1 elements below the top of the stack.
        const stack_values = stack_value(
            cfg,
            instruction,
            instruction.opcode.params.length - param_idx - 1
        );

        // Larger value sets are ignored entirely instead of being recorded as labels.
        if (stack_values.size() <= 10) {
            for (const value of stack_values) {
                add_label(value);
            }
        }
    } else {
        const arg = instruction.args[param_idx];

        // The instruction may carry fewer arguments than its opcode declares parameters;
        // skip the parameter instead of throwing on `undefined.value`.
        if (arg != undefined) {
            add_label(arg.value);
        }
    }
}
function parse_segment(
offset_to_segment: Map<number, Segment>,
label_holder: LabelHolder,
@ -520,70 +587,6 @@ function parse_instructions_segment(
}
}
// Recurse on static label references.
const stack: Arg[] = [];
for (const instruction of instructions) {
const opcode = instruction.opcode;
const params = opcode.params;
const args =
opcode.stack === StackInteraction.Pop
? stack.splice(stack.length - params.length, params.length)
: instruction.args;
if (opcode.stack === StackInteraction.Push) {
// TODO: correctly deal with arg_pushr.
stack.push(...args);
} else {
const len = Math.min(params.length, args.length);
for (let i = 0; i < len; i++) {
const param_type = params[i].type;
const label = args[i].value;
let segment_type: SegmentType;
switch (param_type) {
case TYPE_I_LABEL:
segment_type = SegmentType.Instructions;
break;
case TYPE_I_LABEL_VAR:
segment_type = SegmentType.Instructions;
// Eat all remaining arguments.
for (; i < args.length; i++) {
parse_segment(
offset_to_segment,
label_holder,
cursor,
args[i].value,
segment_type,
lenient
);
}
break;
case TYPE_D_LABEL:
segment_type = SegmentType.Data;
break;
case TYPE_S_LABEL:
segment_type = SegmentType.String;
break;
default:
continue;
}
parse_segment(
offset_to_segment,
label_holder,
cursor,
label,
segment_type,
lenient
);
}
}
}
// Recurse on label drop-through.
if (next_label != undefined) {
// Find the first non-nop.

View File

@ -30,6 +30,11 @@ export class BasicBlock {
other.from.push(this);
}
}
/**
 * Looks up the position of `instruction` within this block's segment.
 *
 * The search starts at `this.start`. A match that is not below `this.end` is treated as not
 * found.
 *
 * @returns the index into `this.segment.instructions`, or -1 when there is no match below
 * `this.end`.
 */
index_of_instruction(instruction: Instruction): number {
    const found = this.segment.instructions.indexOf(instruction, this.start);

    if (found < this.end) {
        return found;
    }

    return -1;
}
}
export class ControlFlowGraph {

View File

@ -5,35 +5,35 @@ import { ControlFlowGraph } from "./ControlFlowGraph";
import {
MAX_REGISTER_VALUE,
MIN_REGISTER_VALUE,
register_values,
register_value,
REGISTER_VALUES,
} from "./register_values";
} from "./register_value";
test(`${register_values.name} trivial case`, () => {
test(`trivial case`, () => {
const im = to_instructions(`
0:
ret
`);
const cfg = ControlFlowGraph.create(im);
const values = register_values(cfg, im[0].instructions[0], 6);
const values = register_value(cfg, im[0].instructions[0], 6);
expect(values.size()).toBe(0);
});
test(`${register_values.name} single assignment`, () => {
test(`single assignment`, () => {
const im = to_instructions(`
0:
leti r6, 1337
ret
`);
const cfg = ControlFlowGraph.create(im);
const values = register_values(cfg, im[0].instructions[1], 6);
const values = register_value(cfg, im[0].instructions[1], 6);
expect(values.size()).toBe(1);
expect(values.get(0)).toBe(1337);
});
test(`${register_values.name} two code paths`, () => {
test(`two code paths`, () => {
const im = to_instructions(`
0:
jmp_> r1, r2, 1
@ -45,14 +45,14 @@ test(`${register_values.name} two code paths`, () => {
ret
`);
const cfg = ControlFlowGraph.create(im);
const values = register_values(cfg, im[2].instructions[0], 10);
const values = register_value(cfg, im[2].instructions[0], 10);
expect(values.size()).toBe(2);
expect(values.get(0)).toBe(111);
expect(values.get(1)).toBe(222);
});
test(`${register_values.name} loop`, () => {
test(`loop`, () => {
const im = to_instructions(`
0:
addi r10, 5
@ -60,12 +60,12 @@ test(`${register_values.name} loop`, () => {
ret
`);
const cfg = ControlFlowGraph.create(im);
const values = register_values(cfg, im[0].instructions[2], 10);
const values = register_value(cfg, im[0].instructions[2], 10);
expect(values.size()).toBe(REGISTER_VALUES);
});
test(`${register_values.name} leta and leto`, () => {
test(`leta and leto`, () => {
const im = to_instructions(`
0:
leta r0, r100
@ -73,20 +73,20 @@ test(`${register_values.name} leta and leto`, () => {
ret
`);
const cfg = ControlFlowGraph.create(im);
const r0 = register_values(cfg, im[0].instructions[2], 0);
const r0 = register_value(cfg, im[0].instructions[2], 0);
expect(r0.size()).toBe(REGISTER_VALUES);
expect(r0.min()).toBe(MIN_REGISTER_VALUE);
expect(r0.max()).toBe(MAX_REGISTER_VALUE);
const r1 = register_values(cfg, im[0].instructions[2], 1);
const r1 = register_value(cfg, im[0].instructions[2], 1);
expect(r1.size()).toBe(REGISTER_VALUES);
expect(r1.min()).toBe(MIN_REGISTER_VALUE);
expect(r1.max()).toBe(MAX_REGISTER_VALUE);
});
test(`${register_values.name} rev`, () => {
test(`rev`, () => {
const im = to_instructions(`
0:
leti r0, 10
@ -102,17 +102,17 @@ test(`${register_values.name} rev`, () => {
ret
`);
const cfg = ControlFlowGraph.create(im);
const v0 = register_values(cfg, im[0].instructions[4], 10);
const v0 = register_value(cfg, im[0].instructions[4], 10);
expect(v0.size()).toBe(1);
expect(v0.get(0)).toBe(0);
const v1 = register_values(cfg, im[0].instructions[8], 10);
const v1 = register_value(cfg, im[0].instructions[8], 10);
expect(v1.size()).toBe(2);
expect(v1.to_array()).toEqual([0, 1]);
const v2 = register_values(cfg, im[0].instructions[10], 10);
const v2 = register_value(cfg, im[0].instructions[10], 10);
expect(v2.size()).toBe(1);
expect(v2.get(0)).toBe(1);
@ -123,7 +123,7 @@ test(`${register_values.name} rev`, () => {
* The instruction will be called with arguments r99, 15. r99 will be set to 10 or 20.
*/
function test_branched(opcode: Opcode, ...expected: number[]): void {
test(`${register_values.name} ${opcode.mnemonic}`, () => {
test(opcode.mnemonic, () => {
const im = to_instructions(`
0:
leti r99, 10
@ -134,7 +134,7 @@ function test_branched(opcode: Opcode, ...expected: number[]): void {
ret
`);
const cfg = ControlFlowGraph.create(im);
const values = register_values(cfg, im[1].instructions[1], 99);
const values = register_value(cfg, im[1].instructions[1], 99);
expect(values.size()).toBe(expected.length);
expect(values.to_array()).toEqual(expected);
@ -146,7 +146,7 @@ test_branched(Opcode.SUBI, -5, 5);
test_branched(Opcode.MULI, 150, 300);
test_branched(Opcode.DIVI, 0, 1);
test(`${register_values.name} get_random`, () => {
test(`get_random`, () => {
const im = to_instructions(`
0:
leti r0, 20
@ -159,17 +159,17 @@ test(`${register_values.name} get_random`, () => {
ret
`);
const cfg = ControlFlowGraph.create(im);
const v0 = register_values(cfg, im[0].instructions[3], 10);
const v0 = register_value(cfg, im[0].instructions[3], 10);
expect(v0.size()).toBe(1);
expect(v0.get(0)).toBe(20);
const v1 = register_values(cfg, im[0].instructions[5], 10);
const v1 = register_value(cfg, im[0].instructions[5], 10);
expect(v1.size()).toBe(1);
expect(v1.get(0)).toBe(20);
const v2 = register_values(cfg, im[0].instructions[7], 10);
const v2 = register_value(cfg, im[0].instructions[7], 10);
expect(v2.size()).toBe(5);
expect(v2.to_array()).toEqual([20, 21, 22, 23, 24]);

View File

@ -1,19 +1,25 @@
import Logger from "js-logger";
import { Instruction } from "../instructions";
import { Opcode, ParamAccess, RegTupRefType } from "../opcodes";
import {
MAX_SIGNED_DWORD_VALUE,
MIN_SIGNED_DWORD_VALUE,
Opcode,
ParamAccess,
RegTupRefType,
} from "../opcodes";
import { BasicBlock, ControlFlowGraph } from "./ControlFlowGraph";
import { ValueSet } from "./ValueSet";
import Logger from "js-logger";
const logger = Logger.get("scripting/data_flow_analysis");
const logger = Logger.get("scripting/data_flow_analysis/register_value");
export const MIN_REGISTER_VALUE = -Math.pow(2, 31);
export const MAX_REGISTER_VALUE = Math.pow(2, 31) - 1;
export const MIN_REGISTER_VALUE = MIN_SIGNED_DWORD_VALUE;
export const MAX_REGISTER_VALUE = MAX_SIGNED_DWORD_VALUE;
export const REGISTER_VALUES = Math.pow(2, 32);
/**
* Computes the possible values of a register at a specific instruction.
* Computes the possible values of a register right before a specific instruction.
*/
export function register_values(
export function register_value(
cfg: ControlFlowGraph,
instruction: Instruction,
register: number
@ -21,17 +27,13 @@ export function register_values(
const block = cfg.get_block_for_instuction(instruction);
if (block) {
let inst_idx = block.start;
while (inst_idx < block.end) {
if (block.segment.instructions[inst_idx] === instruction) {
break;
}
inst_idx++;
}
return find_values(new Context(), new Set(), block, inst_idx, register);
return find_values(
new Context(),
new Set(),
block,
block.index_of_instruction(instruction),
register
);
} else {
return new ValueSet();
}
@ -48,22 +50,19 @@ function find_values(
end: number,
register: number
): ValueSet {
let values = new ValueSet();
if (++ctx.iterations > 1000) {
if (++ctx.iterations > 100) {
logger.warn("Too many iterations.");
values.set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
return values;
return new ValueSet().set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
}
for (let i = block.start; i < end; i++) {
for (let i = end - 1; i >= block.start; i--) {
const instruction = block.segment.instructions[i];
const args = instruction.args;
switch (instruction.opcode) {
case Opcode.LET:
if (args[0].value === register) {
values = find_values(ctx, new Set(path), block, i, args[1].value);
return find_values(ctx, new Set(path), block, i, args[1].value);
}
break;
case Opcode.LETI:
@ -71,17 +70,17 @@ function find_values(
case Opcode.LETW:
case Opcode.SYNC_LETI:
if (args[0].value === register) {
values.set_value(args[1].value);
return new ValueSet().set_value(args[1].value);
}
break;
case Opcode.SET:
if (args[0].value === register) {
values.set_value(1);
return new ValueSet().set_value(1);
}
break;
case Opcode.CLEAR:
if (args[0].value === register) {
values.set_value(0);
return new ValueSet().set_value(0);
}
break;
case Opcode.REV:
@ -90,47 +89,51 @@ function find_values(
const prev_size = prev_vals.size();
if (prev_size === 0 || (prev_size === 1 && prev_vals.get(0) === 0)) {
values.set_value(1);
} else if (values.has(0)) {
values.set_interval(0, 1);
return new ValueSet().set_value(1);
} else if (prev_vals.has(0)) {
return new ValueSet().set_interval(0, 1);
} else {
values.set_value(0);
return new ValueSet().set_value(0);
}
}
break;
case Opcode.ADDI:
if (args[0].value === register) {
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_add(args[1].value);
const prev_vals = find_values(ctx, new Set(path), block, i, register);
return prev_vals.scalar_add(args[1].value);
}
break;
case Opcode.SUBI:
if (args[0].value === register) {
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_sub(args[1].value);
const prev_vals = find_values(ctx, new Set(path), block, i, register);
return prev_vals.scalar_sub(args[1].value);
}
break;
case Opcode.MULI:
if (args[0].value === register) {
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_mul(args[1].value);
const prev_vals = find_values(ctx, new Set(path), block, i, register);
return prev_vals.scalar_mul(args[1].value);
}
break;
case Opcode.DIVI:
if (args[0].value === register) {
values = find_values(ctx, new Set(path), block, i, register);
values.scalar_div(args[1].value);
const prev_vals = find_values(ctx, new Set(path), block, i, register);
return prev_vals.scalar_div(args[1].value);
}
break;
case Opcode.IF_ZONE_CLEAR:
if (args[0].value === register) {
values.set_interval(0, 1);
return new ValueSet().set_interval(0, 1);
}
break;
case Opcode.GET_DIFFLVL:
if (args[0].value === register) {
return new ValueSet().set_interval(0, 2);
}
break;
case Opcode.GET_SLOTNUMBER:
if (args[0].value === register) {
values.set_interval(0, 3);
return new ValueSet().set_interval(0, 3);
}
break;
case Opcode.GET_RANDOM:
@ -141,7 +144,7 @@ function find_values(
find_values(ctx, new Set(path), block, i, args[0].value + 1).max() || 0,
min + 1
);
values.set_interval(min, max - 1);
return new ValueSet().set_interval(min, max - 1);
}
break;
case Opcode.STACK_PUSHM:
@ -151,7 +154,7 @@ function find_values(
const max_reg = args[0].value + args[1].value;
if (min_reg <= register && register < max_reg) {
values.set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
return new ValueSet().set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
}
}
break;
@ -161,7 +164,7 @@ function find_values(
const params = instruction.opcode.params;
const arg_len = Math.min(args.length, params.length);
outer: for (let j = 0; j < arg_len; j++) {
for (let j = 0; j < arg_len; j++) {
const param = params[j];
if (param.type instanceof RegTupRefType) {
@ -174,8 +177,10 @@ function find_values(
reg_param.access === ParamAccess.ReadWrite) &&
reg_ref + k === register
) {
values.set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
break outer;
return new ValueSet().set_interval(
MIN_REGISTER_VALUE,
MAX_REGISTER_VALUE
);
}
k++;
@ -187,18 +192,17 @@ function find_values(
}
}
if (values.size() === 0) {
path.add(block);
const values = new ValueSet();
path.add(block);
for (const from of block.from) {
// Bail out from loops.
if (path.has(from)) {
values.set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
break;
}
values.union(find_values(ctx, new Set(path), from, from.end, register));
for (const from of block.from) {
// Bail out from loops.
if (path.has(from)) {
values.set_interval(MIN_REGISTER_VALUE, MAX_REGISTER_VALUE);
break;
}
values.union(find_values(ctx, new Set(path), from, from.end, register));
}
return values;

View File

@ -0,0 +1,114 @@
import Logger from "js-logger";
import { Instruction } from "../instructions";
import {
MAX_SIGNED_DWORD_VALUE,
MIN_SIGNED_DWORD_VALUE,
Opcode,
StackInteraction,
} from "../opcodes";
import { BasicBlock, ControlFlowGraph } from "./ControlFlowGraph";
import { ValueSet } from "./ValueSet";
import { register_value } from "./register_value";
// Logger used for warnings emitted by the stack value analysis in this module.
const logger = Logger.get("scripting/data_flow_analysis/stack_value");
/** Lower bound used for stack values; alias of MIN_SIGNED_DWORD_VALUE. */
export const MIN_STACK_VALUE = MIN_SIGNED_DWORD_VALUE;
/** Upper bound used for stack values; alias of MAX_SIGNED_DWORD_VALUE. */
export const MAX_STACK_VALUE = MAX_SIGNED_DWORD_VALUE;
/**
 * Computes the possible values of the stack element at the given position, counted from the
 * top of the stack, right before `instruction`.
 *
 * @param cfg - control flow graph that is searched for the block containing `instruction`.
 * @param instruction - the instruction before which the stack is inspected.
 * @param position - distance from the top of the stack; 0 is the top element.
 * @returns the set of possible values, or an empty set when `cfg` has no block for
 * `instruction`.
 */
export function stack_value(
    cfg: ControlFlowGraph,
    instruction: Instruction,
    position: number
): ValueSet {
    const block = cfg.get_block_for_instuction(instruction);

    if (!block) {
        return new ValueSet();
    }

    const ctx = new Context(cfg);
    const visited = new Set<BasicBlock>();
    const end = block.index_of_instruction(instruction);

    return find_values(ctx, visited, block, end, position);
}
/**
 * State shared by all recursive `find_values` calls of a single `stack_value` query.
 */
class Context {
    /** Control flow graph being analysed. */
    readonly cfg: ControlFlowGraph;
    /** Number of `find_values` invocations made so far. */
    iterations: number;

    constructor(cfg: ControlFlowGraph) {
        this.cfg = cfg;
        this.iterations = 0;
    }
}
/**
 * Walks backwards from instruction index `end` (exclusive) in `block` looking for the push
 * that produced the stack element at `position`, following predecessor blocks when the
 * current block doesn't contain it.
 *
 * @param ctx - shared state; its iteration counter is incremented on every call.
 * @param path - blocks already visited on the current path, used to detect loops.
 * @param block - block to search.
 * @param end - index of the first instruction that is NOT searched.
 * @param position - distance of the wanted element from the top of the stack.
 * @returns the possible values; the full stack value interval when the iteration limit is
 * exceeded, a loop is detected or the value is pushed by arg_pusha, arg_pusho or arg_pushs.
 */
function find_values(
    ctx: Context,
    path: Set<BasicBlock>,
    block: BasicBlock,
    end: number,
    position: number
): ValueSet {
    ctx.iterations += 1;

    if (ctx.iterations > 100) {
        logger.warn("Too many iterations.");
        return new ValueSet().set_interval(MIN_STACK_VALUE, MAX_STACK_VALUE);
    }

    let depth = position;

    for (let idx = end - 1; idx >= block.start; idx--) {
        const current = block.segment.instructions[idx];
        const opcode = current.opcode;

        if (opcode.stack === StackInteraction.Pop) {
            // Before this instruction ran, its popped arguments were still on top of the
            // wanted element.
            depth += opcode.params.length;
            continue;
        }

        const pushes_register = opcode === Opcode.ARG_PUSHR;
        const pushes_literal =
            opcode === Opcode.ARG_PUSHL ||
            opcode === Opcode.ARG_PUSHB ||
            opcode === Opcode.ARG_PUSHW;
        const pushes_unknown =
            opcode === Opcode.ARG_PUSHA ||
            opcode === Opcode.ARG_PUSHO ||
            opcode === Opcode.ARG_PUSHS;

        if (!pushes_register && !pushes_literal && !pushes_unknown) {
            // Not a push instruction handled here; it doesn't affect the search.
            continue;
        }

        if (depth !== 0) {
            // This push produced an element above the wanted one.
            depth--;
            continue;
        }

        // This push produced the wanted element.
        if (pushes_register) {
            return register_value(ctx.cfg, current, current.args[0].value);
        }

        if (pushes_literal) {
            return new ValueSet().set_value(current.args[0].value);
        }

        return new ValueSet().set_interval(MIN_STACK_VALUE, MAX_STACK_VALUE);
    }

    // Not found in this block, combine the results of all predecessor blocks.
    const result = new ValueSet();
    path.add(block);

    for (const predecessor of block.from) {
        // Bail out from loops.
        if (path.has(predecessor)) {
            result.set_interval(MIN_STACK_VALUE, MAX_STACK_VALUE);
            break;
        }

        result.union(find_values(ctx, new Set(path), predecessor, predecessor.end, depth));
    }

    return result;
}

View File

@ -2,8 +2,8 @@ import * as fs from "fs";
/**
* Applies f to all QST files in a directory.
* F is called with the path to the file, the file name and the content of the file.
* Uses the QST files provided with Tethealla version 0.143 by default.
* f is called with the path to the file, the file name and the content of the file.
* Uses the 106 QST files provided with Tethealla version 0.143 by default.
*/
export function walk_qst_files(
f: (path: string, file_name: string, contents: Buffer) => void,