From 2d56e7830f20f58dfe662e7bb3e1ab91ee70e574 Mon Sep 17 00:00:00 2001 From: Daan Vanden Bosch Date: Sun, 18 Apr 2021 13:54:03 +0200 Subject: [PATCH] Bytecode segments of unknown type are now interpreted as instructions segments if they can be heuristically determined to be code. --- FEATURES.md | 3 +- .../kotlin/world/phantasmal/lib/asm/Opcode.kt | 5 + .../lib/fileFormats/quest/Bytecode.kt | 166 +++++++++++++++++- .../lib/test/BytecodeIrAssertions.kt | 5 +- 4 files changed, 172 insertions(+), 7 deletions(-) diff --git a/FEATURES.md b/FEATURES.md index 95c1e1d7..4098233c 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -104,7 +104,7 @@ Features that are in ***bold italics*** are planned but not yet implemented. - Strings - Labels - Interpret code called from NPCs and objects as code -- ***Interpret segments of unknown type as code if possible*** +- Interpret segments of unknown type as code if possible ## Script Assembly Editor @@ -139,6 +139,7 @@ Features that are in ***bold italics*** are planned but not yet implemented. - ***Show reserved register usage on hover over*** - ***When saving, ask user whether to really save when asm contains errors*** - ***Theme selection*** +- ***Easily switch between segment types*** ## Debugger diff --git a/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Opcode.kt b/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Opcode.kt index 2a1d0658..2ed60a45 100644 --- a/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Opcode.kt +++ b/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Opcode.kt @@ -152,6 +152,11 @@ class Opcode internal constructor( */ val size: Int = if (code < 0xFF) 1 else 2 + /** + * Whether or not the working of this opcode is known. + */ + val known: Boolean = !mnemonic.startsWith("unknown_") + override fun equals(other: Any?): Boolean = this === other override fun hashCode(): Int = code diff --git a/lib/src/commonMain/kotlin/world/phantasmal/lib/fileFormats/quest/Bytecode.kt b/lib/src/commonMain/kotlin/world/phantasmal/lib/fileFormats/quest/Bytecode.kt index 10ccf2a2..aa8e07da 100644 --- a/lib/src/commonMain/kotlin/world/phantasmal/lib/fileFormats/quest/Bytecode.kt +++ b/lib/src/commonMain/kotlin/world/phantasmal/lib/fileFormats/quest/Bytecode.kt @@ -16,6 +16,12 @@ import kotlin.math.min private val logger = KotlinLogging.logger {} +private const val MAX_TOTAL_NOPS = 20 +private const val MAX_SEQUENTIAL_NOPS = 10 +private const val MAX_UNKNOWN_OPCODE_RATIO = 0.2 +private const val MAX_STACK_POP_WITHOUT_PRECEDING_PUSH_RATIO = 0.2 +private const val MAX_UNKNOWN_LABEL_RATIO = 0.2 + val SEGMENT_PRIORITY = mapOf( SegmentType.Instructions to 2, SegmentType.String to 1, @@ -70,7 +76,7 @@ fun parseBytecode( findAndParseSegments( cursor, labelHolder, - entryLabels.map { it to SegmentType.Instructions }.toMap(), + entryLabels.associateWith { SegmentType.Instructions }, offsetToSegment, lenient, dcGcFormat, @@ -78,7 +84,8 @@ fun parseBytecode( val segments: MutableList = mutableListOf() - // Put segments in an array and parse left-over segments as data. + // Put segments in an array and try to parse leftover segments as instructions segments. When a + // segment can't be parsed as instructions, fall back to parsing it as a data segment. var offset = 0 while (offset < cursor.size) { @@ -104,13 +111,27 @@ fun parseBytecode( } cursor.seekStart(offset) - parseDataSegment( + + val isInstructionsSegment = tryParseInstructionsSegment( offsetToSegment, + labelHolder, cursor, endOffset, - labels?.toMutableList() ?: mutableListOf() + labels?.toMutableList() ?: mutableListOf(), + dcGcFormat, ) + if (!isInstructionsSegment) { + cursor.seekStart(offset) + + parseDataSegment( + offsetToSegment, + cursor, + endOffset, + labels?.toMutableList() ?: mutableListOf() + ) + } + segment = offsetToSegment[offset] check(endOffset > offset) { @@ -612,6 +633,141 @@ private fun parseInstructionArguments( return args } +private fun tryParseInstructionsSegment( + offsetToSegment: MutableMap, + labelHolder: LabelHolder, + cursor: Cursor, + endOffset: Int, + labels: MutableList, + dcGcFormat: Boolean, +): Boolean { + val offset = cursor.position + + fun logReason(reason: String, t: Throwable? = null) { + logger.trace(t) { + buildString { + append("Determined that segment ") + + if (labels.isEmpty()) { + append("without label") + } else { + if (labels.size == 1) append("with label ") + else append("with labels ") + + labels.joinTo(this) + } + + append(" at offset ") + append(offset) + append(" is not an instructions segment because ") + append(reason) + append(".") + } + } + } + + try { + parseInstructionsSegment( + offsetToSegment, + labelHolder, + cursor, + endOffset, + labels, + nextLabel = null, + lenient = false, + dcGcFormat, + ) + + val segment = offsetToSegment[offset] + val instructions = (segment as InstructionSegment).instructions + + // Heuristically try to detect whether the segment is actually a data segment. + var prevOpcode: Opcode? = null + var totalNopCount = 0 + var sequentialNopCount = 0 + var unknownOpcodeCount = 0 + var stackPopCount = 0 + var stackPopWithoutPrecedingPushCount = 0 + var labelCount = 0 + var unknownLabelCount = 0 + + for (inst in instructions) { + if (inst.opcode.code == OP_NOP.code) { + if (++totalNopCount > MAX_TOTAL_NOPS) { + logReason("it has more than $MAX_TOTAL_NOPS nop instructions") + return false + } + + if (++sequentialNopCount > MAX_SEQUENTIAL_NOPS) { + logReason("it has more than $MAX_SEQUENTIAL_NOPS sequential nop instructions") + return false + } + } else { + sequentialNopCount = 0 + } + + if (!inst.opcode.known) { + unknownOpcodeCount++ + } + + if (inst.opcode.stack == StackInteraction.Pop) { + stackPopCount++ + + if (prevOpcode?.stack != StackInteraction.Push) { + stackPopWithoutPrecedingPushCount++ + } + } + + for ((index, param) in inst.opcode.params.withIndex()) { + if (index >= inst.args.size) break + + if (param.type is LabelType) { + for (arg in inst.getArgs(index)) { + labelCount++ + + if (!labelHolder.hasLabel(arg.value as Int)) { + unknownLabelCount++ + } + } + } + } + + prevOpcode = inst.opcode + } + + val unknownLabelRatio = unknownLabelCount.toDouble() / labelCount + + if (unknownLabelRatio > MAX_UNKNOWN_LABEL_RATIO) { + logReason( + "${100 * unknownLabelRatio}% of its label references are to nonexistent labels" + ) + return false + } + + val stackPopWithoutPrecedingPushRatio = + stackPopWithoutPrecedingPushCount.toDouble() / stackPopCount + + if (stackPopWithoutPrecedingPushRatio > MAX_STACK_POP_WITHOUT_PRECEDING_PUSH_RATIO) { + logReason( + "${100 * stackPopWithoutPrecedingPushRatio}% of its stack pop instructions don't have a preceding push instruction" + ) + return false + } + + val unknownOpcodeRatio = unknownOpcodeCount.toDouble() / instructions.size + + if (unknownOpcodeRatio > MAX_UNKNOWN_OPCODE_RATIO) { + logReason("${100 * unknownOpcodeRatio}% of its opcodes are unknown") + return false + } + + return true + } catch (e: Exception) { + logReason("parsing it resulted in an exception", e) + return false + } +} + fun writeBytecode(bytecodeIr: BytecodeIr, dcGcFormat: Boolean): BytecodeAndLabelOffsets { val buffer = Buffer.withCapacity(100 * bytecodeIr.segments.size, Endianness.Little) val cursor = buffer.cursor() @@ -764,6 +920,8 @@ private class LabelHolder(labelOffsets: IntArray) { } } + fun hasLabel(label: Int): Boolean = label in labelMap + fun getLabels(offset: Int): List? = offsetMap[offset] fun getInfo(label: Int): LabelInfo? { diff --git a/lib/src/commonTest/kotlin/world/phantasmal/lib/test/BytecodeIrAssertions.kt b/lib/src/commonTest/kotlin/world/phantasmal/lib/test/BytecodeIrAssertions.kt index 15de707d..e56bb331 100644 --- a/lib/src/commonTest/kotlin/world/phantasmal/lib/test/BytecodeIrAssertions.kt +++ b/lib/src/commonTest/kotlin/world/phantasmal/lib/test/BytecodeIrAssertions.kt @@ -6,8 +6,9 @@ import kotlin.test.assertNotNull import kotlin.test.assertNull fun assertDeepEquals(expected: BytecodeIr, actual: BytecodeIr, ignoreSrcLocs: Boolean = false) { - assertDeepEquals(expected.segments, - actual.segments + assertDeepEquals( + expected.segments, + actual.segments, ) { a, b -> assertDeepEquals(a, b, ignoreSrcLocs) } }