mirror of
https://github.com/DaanVandenBosch/phantasmal-world.git
synced 2025-04-05 15:28:29 +08:00
Bytecode segments of unknown type are now interpreted as instructions segments if they can be heuristically determined to be code.
This commit is contained in:
parent
de8aef4cca
commit
2d56e7830f
@ -104,7 +104,7 @@ Features that are in ***bold italics*** are planned but not yet implemented.
|
|||||||
- Strings
|
- Strings
|
||||||
- Labels
|
- Labels
|
||||||
- Interpret code called from NPCs and objects as code
|
- Interpret code called from NPCs and objects as code
|
||||||
- ***Interpret segments of unknown type as code if possible***
|
- Interpret segments of unknown type as code if possible
|
||||||
|
|
||||||
## Script Assembly Editor
|
## Script Assembly Editor
|
||||||
|
|
||||||
@ -139,6 +139,7 @@ Features that are in ***bold italics*** are planned but not yet implemented.
|
|||||||
- ***Show reserved register usage on hover over***
|
- ***Show reserved register usage on hover over***
|
||||||
- ***When saving, ask user whether to really save when asm contains errors***
|
- ***When saving, ask user whether to really save when asm contains errors***
|
||||||
- ***Theme selection***
|
- ***Theme selection***
|
||||||
|
- ***Easily switch between segment types***
|
||||||
|
|
||||||
## Debugger
|
## Debugger
|
||||||
|
|
||||||
|
@ -152,6 +152,11 @@ class Opcode internal constructor(
|
|||||||
*/
|
*/
|
||||||
val size: Int = if (code < 0xFF) 1 else 2
|
val size: Int = if (code < 0xFF) 1 else 2
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether the workings of this opcode are known, i.e. whether its mnemonic does not start with "unknown_".
|
||||||
|
*/
|
||||||
|
val known: Boolean = !mnemonic.startsWith("unknown_")
|
||||||
|
|
||||||
override fun equals(other: Any?): Boolean = this === other
|
override fun equals(other: Any?): Boolean = this === other
|
||||||
|
|
||||||
override fun hashCode(): Int = code
|
override fun hashCode(): Int = code
|
||||||
|
@ -16,6 +16,12 @@ import kotlin.math.min
|
|||||||
|
|
||||||
private val logger = KotlinLogging.logger {}
|
private val logger = KotlinLogging.logger {}
|
||||||
|
|
||||||
|
// Heuristic limit used by tryParseInstructionsSegment: a candidate segment that contains more than
// this many nop instructions in total is not treated as an instructions segment.
private const val MAX_TOTAL_NOPS = 20
|
||||||
|
// Heuristic limit used by tryParseInstructionsSegment: a candidate segment that contains a run of
// more than this many consecutive nop instructions is not treated as an instructions segment.
private const val MAX_SEQUENTIAL_NOPS = 10
|
||||||
|
// Heuristic limit used by tryParseInstructionsSegment: the maximum fraction (0.0 to 1.0) of a
// candidate segment's instructions that may have an opcode whose `known` flag is false.
private const val MAX_UNKNOWN_OPCODE_RATIO = 0.2
|
||||||
|
// Heuristic limit used by tryParseInstructionsSegment: the maximum fraction (0.0 to 1.0) of a
// candidate segment's stack-popping instructions whose directly preceding instruction is not a
// stack-pushing instruction.
private const val MAX_STACK_POP_WITHOUT_PRECEDING_PUSH_RATIO = 0.2
|
||||||
|
// Heuristic limit used by tryParseInstructionsSegment: the maximum fraction (0.0 to 1.0) of a
// candidate segment's label arguments that may refer to labels the LabelHolder does not have.
private const val MAX_UNKNOWN_LABEL_RATIO = 0.2
|
||||||
|
|
||||||
val SEGMENT_PRIORITY = mapOf(
|
val SEGMENT_PRIORITY = mapOf(
|
||||||
SegmentType.Instructions to 2,
|
SegmentType.Instructions to 2,
|
||||||
SegmentType.String to 1,
|
SegmentType.String to 1,
|
||||||
@ -70,7 +76,7 @@ fun parseBytecode(
|
|||||||
findAndParseSegments(
|
findAndParseSegments(
|
||||||
cursor,
|
cursor,
|
||||||
labelHolder,
|
labelHolder,
|
||||||
entryLabels.map { it to SegmentType.Instructions }.toMap(),
|
entryLabels.associateWith { SegmentType.Instructions },
|
||||||
offsetToSegment,
|
offsetToSegment,
|
||||||
lenient,
|
lenient,
|
||||||
dcGcFormat,
|
dcGcFormat,
|
||||||
@ -78,7 +84,8 @@ fun parseBytecode(
|
|||||||
|
|
||||||
val segments: MutableList<Segment> = mutableListOf()
|
val segments: MutableList<Segment> = mutableListOf()
|
||||||
|
|
||||||
// Put segments in an array and parse left-over segments as data.
|
// Put segments in an array and try to parse leftover segments as instructions segments. When a
|
||||||
|
// segment can't be parsed as instructions, fall back to parsing it as a data segment.
|
||||||
var offset = 0
|
var offset = 0
|
||||||
|
|
||||||
while (offset < cursor.size) {
|
while (offset < cursor.size) {
|
||||||
@ -104,13 +111,27 @@ fun parseBytecode(
|
|||||||
}
|
}
|
||||||
|
|
||||||
cursor.seekStart(offset)
|
cursor.seekStart(offset)
|
||||||
parseDataSegment(
|
|
||||||
|
val isInstructionsSegment = tryParseInstructionsSegment(
|
||||||
offsetToSegment,
|
offsetToSegment,
|
||||||
|
labelHolder,
|
||||||
cursor,
|
cursor,
|
||||||
endOffset,
|
endOffset,
|
||||||
labels?.toMutableList() ?: mutableListOf()
|
labels?.toMutableList() ?: mutableListOf(),
|
||||||
|
dcGcFormat,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (!isInstructionsSegment) {
|
||||||
|
cursor.seekStart(offset)
|
||||||
|
|
||||||
|
parseDataSegment(
|
||||||
|
offsetToSegment,
|
||||||
|
cursor,
|
||||||
|
endOffset,
|
||||||
|
labels?.toMutableList() ?: mutableListOf()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
segment = offsetToSegment[offset]
|
segment = offsetToSegment[offset]
|
||||||
|
|
||||||
check(endOffset > offset) {
|
check(endOffset > offset) {
|
||||||
@ -612,6 +633,141 @@ private fun parseInstructionArguments(
|
|||||||
return args
|
return args
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tries to interpret the bytes from the cursor's current position up to [endOffset] as an
 * instructions segment.
 *
 * The segment is first parsed non-leniently with [parseInstructionsSegment]. The parsed
 * instructions are then run through a set of heuristics: nop counts, the ratio of unknown opcodes,
 * the ratio of stack pops that don't directly follow a push, and the ratio of references to labels
 * that [labelHolder] doesn't have. Any exception thrown along the way counts as a failure.
 *
 * @return true if the segment was parsed and passed every heuristic; false otherwise. The reason
 * for a rejection is logged at trace level.
 */
private fun tryParseInstructionsSegment(
    offsetToSegment: MutableMap<Int, Segment>,
    labelHolder: LabelHolder,
    cursor: Cursor,
    endOffset: Int,
    labels: MutableList<Int>,
    dcGcFormat: Boolean,
): Boolean {
    // Remember where the segment starts, parsing moves the cursor.
    val startOffset = cursor.position

    // Logs why the segment was rejected and returns false so that callers can `return reject(...)`.
    // The message is only built when the logger evaluates the lambda.
    fun reject(reason: String, cause: Throwable? = null): Boolean {
        logger.trace(cause) {
            buildString {
                append("Determined that segment ")

                if (labels.isNotEmpty()) {
                    append(if (labels.size == 1) "with label " else "with labels ")
                    labels.joinTo(this)
                } else {
                    append("without label")
                }

                append(" at offset ")
                append(startOffset)
                append(" is not an instructions segment because ")
                append(reason)
                append(".")
            }
        }

        return false
    }

    return try {
        parseInstructionsSegment(
            offsetToSegment,
            labelHolder,
            cursor,
            endOffset,
            labels,
            nextLabel = null,
            lenient = false,
            dcGcFormat,
        )

        // If no instruction segment is registered at the start offset, this cast throws and the
        // catch block below rejects the segment.
        val instructions = (offsetToSegment[startOffset] as InstructionSegment).instructions

        // Statistics for the heuristics that detect data that merely happens to parse as code.
        var previousOpcode: Opcode? = null
        var nopsTotal = 0
        var nopsInARow = 0
        var unknownOpcodes = 0
        var pops = 0
        var popsWithoutPush = 0
        var labelRefs = 0
        var unknownLabelRefs = 0

        for (instruction in instructions) {
            val opcode = instruction.opcode

            if (opcode.code != OP_NOP.code) {
                nopsInARow = 0
            } else {
                nopsTotal += 1

                if (nopsTotal > MAX_TOTAL_NOPS) {
                    return reject("it has more than $MAX_TOTAL_NOPS nop instructions")
                }

                nopsInARow += 1

                if (nopsInARow > MAX_SEQUENTIAL_NOPS) {
                    return reject("it has more than $MAX_SEQUENTIAL_NOPS sequential nop instructions")
                }
            }

            if (!opcode.known) unknownOpcodes++

            if (opcode.stack == StackInteraction.Pop) {
                pops++

                // Only the directly preceding instruction is considered.
                if (previousOpcode?.stack != StackInteraction.Push) popsWithoutPush++
            }

            for ((paramIndex, param) in opcode.params.withIndex()) {
                // Stop once there are no more arguments to match against parameters.
                if (paramIndex >= instruction.args.size) break
                if (param.type !is LabelType) continue

                for (labelArg in instruction.getArgs(paramIndex)) {
                    labelRefs++

                    if (!labelHolder.hasLabel(labelArg.value as Int)) unknownLabelRefs++
                }
            }

            previousOpcode = opcode
        }

        // Note for all ratios below: a zero denominator yields NaN and `NaN > limit` is false, so a
        // segment without e.g. label references passes the corresponding check.
        val unknownLabelRatio = unknownLabelRefs.toDouble() / labelRefs

        if (unknownLabelRatio > MAX_UNKNOWN_LABEL_RATIO) {
            return reject(
                "${100 * unknownLabelRatio}% of its label references are to nonexistent labels"
            )
        }

        val stackPopWithoutPrecedingPushRatio = popsWithoutPush.toDouble() / pops

        if (stackPopWithoutPrecedingPushRatio > MAX_STACK_POP_WITHOUT_PRECEDING_PUSH_RATIO) {
            return reject(
                "${100 * stackPopWithoutPrecedingPushRatio}% of its stack pop instructions don't have a preceding push instruction"
            )
        }

        val unknownOpcodeRatio = unknownOpcodes.toDouble() / instructions.size

        if (unknownOpcodeRatio > MAX_UNKNOWN_OPCODE_RATIO) {
            return reject("${100 * unknownOpcodeRatio}% of its opcodes are unknown")
        }

        true
    } catch (e: Exception) {
        reject("parsing it resulted in an exception", e)
    }
}
|
||||||
|
|
||||||
fun writeBytecode(bytecodeIr: BytecodeIr, dcGcFormat: Boolean): BytecodeAndLabelOffsets {
|
fun writeBytecode(bytecodeIr: BytecodeIr, dcGcFormat: Boolean): BytecodeAndLabelOffsets {
|
||||||
val buffer = Buffer.withCapacity(100 * bytecodeIr.segments.size, Endianness.Little)
|
val buffer = Buffer.withCapacity(100 * bytecodeIr.segments.size, Endianness.Little)
|
||||||
val cursor = buffer.cursor()
|
val cursor = buffer.cursor()
|
||||||
@ -764,6 +920,8 @@ private class LabelHolder(labelOffsets: IntArray) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns true if [label] is contained in `labelMap`, false otherwise.
 * NOTE(review): `labelMap` presumably holds the labels from the label offset table passed to the
 * constructor; confirm against the class initialization code.
 */
fun hasLabel(label: Int): Boolean = label in labelMap
|
||||||
|
|
||||||
fun getLabels(offset: Int): List<Int>? = offsetMap[offset]
|
fun getLabels(offset: Int): List<Int>? = offsetMap[offset]
|
||||||
|
|
||||||
fun getInfo(label: Int): LabelInfo? {
|
fun getInfo(label: Int): LabelInfo? {
|
||||||
|
@ -6,8 +6,9 @@ import kotlin.test.assertNotNull
|
|||||||
import kotlin.test.assertNull
|
import kotlin.test.assertNull
|
||||||
|
|
||||||
/**
 * Asserts that [expected] and [actual] are deeply equal by comparing their segment lists pairwise
 * with the segment-level `assertDeepEquals`. [ignoreSrcLocs] is passed through to every segment
 * comparison.
 */
fun assertDeepEquals(expected: BytecodeIr, actual: BytecodeIr, ignoreSrcLocs: Boolean = false) {
    val expectedSegments = expected.segments
    val actualSegments = actual.segments

    assertDeepEquals(expectedSegments, actualSegments) { expectedSegment, actualSegment ->
        assertDeepEquals(expectedSegment, actualSegment, ignoreSrcLocs)
    }
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user