diff --git a/core/src/commonMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt b/core/src/commonMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt index 7d8f9c68..8945a4b0 100644 --- a/core/src/commonMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt +++ b/core/src/commonMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt @@ -2,10 +2,9 @@ package world.phantasmal.core // Char.isWhitespace is very slow in JS, use this until // https://youtrack.jetbrains.com/issue/KT-43216 lands. -fun Char.fastIsWhitespace(): Boolean = - this == ' ' || this in '\u0009'..'\u000D' +expect inline fun Char.fastIsWhitespace(): Boolean -fun Char.isDigit(): Boolean = this in '0'..'9' +expect inline fun Char.isDigit(): Boolean /** * Returns true if the bit at the given position is set. Bits are indexed from lowest-order diff --git a/core/src/commonMain/kotlin/world/phantasmal/core/StandardExtensions.kt b/core/src/commonMain/kotlin/world/phantasmal/core/StandardExtensions.kt index fa0025f4..918ddc2d 100644 --- a/core/src/commonMain/kotlin/world/phantasmal/core/StandardExtensions.kt +++ b/core/src/commonMain/kotlin/world/phantasmal/core/StandardExtensions.kt @@ -1,5 +1,8 @@ package world.phantasmal.core +// String.replace is very slow in JS. +expect inline fun String.fastReplace(oldValue: String, newValue: String): String + fun MutableList.replaceAll(elements: Collection): Boolean { clear() return addAll(elements) diff --git a/core/src/commonMain/kotlin/world/phantasmal/core/Strings.kt b/core/src/commonMain/kotlin/world/phantasmal/core/Strings.kt index 370efdc1..726dbb96 100644 --- a/core/src/commonMain/kotlin/world/phantasmal/core/Strings.kt +++ b/core/src/commonMain/kotlin/world/phantasmal/core/Strings.kt @@ -25,3 +25,5 @@ fun filenameExtension(filename: String): String? = // Has an extension. else -> filename.substring(dotIdx + 1) } + +expect inline fun String.getCodePointAt(index: Int): Int diff --git a/core/src/jsMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt b/core/src/jsMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt index 89ed8dc3..f5ad909c 100644 --- a/core/src/jsMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt +++ b/core/src/jsMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt @@ -5,6 +5,14 @@ import org.khronos.webgl.DataView private val dataView = DataView(ArrayBuffer(4)) +@Suppress("NOTHING_TO_INLINE") +actual inline fun Char.fastIsWhitespace(): Boolean = + asDynamic() == 0x20 || (asDynamic() >= 0x09 && asDynamic() <= 0x0D) + +@Suppress("NOTHING_TO_INLINE") +actual inline fun Char.isDigit(): Boolean = + asDynamic() >= 0x30 && asDynamic() <= 0x39 + actual fun Int.reinterpretAsFloat(): Float { dataView.setInt32(0, this) return dataView.getFloat32(0) diff --git a/core/src/jsMain/kotlin/world/phantasmal/core/StandardExtensions.kt b/core/src/jsMain/kotlin/world/phantasmal/core/StandardExtensions.kt new file mode 100644 index 00000000..a781b47c --- /dev/null +++ b/core/src/jsMain/kotlin/world/phantasmal/core/StandardExtensions.kt @@ -0,0 +1,5 @@ +package world.phantasmal.core + +@Suppress("NOTHING_TO_INLINE") +actual inline fun String.fastReplace(oldValue: String, newValue: String): String = + asDynamic().replaceAll(oldValue, newValue).unsafeCast() diff --git a/core/src/jsMain/kotlin/world/phantasmal/core/Strings.kt b/core/src/jsMain/kotlin/world/phantasmal/core/Strings.kt new file mode 100644 index 00000000..1ce4fa68 --- /dev/null +++ b/core/src/jsMain/kotlin/world/phantasmal/core/Strings.kt @@ -0,0 +1,5 @@ +package world.phantasmal.core + +@Suppress("NOTHING_TO_INLINE") +actual inline fun String.getCodePointAt(index: Int): Int = + asDynamic().charCodeAt(index).unsafeCast() diff --git a/core/src/jvmMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt b/core/src/jvmMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt index 538e7110..629f41f5 100644 --- a/core/src/jvmMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt +++ b/core/src/jvmMain/kotlin/world/phantasmal/core/PrimitiveExtensions.kt @@ -5,6 +5,12 @@ package world.phantasmal.core import java.lang.Float.floatToIntBits import java.lang.Float.intBitsToFloat +@Suppress("NOTHING_TO_INLINE") +actual inline fun Char.fastIsWhitespace(): Boolean = isWhitespace() + +@Suppress("NOTHING_TO_INLINE") +actual inline fun Char.isDigit(): Boolean = this in '0'..'9' + actual fun Int.reinterpretAsFloat(): Float = intBitsToFloat(this) actual fun Float.reinterpretAsInt(): Int = floatToIntBits(this) diff --git a/core/src/jvmMain/kotlin/world/phantasmal/core/StandardExtensions.kt b/core/src/jvmMain/kotlin/world/phantasmal/core/StandardExtensions.kt new file mode 100644 index 00000000..47cc1fa5 --- /dev/null +++ b/core/src/jvmMain/kotlin/world/phantasmal/core/StandardExtensions.kt @@ -0,0 +1,7 @@ +@file:JvmName("StandardExtensionsJvm") + +package world.phantasmal.core + +@Suppress("NOTHING_TO_INLINE") +actual inline fun String.fastReplace(oldValue: String, newValue: String): String = + replace(oldValue, newValue) diff --git a/core/src/jvmMain/kotlin/world/phantasmal/core/Strings.kt b/core/src/jvmMain/kotlin/world/phantasmal/core/Strings.kt new file mode 100644 index 00000000..71ae5808 --- /dev/null +++ b/core/src/jvmMain/kotlin/world/phantasmal/core/Strings.kt @@ -0,0 +1,6 @@ +@file:JvmName("StringsJvm") + +package world.phantasmal.core + +@Suppress("NOTHING_TO_INLINE") +actual inline fun String.getCodePointAt(index: Int): Int = codePointAt(index) diff --git a/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/AsmTokenization.kt b/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/AsmTokenization.kt index fc7f91f3..eb3e78c8 100644 --- a/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/AsmTokenization.kt +++ b/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/AsmTokenization.kt @@ -1,197 +1,79 @@ package world.phantasmal.lib.asm import world.phantasmal.core.fastIsWhitespace +import world.phantasmal.core.fastReplace +import world.phantasmal.core.getCodePointAt import world.phantasmal.core.isDigit -import kotlin.contracts.ExperimentalContracts -import kotlin.contracts.contract private val HEX_INT_REGEX = Regex("""^0[xX][0-9a-fA-F]+$""") private val FLOAT_REGEX = Regex("""^-?\d+(\.\d+)?(e-?\d+)?$""") -private val IDENT_REGEX = Regex("""^[a-z][a-z0-9_=<>!]*$""") -const val TOKEN_INT32 = 1 -const val TOKEN_FLOAT32 = 2 -const val TOKEN_INVALID_NUMBER = 3 -const val TOKEN_REGISTER = 4 -const val TOKEN_LABEL = 5 -const val TOKEN_SECTION_CODE = 6 -const val TOKEN_SECTION_DATA = 7 -const val TOKEN_SECTION_STR = 8 -const val TOKEN_INVALID_SECTION = 9 -const val TOKEN_STR = 10 -const val TOKEN_UNTERMINATED_STR = 11 -const val TOKEN_IDENT = 12 -const val TOKEN_INVALID_IDENT = 13 -const val TOKEN_ARG_SEP = 14 - -sealed class Token { - /** - * This property is used for increased perf type checks in JS. - */ - abstract val type: Int - abstract val col: Int - abstract val len: Int - - class Int32( - override val col: Int, - override val len: Int, - val value: Int, - ) : Token() { - override val type = TOKEN_INT32 - } - - class Float32( - override val col: Int, - override val len: Int, - val value: Float, - ) : Token() { - override val type = TOKEN_FLOAT32 - } - - class InvalidNumber( - override val col: Int, - override val len: Int, - ) : Token() { - override val type = TOKEN_INVALID_NUMBER - } - - class Register( - override val col: Int, - override val len: Int, - val value: Int, - ) : Token() { - override val type = TOKEN_REGISTER - } - - class Label( - override val col: Int, - override val len: Int, - val value: Int, - ) : Token() { - override val type = TOKEN_LABEL - } - - sealed class Section : Token() { - class Code( - override val col: Int, - override val len: Int, - ) : Section() { - override val type = TOKEN_SECTION_CODE - } - - class Data( - override val col: Int, - override val len: Int, - ) : Section() { - override val type = TOKEN_SECTION_DATA - } - - class Str( - override val col: Int, - override val len: Int, - ) : Section() { - override val type = TOKEN_SECTION_STR - } - } - - class InvalidSection( - override val col: Int, - override val len: Int, - ) : Token() { - override val type = TOKEN_INVALID_SECTION - } - - class Str( - override val col: Int, - override val len: Int, - val value: String, - ) : Token() { - override val type = TOKEN_STR - } - - class UnterminatedString( - override val col: Int, - override val len: Int, - val value: String, - ) : Token() { - override val type = TOKEN_UNTERMINATED_STR - } - - class Ident( - override val col: Int, - override val len: Int, - val value: String, - ) : Token() { - override val type = TOKEN_IDENT - } - - class InvalidIdent( - override val col: Int, - override val len: Int, - ) : Token() { - override val type = TOKEN_INVALID_IDENT - } - - class ArgSeparator( - override val col: Int, - override val len: Int, - ) : Token() { - override val type = TOKEN_ARG_SEP - } - - @OptIn(ExperimentalContracts::class) - @Suppress("NOTHING_TO_INLINE") - inline fun isInt32(): Boolean { - contract { returns(true) implies (this@Token is Int32) } - return type == TOKEN_INT32 - } - - @OptIn(ExperimentalContracts::class) - @Suppress("NOTHING_TO_INLINE") - inline fun isFloat32(): Boolean { - contract { returns(true) implies (this@Token is Float32) } - return type == TOKEN_FLOAT32 - } - - @OptIn(ExperimentalContracts::class) - @Suppress("NOTHING_TO_INLINE") - inline fun isRegister(): Boolean { - contract { returns(true) implies (this@Token is Register) } - return type == TOKEN_REGISTER - } - - @OptIn(ExperimentalContracts::class) - @Suppress("NOTHING_TO_INLINE") - inline fun isStr(): Boolean { - contract { returns(true) implies (this@Token is Str) } - return type == TOKEN_STR - } - - @OptIn(ExperimentalContracts::class) - @Suppress("NOTHING_TO_INLINE") - inline fun isArgSeparator(): Boolean { - contract { returns(true) implies (this@Token is ArgSeparator) } - return type == TOKEN_ARG_SEP - } +enum class Token { + Int32, + Float32, + InvalidNumber, + Register, + Label, + CodeSection, + DataSection, + StrSection, + InvalidSection, + Str, + UnterminatedStr, + Ident, + InvalidIdent, + ArgSeparator, } -fun tokenizeLine(line: String): MutableList = - LineTokenizer(line).tokenize() - -private class LineTokenizer(private var line: String) { +class LineTokenizer { + private var line = "" private var index = 0 + private var startIndex = 0 - private val col: Int - get() = index + 1 + private var value: Any? = null - private var mark = 0 + var type: Token? = null + private set - fun tokenize(): MutableList { - val tokens = mutableListOf() + val col: Int get() = startIndex + 1 + val len: Int get() = index - startIndex + + fun tokenize(line: String) { + this.line = line + index = 0 + startIndex = 0 + } + + val intValue: Int + get() { + require(type === Token.Int32 || type === Token.Register || type === Token.Label) + return value as Int + } + + val floatValue: Float + get() { + require(type === Token.Float32) + return value as Float + } + + val strValue: String + get() { + require( + type === Token.Str || + type === Token.UnterminatedStr || + type === Token.Ident || + type === Token.InvalidIdent + ) + return value as String + } + + fun nextToken(): Boolean { + type = null + value = null while (hasNext()) { + startIndex = index val char = peek() - var token: Token if (char == '/') { skip() @@ -207,25 +89,27 @@ private class LineTokenizer(private var line: String) { if (char.fastIsWhitespace()) { skip() continue - } else if (char == '-' || char.isDigit()) { - token = tokenizeNumberOrLabel() - } else if (char == ',') { - token = Token.ArgSeparator(col, 1) - skip() - } else if (char == '.') { - token = tokenizeSection() - } else if (char == '"') { - token = tokenizeString() - } else if (char == 'r') { - token = tokenizeRegisterOrIdent() - } else { - token = tokenizeIdent() } - tokens.add(token) + if (char == '-' || char.isDigit()) { + tokenizeNumberOrLabel() + } else if (char == ',') { + type = Token.ArgSeparator + skip() + } else if (char == '.') { + tokenizeSection() + } else if (char == '"') { + tokenizeString() + } else if (char == 'r') { + tokenizeRegisterOrIdent() + } else { + tokenizeIdent() + } + + break } - return tokens + return type != null } private fun hasNext(): Boolean = index < line.length @@ -242,13 +126,8 @@ private class LineTokenizer(private var line: String) { index-- } - private fun mark() { - mark = index - } - - private fun markedLen(): Int = index - mark - - private fun slice(): String = line.substring(mark, index) + private fun slice(from: Int = 0, to: Int = 0): String = + line.substring(startIndex + from, index - to) private fun eatRestOfToken() { while (hasNext()) { @@ -261,9 +140,7 @@ private class LineTokenizer(private var line: String) { } } - private fun tokenizeNumberOrLabel(): Token { - mark() - val col = this.col + private fun tokenizeNumberOrLabel() { val firstChar = next() var isLabel = false @@ -271,9 +148,11 @@ private class LineTokenizer(private var line: String) { val char = peek() if (char == '.' || char == 'e') { - return tokenizeFloat(col) + tokenizeFloat() + return } else if (firstChar == '0' && (char == 'x' || char == 'X')) { - return tokenizeHexNumber(col) + tokenizeHexNumber() + return } else if (char == ':') { isLabel = true break @@ -284,53 +163,53 @@ private class LineTokenizer(private var line: String) { } } - val value = slice().toIntOrNull() + value = slice().toIntOrNull() if (isLabel) { skip() } - if (value == null) { - return Token.InvalidNumber(col, markedLen()) - } - - return if (isLabel) { - Token.Label(col, markedLen(), value) - } else { - Token.Int32(col, markedLen(), value) + type = when { + value == null -> Token.InvalidNumber + isLabel -> Token.Label + else -> Token.Int32 } } - private fun tokenizeHexNumber(col: Int): Token { + private fun tokenizeHexNumber() { eatRestOfToken() val hexStr = slice() if (HEX_INT_REGEX.matches(hexStr)) { - hexStr.drop(2).toIntOrNull(16)?.let { value -> - return Token.Int32(col, markedLen(), value) + value = hexStr.drop(2).toIntOrNull(16) + + if (value != null) { + type = Token.Int32 + return } } - return Token.InvalidNumber(col, markedLen()) + type = Token.InvalidNumber } - private fun tokenizeFloat(col: Int): Token { + private fun tokenizeFloat() { eatRestOfToken() val floatStr = slice() if (FLOAT_REGEX.matches(floatStr)) { - floatStr.toFloatOrNull()?.let { value -> - return Token.Float32(col, markedLen(), value) + value = floatStr.toFloatOrNull() + + if (value != null) { + type = Token.Float32 + return } } - return Token.InvalidNumber(col, markedLen()) + type = Token.InvalidNumber } - private fun tokenizeRegisterOrIdent(): Token { - val col = this.col + private fun tokenizeRegisterOrIdent() { skip() - mark() var isRegister = false while (hasNext()) { @@ -344,20 +223,16 @@ private class LineTokenizer(private var line: String) { } } - return if (isRegister) { - val value = slice().toInt() - - Token.Register(col, markedLen() + 1, value) + if (isRegister) { + value = slice(from = 1).toInt() + type = Token.Register } else { back() tokenizeIdent() } } - private fun tokenizeSection(): Token { - val col = this.col - mark() - + private fun tokenizeSection() { while (hasNext()) { if (peek().fastIsWhitespace()) { break @@ -366,18 +241,16 @@ private class LineTokenizer(private var line: String) { } } - return when (slice()) { - ".code" -> Token.Section.Code(col, 5) - ".data" -> Token.Section.Data(col, 5) - ".string" -> Token.Section.Str(col, 7) - else -> Token.InvalidSection(col, markedLen()) + type = when (slice()) { + ".code" -> Token.CodeSection + ".data" -> Token.DataSection + ".string" -> Token.StrSection + else -> Token.InvalidSection } } - private fun tokenizeString(): Token { - val col = this.col + private fun tokenizeString() { skip() - mark() var prevWasBackSpace = false var terminated = false @@ -389,6 +262,7 @@ private class LineTokenizer(private var line: String) { } '"' -> { if (!prevWasBackSpace) { + skip() terminated = true break@loop } @@ -400,24 +274,21 @@ private class LineTokenizer(private var line: String) { } } - next() + skip() } - val lenWithoutQuotes = markedLen() - val value = slice().replace("\\\"", "\"").replace("\\n", "\n") + value = slice(from = 1, to = if (terminated) 1 else 0) + .fastReplace("\\\"", "\"") + .fastReplace("\\n", "\n") - return if (terminated) { - next() - Token.Str(col, lenWithoutQuotes + 2, value) + type = if (terminated) { + Token.Str } else { - Token.UnterminatedString(col, lenWithoutQuotes + 1, value) + Token.UnterminatedStr } } - private fun tokenizeIdent(): Token { - val col = this.col - mark() - + private fun tokenizeIdent() { while (hasNext()) { val char = peek() @@ -435,12 +306,33 @@ private class LineTokenizer(private var line: String) { } } - val value = slice() + val ident = slice() + value = ident - return if (IDENT_REGEX.matches(value)) { - Token.Ident(col, markedLen(), value) - } else { - Token.InvalidIdent(col, markedLen()) + if (ident.getCodePointAt(0) !in ('a'.toInt())..('z'.toInt())) { + type = Token.InvalidIdent + return } + + for (i in 1 until ident.length) { + when (ident.getCodePointAt(i)) { + in ('0'.toInt())..('9'.toInt()), + in ('a'.toInt())..('z'.toInt()), + ('_').toInt(), + ('=').toInt(), + ('<').toInt(), + ('>').toInt(), + ('!').toInt(), + -> { + // Valid character. + } + else -> { + type = Token.InvalidIdent + return + } + } + } + + type = Token.Ident } } diff --git a/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Assembly.kt b/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Assembly.kt index 6de69074..e8317e34 100644 --- a/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Assembly.kt +++ b/lib/src/commonMain/kotlin/world/phantasmal/lib/asm/Assembly.kt @@ -43,7 +43,7 @@ fun assemble( private class Assembler(private val asm: List, private val inlineStackArgs: Boolean) { private var lineNo = 1 - private lateinit var tokens: MutableList + private val tokenizer = LineTokenizer() private var ir: MutableList = mutableListOf() /** @@ -64,51 +64,57 @@ private class Assembler(private val asm: List, private val inlineStackAr fun assemble(): PwResult { // Tokenize and assemble line by line. for (line in asm) { - tokens = tokenizeLine(line) + tokenizer.tokenize(line) + tokenizer.nextToken() - if (tokens.isNotEmpty()) { - val token = tokens.removeFirst() + if (tokenizer.type != null) { var hasLabel = false // Token type checks are ordered from most frequent to least frequent for increased // perf. - when (token) { - is Token.Ident -> { + when (tokenizer.type) { + Token.Ident -> { if (section === SegmentType.Instructions) { - parseInstruction(token) + parseInstruction() } else { - addUnexpectedTokenError(token) + addUnexpectedTokenError() } } - is Token.Label -> { - parseLabel(token) + Token.Label -> { + parseLabel() hasLabel = true } - is Token.Section -> { - parseSection(token) + Token.CodeSection -> { + parseCodeSection() } - is Token.Int32 -> { + Token.DataSection -> { + parseDataSection() + } + Token.StrSection -> { + parseStrSection() + } + Token.Int32 -> { if (section === SegmentType.Data) { - parseBytes(token) + parseBytes() } else { - addUnexpectedTokenError(token) + addUnexpectedTokenError() } } - is Token.Str -> { + Token.Str -> { if (section === SegmentType.String) { - parseString(token) + parseString() } else { - addUnexpectedTokenError(token) + addUnexpectedTokenError() } } - is Token.InvalidSection -> { - addError(token, "Invalid section type.") + Token.InvalidSection -> { + addError("Invalid section type.") } - is Token.InvalidIdent -> { - addError(token, "Invalid identifier.") + Token.InvalidIdent -> { + addError("Invalid identifier.") } else -> { - addUnexpectedTokenError(token) + addUnexpectedTokenError() } } @@ -124,9 +130,9 @@ private class Assembler(private val asm: List, private val inlineStackAr private fun addInstruction( opcode: Opcode, args: List, - token: Token?, - argTokens: List, - stackArgTokens: List, + mnemonicSrcLoc: SrcLoc?, + argSrcLocs: List, + stackArgSrcLocs: List, ) { when (val seg = segment) { null -> { @@ -146,17 +152,9 @@ private class Assembler(private val asm: List, private val inlineStackAr opcode, args, InstructionSrcLoc( - mnemonic = token?.let { - SrcLoc(lineNo, token.col, token.len) - }, - // Use mapTo with ArrayList for better perf in JS. - args = argTokens.mapTo(ArrayList(argTokens.size)) { - SrcLoc(lineNo, it.col, it.len) - }, - // Use mapTo with ArrayList for better perf in JS. - stackArgs = stackArgTokens.mapTo(ArrayList(argTokens.size)) { - SrcLoc(lineNo, it.col, it.len) - }, + mnemonic = mnemonicSrcLoc, + args = argSrcLocs, + stackArgs = stackArgSrcLocs, ) ) ) @@ -233,40 +231,37 @@ private class Assembler(private val asm: List, private val inlineStackAr ) } - private fun addError(token: Token, uiMessage: String, message: String? = null) { - addError(token.col, token.len, uiMessage, message) + private fun addError(uiMessage: String, message: String? = null) { + addError(tokenizer.col, tokenizer.len, uiMessage, message) } - private fun addUnexpectedTokenError(token: Token) { + private fun addUnexpectedTokenError() { addError( - token, "Unexpected token.", - "Unexpected ${token::class.simpleName} at ${token.srcLoc()}.", + "Unexpected ${tokenizer.type?.name} at $lineNo:${tokenizer.col}.", ) } - private fun addWarning(token: Token, uiMessage: String) { + private fun addWarning(uiMessage: String) { result.addProblem( AssemblyProblem( Severity.Warning, uiMessage, lineNo = lineNo, - col = token.col, - len = token.len, + col = tokenizer.col, + len = tokenizer.len, ) ) } - private fun parseLabel(token: Token.Label) { - val label = token.value + private fun parseLabel() { + val label = tokenizer.intValue if (!labels.add(label)) { - addError(token, "Duplicate label.") + addError("Duplicate label.") } - val nextToken = tokens.removeFirstOrNull() - - val srcLoc = SrcLoc(lineNo, token.col, token.len) + val srcLoc = srcLocFromTokenizer() if (prevLineHadLabel) { val segment = ir.last() @@ -274,6 +269,8 @@ private class Assembler(private val asm: List, private val inlineStackAr segment.srcLoc.labels.add(srcLoc) } + tokenizer.nextToken() + when (section) { SegmentType.Instructions -> { if (!prevLineHadLabel) { @@ -286,12 +283,10 @@ private class Assembler(private val asm: List, private val inlineStackAr ir.add(segment!!) } - if (nextToken != null) { - if (nextToken is Token.Ident) { - parseInstruction(nextToken) - } else { - addError(nextToken, "Expected opcode mnemonic.") - } + if (tokenizer.type === Token.Ident) { + parseInstruction() + } else if (tokenizer.type != null) { + addError("Expected opcode mnemonic.") } } @@ -305,12 +300,10 @@ private class Assembler(private val asm: List, private val inlineStackAr ir.add(segment!!) } - if (nextToken != null) { - if (nextToken is Token.Int32) { - parseBytes(nextToken) - } else { - addError(nextToken, "Expected bytes.") - } + if (tokenizer.type === Token.Int32) { + parseBytes() + } else if (tokenizer.type != null) { + addError("Expected bytes.") } } @@ -325,194 +318,86 @@ private class Assembler(private val asm: List, private val inlineStackAr ir.add(segment!!) } - if (nextToken != null) { - if (nextToken is Token.Str) { - parseString(nextToken) - } else { - addError(nextToken, "Expected a string.") - } + if (tokenizer.type === Token.Str) { + parseString() + } else if (tokenizer.type != null) { + addError("Expected a string.") } } } } - private fun parseSection(token: Token.Section) { - val section = when (token) { - is Token.Section.Code -> SegmentType.Instructions - is Token.Section.Data -> SegmentType.Data - is Token.Section.Str -> SegmentType.String - } + private fun parseCodeSection() { + parseSection(SegmentType.Instructions) + } + private fun parseDataSection() { + parseSection(SegmentType.Data) + } + + private fun parseStrSection() { + parseSection(SegmentType.String) + } + + private fun parseSection(section: SegmentType) { if (this.section == section && !firstSectionMarker) { - addWarning(token, "Unnecessary section marker.") + addWarning("Unnecessary section marker.") } this.section = section firstSectionMarker = false - tokens.removeFirstOrNull()?.let { nextToken -> - addUnexpectedTokenError(nextToken) + if (tokenizer.nextToken()) { + addUnexpectedTokenError() } } - private fun parseInstruction(identToken: Token.Ident) { - val opcode = mnemonicToOpcode(identToken.value) + private fun parseInstruction() { + val opcode = mnemonicToOpcode(tokenizer.strValue) + val mnemonicSrcLoc = srcLocFromTokenizer() if (opcode == null) { - addError(identToken, "Unknown opcode.") + addError("Unknown opcode.") } else { - // Use find instead of any for better JS perf. - val varargs = opcode.params.find { - it.type === ILabelVarType || it.type === RegRefVarType - } != null - - val paramCount = - if (!inlineStackArgs && opcode.stack === StackInteraction.Pop) 0 - else opcode.params.size - - // Use fold instead of count for better JS perf. - val argCount = tokens.fold(0) { sum, token -> - if (token.isArgSeparator()) sum else sum + 1 - } - - val lastToken = tokens.lastOrNull() - val errorLength = lastToken?.let { it.col + it.len - identToken.col } ?: 0 // Inline arguments. val inlineArgs = mutableListOf() - val inlineTokens = mutableListOf() + val inlineArgSrcLocs = mutableListOf() // Stack arguments. val stackArgs = mutableListOf() - val stackTokens = mutableListOf() + val stackArgSrcLocs = mutableListOf() - if (!varargs && argCount != paramCount) { - addError( - identToken.col, - errorLength, - "Expected $paramCount argument${ - if (paramCount == 1) "" else "s" - }, got $argCount.", - ) - - return - } else if (varargs && argCount < paramCount) { - // TODO: This check assumes we want at least 1 argument for a vararg parameter. - // Is this correct? - addError( - identToken.col, - errorLength, - "Expected at least $paramCount argument${ - if (paramCount == 1) "" else "s" - }, got $argCount.", - ) - - return - } else if (opcode.stack !== StackInteraction.Pop) { + if (opcode.stack !== StackInteraction.Pop) { // Arguments should be inlined right after the opcode. - if (!parseArgs(opcode.params, inlineArgs, inlineTokens, stack = false)) { + if (!parseArgs( + opcode, + mnemonicSrcLoc.col, + inlineArgs, + inlineArgSrcLocs, + stack = false, + ) + ) { return } } else { // Arguments should be passed to the opcode via the stack. - if (!parseArgs(opcode.params, stackArgs, stackTokens, stack = true)) { + if (!parseArgs( + opcode, + mnemonicSrcLoc.col, + stackArgs, + stackArgSrcLocs, + stack = true, + ) + ) { return } - - for (i in opcode.params.indices) { - val param = opcode.params[i] - val arg = stackArgs.getOrNull(i) ?: continue - val argToken = stackTokens.getOrNull(i) ?: continue - - if (argToken.isRegister()) { - if (param.type is RegTupRefType) { - addInstruction( - OP_ARG_PUSHB, - listOf(arg), - null, - listOf(argToken), - emptyList(), - ) - } else { - addInstruction( - OP_ARG_PUSHR, - listOf(arg), - null, - listOf(argToken), - emptyList(), - ) - } - } else { - when (param.type) { - ByteType, - RegRefType, - is RegTupRefType, - -> { - addInstruction( - OP_ARG_PUSHB, - listOf(arg), - null, - listOf(argToken), - emptyList(), - ) - } - - ShortType, - is LabelType, - -> { - addInstruction( - OP_ARG_PUSHW, - listOf(arg), - null, - listOf(argToken), - emptyList(), - ) - } - - IntType -> { - addInstruction( - OP_ARG_PUSHL, - listOf(arg), - null, - listOf(argToken), - emptyList(), - ) - } - - FloatType -> { - addInstruction( - OP_ARG_PUSHL, - listOf(Arg((arg.value as Float).toRawBits())), - null, - listOf(argToken), - emptyList(), - ) - } - - StringType -> { - addInstruction( - OP_ARG_PUSHS, - listOf(arg), - null, - listOf(argToken), - emptyList(), - ) - } - - else -> { - logger.error { - "Line $lineNo: Type ${param.type::class} not implemented." - } - } - } - } - } } addInstruction( opcode, inlineArgs, - identToken, - inlineTokens, - stackTokens, + mnemonicSrcLoc, + inlineArgSrcLocs, + stackArgSrcLocs, ) } } @@ -521,155 +406,283 @@ private class Assembler(private val asm: List, private val inlineStackAr * Returns true iff arguments can be translated to byte code, possibly after truncation. */ private fun parseArgs( - params: List, + opcode: Opcode, + startCol: Int, args: MutableList, - argTokens: MutableList, + srcLocs: MutableList, stack: Boolean, ): Boolean { + var varargs = false + var argCount = 0 var semiValid = true var shouldBeArg = true var paramI = 0 + var prevCol = 0 + var prevLen = 0 - for (i in 0 until tokens.size) { - val token = tokens[i] - val param = params[paramI] + while (tokenizer.nextToken()) { + if (tokenizer.type !== Token.ArgSeparator) { + argCount++ + } - if (token.isArgSeparator()) { - if (shouldBeArg) { - addError(token, "Expected an argument.") - } else if ( - param.type !== ILabelVarType && - param.type !== RegRefVarType - ) { - paramI++ + if (paramI < opcode.params.size) { + val param = opcode.params[paramI] + + if (param.type === ILabelVarType || param.type === RegRefVarType) { + // A varargs parameter is always the last parameter. + varargs = true } - shouldBeArg = true - } else { - if (!shouldBeArg) { - val prevToken = tokens[i - 1] - val col = prevToken.col + prevToken.len + if (tokenizer.type === Token.ArgSeparator) { + if (shouldBeArg) { + addError("Expected an argument.") + } else if (!varargs) { + paramI++ + } - addError(col, token.col - col, "Expected a comma.") - } + shouldBeArg = true + } else { + if (!shouldBeArg) { + val col = prevCol + prevLen + addError(col, tokenizer.col - col, "Expected a comma.") + } - shouldBeArg = false + shouldBeArg = false - var match: Boolean + // Try to match token type parameter type. + var typeMatch: Boolean - when { - token.isInt32() -> { - when (param.type) { - ByteType -> { - match = true - parseInt(1, token, args, argTokens) + // If arg is nonnull, types match and argument is syntactically valid. + val arg: Arg? = when (tokenizer.type) { + Token.Int32 -> { + when (param.type) { + ByteType -> { + typeMatch = true + parseInt(1) + } + ShortType, + is LabelType, + -> { + typeMatch = true + parseInt(2) + } + IntType -> { + typeMatch = true + parseInt(4) + } + FloatType -> { + typeMatch = true + Arg(tokenizer.intValue.toFloat()) + } + else -> { + typeMatch = false + null + } } - ShortType, - is LabelType, - -> { - match = true - parseInt(2, token, args, argTokens) + } + + Token.Float32 -> { + typeMatch = param.type === FloatType + + if (typeMatch) { + Arg(tokenizer.floatValue) + } else { + null } - IntType -> { - match = true - parseInt(4, token, args, argTokens) - } - FloatType -> { - match = true - args.add(Arg(token.value)) - argTokens.add(token) - } - else -> { - match = false + } + + Token.Register -> { + typeMatch = stack || + param.type === RegRefType || + param.type === RegRefVarType || + param.type is RegTupRefType + + parseRegister() + } + + Token.Str -> { + typeMatch = param.type === StringType + + if (typeMatch) { + Arg(tokenizer.strValue) + } else { + null } } + + else -> { + typeMatch = false + null + } } - token.isFloat32() -> { - match = param.type === FloatType + val srcLoc = srcLocFromTokenizer() - if (match) { - args.add(Arg(token.value)) - argTokens.add(token) + if (arg != null) { + args.add(arg) + srcLocs.add(srcLoc) + } + + if (!typeMatch) { + semiValid = false + + val typeStr: String? = when (param.type) { + ByteType -> "an 8-bit integer" + ShortType -> "a 16-bit integer" + IntType -> "a 32-bit integer" + FloatType -> "a float" + + ILabelType, + ILabelVarType, + -> "an instruction label" + + DLabelType -> "a data label" + SLabelType -> "a string label" + + is LabelType -> "a label" + + StringType -> "a string" + + RegRefType, + RegRefVarType, + is RegTupRefType, + -> "a register reference" + + else -> null + } + + addError( + if (typeStr == null) "Unexpected token." else "Expected ${typeStr}." + ) + } else if (stack && arg != null) { + // Inject stack push instructions if necessary. + // If the token is a register, push it as a register, otherwise coerce type. + if (tokenizer.type === Token.Register) { + if (param.type is RegTupRefType) { + addInstruction( + OP_ARG_PUSHB, + listOf(arg), + null, + listOf(srcLoc), + emptyList(), + ) + } else { + addInstruction( + OP_ARG_PUSHR, + listOf(arg), + null, + listOf(srcLoc), + emptyList(), + ) + } + } else { + when (param.type) { + ByteType, + RegRefType, + is RegTupRefType, + -> { + addInstruction( + OP_ARG_PUSHB, + listOf(arg), + null, + listOf(srcLoc), + emptyList(), + ) + } + + ShortType, + is LabelType, + -> { + addInstruction( + OP_ARG_PUSHW, + listOf(arg), + null, + listOf(srcLoc), + emptyList(), + ) + } + + IntType -> { + addInstruction( + OP_ARG_PUSHL, + listOf(arg), + null, + listOf(srcLoc), + emptyList(), + ) + } + + FloatType -> { + addInstruction( + OP_ARG_PUSHL, + listOf(Arg((arg.value as Float).toRawBits())), + null, + listOf(srcLoc), + emptyList(), + ) + } + + StringType -> { + addInstruction( + OP_ARG_PUSHS, + listOf(arg), + null, + listOf(srcLoc), + emptyList(), + ) + } + + else -> { + logger.error { + "Line $lineNo: Type ${param.type::class} not implemented." + } + } + } } } - - token.isRegister() -> { - match = stack || - param.type === RegRefType || - param.type === RegRefVarType || - param.type is RegTupRefType - - parseRegister(token, args, argTokens) - } - - token.isStr() -> { - match = param.type === StringType - - if (match) { - args.add(Arg(token.value)) - argTokens.add(token) - } - } - - else -> { - match = false - } - } - - if (!match) { - semiValid = false - - val typeStr: String? = when (param.type) { - ByteType -> "an 8-bit integer" - ShortType -> "a 16-bit integer" - IntType -> "a 32-bit integer" - FloatType -> "a float" - - ILabelType, - ILabelVarType, - -> "an instruction label" - - DLabelType -> "a data label" - SLabelType -> "a string label" - - is LabelType -> "a label" - - StringType -> "a string" - - RegRefType, - RegRefVarType, - is RegTupRefType, - -> "a register reference" - - else -> null - } - - addError( - token, - if (typeStr == null) "Unexpected token." else "Expected ${typeStr}." - ) } } + + prevCol = tokenizer.col + prevLen = tokenizer.len + } + + val paramCount = + if (!inlineStackArgs && opcode.stack === StackInteraction.Pop) 0 + else opcode.params.size + + val errorLength = prevCol + prevLen - startCol + + if (!varargs && argCount != paramCount) { + addError( + startCol, + errorLength, + "Expected $paramCount argument${ + if (paramCount == 1) "" else "s" + }, got $argCount.", + ) + } else if (varargs && argCount < paramCount) { + // TODO: This check assumes we want at least 1 argument for a vararg parameter. + // Is this correct? + addError( + startCol, + errorLength, + "Expected at least $paramCount argument${ + if (paramCount == 1) "" else "s" + }, got $argCount.", + ) } - tokens.clear() return semiValid } - private fun parseInt( - size: Int, - token: Token.Int32, - args: MutableList, - argTokens: MutableList, - ) { - val value = token.value + private fun parseInt(size: Int): Arg? { + val value = tokenizer.intValue // Fast-path 32-bit ints for improved JS perf. Otherwise maxValue would have to be a Long // or UInt, which incurs a perf hit in JS. if (size == 4) { - args.add(Arg(value)) - argTokens.add(token) + return Arg(value) } else { val bitSize = 8 * size // Minimum of the signed version of this integer type. @@ -677,71 +690,64 @@ private class Assembler(private val asm: List, private val inlineStackAr // Maximum of the unsigned version of this integer type. val maxValue = (1 shl (bitSize)) - 1 - when { + return when { value < minValue -> { - addError(token, "${bitSize}-Bit integer can't be less than ${minValue}.") + addError("${bitSize}-Bit integer can't be less than ${minValue}.") + null } value > maxValue -> { - addError(token, "${bitSize}-Bit integer can't be greater than ${maxValue}.") + addError("${bitSize}-Bit integer can't be greater than ${maxValue}.") + null } else -> { - args.add(Arg(value)) - argTokens.add(token) + Arg(value) } } } } - private fun parseRegister( - token: Token.Register, - args: MutableList, - argTokens: MutableList, - ) { - val value = token.value + private fun parseRegister(): Arg? { + val value = tokenizer.intValue - if (value > 255) { - addError(token, "Invalid register reference, expected r0-r255.") + return if (value > 255) { + addError("Invalid register reference, expected r0-r255.") + null } else { - args.add(Arg(value)) - argTokens.add(token) + Arg(value) } } - private fun parseBytes(firstToken: Token.Int32) { + private fun parseBytes() { val bytes = mutableListOf() - var token: Token = firstToken - var i = 0 - while (token is Token.Int32) { - if (token.value < 0) { - addError(token, "Unsigned 8-bit integer can't be less than 0.") - } else if (token.value > 255) { - addError(token, "Unsigned 8-bit integer can't be greater than 255.") + while (tokenizer.type === Token.Int32) { + val value = tokenizer.intValue + + if (value < 0) { + addError("Unsigned 8-bit integer can't be less than 0.") + } else if (value > 255) { + addError("Unsigned 8-bit integer can't be greater than 255.") } - bytes.add(token.value.toByte()) + bytes.add(value.toByte()) - if (i < tokens.size) { - token = tokens[i++] - } else { - break - } + tokenizer.nextToken() } - if (i < tokens.size) { - addError(token, "Expected an unsigned 8-bit integer.") + if (tokenizer.type != null) { + addError("Expected an unsigned 8-bit integer.") } addBytes(bytes.toByteArray()) } - private fun parseString(token: Token.Str) { - tokens.removeFirstOrNull()?.let { nextToken -> - addUnexpectedTokenError(nextToken) - } + private fun parseString() { + addString(tokenizer.strValue.replace("\n", "")) - addString(token.value.replace("\n", "")) + if (tokenizer.nextToken()) { + addUnexpectedTokenError() + } } - private fun Token.srcLoc(): String = "$lineNo:$col" + private fun srcLocFromTokenizer(): SrcLoc = SrcLoc(lineNo, tokenizer.col, tokenizer.len) } diff --git a/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AsmTokenizationTests.kt b/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AsmTokenizationTests.kt index eb697be7..c693a01a 100644 --- a/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AsmTokenizationTests.kt +++ b/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AsmTokenizationTests.kt @@ -4,82 +4,107 @@ import world.phantasmal.lib.test.LibTestSuite import world.phantasmal.testUtils.assertCloseTo import kotlin.test.Test import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertTrue class AsmTokenizationTests : LibTestSuite { @Test fun hexadecimal_numbers_are_parsed_as_ints() { - assertEquals(0x00, (tokenizeLine("0X00")[0] as Token.Int32).value) - assertEquals(0x70, (tokenizeLine("0x70")[0] as Token.Int32).value) - assertEquals(0xA1, (tokenizeLine("0xa1")[0] as Token.Int32).value) - assertEquals(0xAB, (tokenizeLine("0xAB")[0] as Token.Int32).value) - assertEquals(0xAB, (tokenizeLine("0xAb")[0] as Token.Int32).value) - assertEquals(0xAB, (tokenizeLine("0xaB")[0] as Token.Int32).value) - assertEquals(0xFF, (tokenizeLine("0xff")[0] as Token.Int32).value) + val tokenizer = LineTokenizer() + + tokenizer.testInt("0X00", 0x00) + tokenizer.testInt("0x70", 0x70) + tokenizer.testInt("0xa1", 0xA1) + tokenizer.testInt("0xAB", 0xAB) + tokenizer.testInt("0xAb", 0xAB) + tokenizer.testInt("0xaB", 0xAB) + tokenizer.testInt("0xff", 0xFF) + } + + private fun LineTokenizer.testInt(line: String, value: Int) { + tokenize(line) + assertTrue(nextToken()) + assertEquals(Token.Int32, type) + assertEquals(value, intValue) + assertFalse(nextToken()) } @Test fun valid_floats_are_parsed_as_Float32_tokens() { - assertCloseTo(808.9f, (tokenizeLine("808.9")[0] as Token.Float32).value) - assertCloseTo(-0.9f, (tokenizeLine("-0.9")[0] as Token.Float32).value) - assertCloseTo(0.001f, (tokenizeLine("1e-3")[0] as Token.Float32).value) - assertCloseTo(-600.0f, (tokenizeLine("-6e2")[0] as Token.Float32).value) + val tokenizer = LineTokenizer() + + tokenizer.testFloat("808.9", 808.9f) + tokenizer.testFloat("-0.9", -0.9f) + tokenizer.testFloat("1e-3", 0.001f) + tokenizer.testFloat("-6e2", -600.0f) + } + + private fun LineTokenizer.testFloat(line: String, value: Float) { + tokenize(line) + assertTrue(nextToken()) + assertEquals(Token.Float32, type) + assertCloseTo(value, floatValue) + assertFalse(nextToken()) } @Test fun invalid_floats_area_parsed_as_InvalidNumber_tokens_or_InvalidSection_tokens() { - val tokens1 = tokenizeLine(" 808.9a ") + val tokenizer = LineTokenizer() - assertEquals(1, tokens1.size) - assertEquals(Token.InvalidNumber::class, tokens1[0]::class) - assertEquals(2, tokens1[0].col) - assertEquals(6, tokens1[0].len) + tokenizer.testInvalidFloat(" 808.9a ", Token.InvalidNumber, col = 2, len = 6) + tokenizer.testInvalidFloat(" -55e ", Token.InvalidNumber, col = 3, len = 4) + tokenizer.testInvalidFloat(".7429", Token.InvalidSection, col = 1, len = 5) + tokenizer.testInvalidFloat( + "\t\t\t4. test", + Token.InvalidNumber, + col = 4, + len = 2, + extraTokens = 1, + ) + } - val tokens2 = tokenizeLine(" -55e ") - - assertEquals(1, tokens2.size) - assertEquals(Token.InvalidNumber::class, tokens2[0]::class) - assertEquals(3, tokens2[0].col) - assertEquals(4, tokens2[0].len) - - val tokens3 = tokenizeLine(".7429") - - assertEquals(1, tokens3.size) - assertEquals(Token.InvalidSection::class, tokens3[0]::class) - assertEquals(1, tokens3[0].col) - assertEquals(5, tokens3[0].len) - - val tokens4 = tokenizeLine("\t\t\t4. test") - - assertEquals(2, tokens4.size) - assertEquals(Token.InvalidNumber::class, tokens4[0]::class) - assertEquals(4, tokens4[0].col) - assertEquals(2, tokens4[0].len) + private fun LineTokenizer.testInvalidFloat( + line: String, + type: Token, + col: Int, + len: Int, + extraTokens: Int = 0, + ) { + tokenize(line) + assertTrue(nextToken()) + assertEquals(type, this.type) + assertEquals(col, this.col) + assertEquals(len, this.len) + repeat(extraTokens) { assertTrue(nextToken()) } + assertFalse(nextToken()) } @Test fun strings_are_parsed_as_Str_tokens() { - val tokens0 = tokenizeLine(""" "one line" """) + val tokenizer = LineTokenizer() - assertEquals(1, tokens0.size) - assertEquals(Token.Str::class, tokens0[0]::class) - assertEquals("one line", (tokens0[0] as Token.Str).value) - assertEquals(2, tokens0[0].col) - assertEquals(10, tokens0[0].len) + tokenizer.testString(""" "one line" """, "one line", col = 2, len = 10) + tokenizer.testString(""" "two\nlines" """, "two\nlines", col = 2, len = 12) + tokenizer.testString( + """ "is \"this\" escaped?" """, + "is \"this\" escaped?", + col = 2, + len = 22, + ) + } - val tokens1 = tokenizeLine(""" "two\nlines" """) - - assertEquals(1, tokens1.size) - assertEquals(Token.Str::class, tokens1[0]::class) - assertEquals("two\nlines", (tokens1[0] as Token.Str).value) - assertEquals(2, tokens1[0].col) - assertEquals(12, tokens1[0].len) - - val tokens2 = tokenizeLine(""" "is \"this\" escaped?" """) - - assertEquals(1, tokens2.size) - assertEquals(Token.Str::class, tokens2[0]::class) - assertEquals("is \"this\" escaped?", (tokens2[0] as Token.Str).value) - assertEquals(2, tokens2[0].col) - assertEquals(22, tokens2[0].len) + private fun LineTokenizer.testString( + line: String, + value: String, + col: Int, + len: Int, + ) { + tokenize(line) + assertTrue(nextToken()) + assertEquals(Token.Str, this.type) + assertEquals(value, this.strValue) + assertEquals(col, this.col) + assertEquals(len, this.len) + assertFalse(nextToken()) } } diff --git a/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AssemblyTests.kt b/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AssemblyTests.kt index a8f3b1a9..50e54975 100644 --- a/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AssemblyTests.kt +++ b/lib/src/commonTest/kotlin/world/phantasmal/lib/asm/AssemblyTests.kt @@ -4,12 +4,14 @@ import world.phantasmal.core.Success import world.phantasmal.lib.test.LibTestSuite import world.phantasmal.lib.test.assertDeepEquals import kotlin.test.Test +import kotlin.test.assertEquals import kotlin.test.assertTrue class AssemblyTests : LibTestSuite { @Test fun basic_script() { - val result = assemble(""" + val result = assemble( + """ 0: set_episode 0 bb_map_designate 1, 2, 3, 4 @@ -18,236 +20,297 @@ class AssemblyTests : LibTestSuite { 150: set_mainwarp 1 ret - """.trimIndent().split('\n')) + """.trimIndent().split('\n') + ) assertTrue(result is Success) assertTrue(result.problems.isEmpty()) - assertDeepEquals(BytecodeIr(listOf( - InstructionSegment( - labels = mutableListOf(0), - instructions = mutableListOf( - Instruction( - opcode = OP_SET_EPISODE, - args = listOf(Arg(0)), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(2, 5, 11), - args = listOf(SrcLoc(2, 17, 1)), - stackArgs = emptyList(), - ), - ), - Instruction( - opcode = OP_BB_MAP_DESIGNATE, - args = listOf(Arg(1), Arg(2), Arg(3), Arg(4)), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(3, 5, 16), - args = listOf( - SrcLoc(3, 22, 1), - SrcLoc(3, 25, 1), - SrcLoc(3, 28, 1), - SrcLoc(3, 31, 1), + assertDeepEquals( + BytecodeIr( + listOf( + InstructionSegment( + labels = mutableListOf(0), + instructions = mutableListOf( + Instruction( + opcode = OP_SET_EPISODE, + args = listOf(Arg(0)), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(2, 5, 11), + args = listOf(SrcLoc(2, 17, 1)), + stackArgs = emptyList(), + ), ), - stackArgs = emptyList(), - ), - ), - Instruction( - opcode = OP_ARG_PUSHL, - args = listOf(Arg(0)), - srcLoc = InstructionSrcLoc( - mnemonic = null, - args = listOf(SrcLoc(4, 23, 1)), - stackArgs = emptyList(), - ), - ), - Instruction( - opcode = OP_ARG_PUSHW, - args = listOf(Arg(150)), - srcLoc = InstructionSrcLoc( - mnemonic = null, - args = listOf(SrcLoc(4, 26, 3)), - stackArgs = emptyList(), - ), - ), - Instruction( - opcode = OP_SET_FLOOR_HANDLER, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(4, 5, 17), - args = emptyList(), - stackArgs = listOf( - SrcLoc(4, 23, 1), - SrcLoc(4, 26, 3), + Instruction( + opcode = OP_BB_MAP_DESIGNATE, + args = listOf(Arg(1), Arg(2), Arg(3), Arg(4)), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(3, 5, 16), + args = listOf( + SrcLoc(3, 22, 1), + SrcLoc(3, 25, 1), + SrcLoc(3, 28, 1), + SrcLoc(3, 31, 1), + ), + stackArgs = emptyList(), + ), + ), + Instruction( + opcode = OP_ARG_PUSHL, + args = listOf(Arg(0)), + srcLoc = InstructionSrcLoc( + mnemonic = null, + args = listOf(SrcLoc(4, 23, 1)), + stackArgs = emptyList(), + ), + ), + Instruction( + opcode = OP_ARG_PUSHW, + args = listOf(Arg(150)), + srcLoc = InstructionSrcLoc( + mnemonic = null, + args = listOf(SrcLoc(4, 26, 3)), + stackArgs = emptyList(), + ), + ), + Instruction( + opcode = OP_SET_FLOOR_HANDLER, + args = emptyList(), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(4, 5, 17), + args = emptyList(), + stackArgs = listOf( + SrcLoc(4, 23, 1), + SrcLoc(4, 26, 3), + ), + ), + ), + Instruction( + opcode = OP_RET, + args = emptyList(), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(5, 5, 3), + args = emptyList(), + stackArgs = emptyList(), + ), ), ), + srcLoc = SegmentSrcLoc(labels = mutableListOf(SrcLoc(1, 1, 2))), ), - Instruction( - opcode = OP_RET, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(5, 5, 3), - args = emptyList(), - stackArgs = emptyList(), + InstructionSegment( + labels = mutableListOf(150), + instructions = mutableListOf( + Instruction( + opcode = OP_ARG_PUSHL, + args = listOf(Arg(1)), + srcLoc = InstructionSrcLoc( + mnemonic = null, + args = listOf(SrcLoc(7, 18, 1)), + stackArgs = emptyList(), + ), + ), + Instruction( + opcode = OP_SET_MAINWARP, + args = emptyList(), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(7, 5, 12), + args = emptyList(), + stackArgs = listOf(SrcLoc(7, 18, 1)), + ), + ), + Instruction( + opcode = OP_RET, + args = emptyList(), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(8, 5, 3), + args = emptyList(), + stackArgs = emptyList(), + ), + ), ), - ), - ), - srcLoc = SegmentSrcLoc(labels = mutableListOf(SrcLoc(1, 1, 2))), + srcLoc = SegmentSrcLoc(labels = mutableListOf(SrcLoc(6, 1, 4))), + ) + ) ), - InstructionSegment( - labels = mutableListOf(150), - instructions = mutableListOf( - Instruction( - opcode = OP_ARG_PUSHL, - args = listOf(Arg(1)), - srcLoc = InstructionSrcLoc( - mnemonic = null, - args = listOf(SrcLoc(7, 18, 1)), - stackArgs = emptyList(), - ), - ), - Instruction( - opcode = OP_SET_MAINWARP, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(7, 5, 12), - args = emptyList(), - stackArgs = listOf(SrcLoc(7, 18, 1)), - ), - ), - Instruction( - opcode = OP_RET, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(8, 5, 3), - args = emptyList(), - stackArgs = emptyList(), - ), - ), - ), - srcLoc = SegmentSrcLoc(labels = mutableListOf(SrcLoc(6, 1, 4))), - ) - )), result.value) + result.value + ) } @Test fun pass_register_value_via_stack_with_inline_args() { - val result = assemble(""" + val result = assemble( + """ 0: leti r255, 7 exit r255 ret - """.trimIndent().split('\n')) + """.trimIndent().split('\n') + ) assertTrue(result is Success) assertTrue(result.problems.isEmpty()) - assertDeepEquals(BytecodeIr( - listOf( - InstructionSegment( - labels = mutableListOf(0), - instructions = mutableListOf( - Instruction( - opcode = OP_LETI, - args = listOf(Arg(255), Arg(7)), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(2, 5, 4), - args = listOf(SrcLoc(2, 10, 4), SrcLoc(2, 16, 1)), - stackArgs = emptyList(), + assertDeepEquals( + BytecodeIr( + listOf( + InstructionSegment( + labels = mutableListOf(0), + instructions = mutableListOf( + Instruction( + opcode = OP_LETI, + args = listOf(Arg(255), Arg(7)), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(2, 5, 4), + args = listOf(SrcLoc(2, 10, 4), SrcLoc(2, 16, 1)), + stackArgs = emptyList(), + ), ), - ), - Instruction( - opcode = OP_ARG_PUSHR, - args = listOf(Arg(255)), - srcLoc = InstructionSrcLoc( - mnemonic = null, - args = listOf(SrcLoc(3, 10, 4)), - stackArgs = emptyList(), + Instruction( + opcode = OP_ARG_PUSHR, + args = listOf(Arg(255)), + srcLoc = InstructionSrcLoc( + mnemonic = null, + args = listOf(SrcLoc(3, 10, 4)), + stackArgs = emptyList(), + ), ), - ), - Instruction( - opcode = OP_EXIT, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(3, 5, 4), + Instruction( + opcode = OP_EXIT, args = emptyList(), - stackArgs = listOf(SrcLoc(3, 10, 4)), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(3, 5, 4), + args = emptyList(), + stackArgs = listOf(SrcLoc(3, 10, 4)), + ), ), - ), - Instruction( - opcode = OP_RET, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(4, 5, 3), + Instruction( + opcode = OP_RET, args = emptyList(), - stackArgs = emptyList(), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(4, 5, 3), + args = emptyList(), + stackArgs = emptyList(), + ), ), ), - ), - srcLoc = SegmentSrcLoc( - labels = mutableListOf(SrcLoc(1, 1, 2)) - ), + srcLoc = SegmentSrcLoc( + labels = mutableListOf(SrcLoc(1, 1, 2)) + ), + ) ) - ) - ), result.value) + ), + result.value + ) } @Test fun pass_register_reference_via_stack_with_inline_args() { - val result = assemble(""" + val result = assemble( + """ 0: p_dead_v3 r200, 3 ret - """.trimIndent().split('\n')) + """.trimIndent().split('\n') + ) assertTrue(result is Success) assertTrue(result.problems.isEmpty()) - assertDeepEquals(BytecodeIr( - listOf( - InstructionSegment( - labels = mutableListOf(0), - instructions = mutableListOf( - Instruction( - opcode = OP_ARG_PUSHB, - args = listOf(Arg(200)), - srcLoc = InstructionSrcLoc( - mnemonic = null, - args = listOf(SrcLoc(2, 15, 4)), - stackArgs = emptyList(), + assertDeepEquals( + BytecodeIr( + listOf( + InstructionSegment( + labels = mutableListOf(0), + instructions = mutableListOf( + Instruction( + opcode = OP_ARG_PUSHB, + args = listOf(Arg(200)), + srcLoc = InstructionSrcLoc( + mnemonic = null, + args = listOf(SrcLoc(2, 15, 4)), + stackArgs = emptyList(), + ), ), - ), - Instruction( - opcode = OP_ARG_PUSHL, - args = listOf(Arg(3)), - srcLoc = InstructionSrcLoc( - mnemonic = null, - args = listOf(SrcLoc(2, 21, 1)), - stackArgs = emptyList(), + Instruction( + opcode = OP_ARG_PUSHL, + args = listOf(Arg(3)), + srcLoc = InstructionSrcLoc( + mnemonic = null, + args = listOf(SrcLoc(2, 21, 1)), + stackArgs = emptyList(), + ), ), - ), - Instruction( - opcode = OP_P_DEAD_V3, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(2, 5, 9), + Instruction( + opcode = OP_P_DEAD_V3, args = emptyList(), - stackArgs = listOf(SrcLoc(2, 15, 4), SrcLoc(2, 21, 1)), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(2, 5, 9), + args = emptyList(), + stackArgs = listOf(SrcLoc(2, 15, 4), SrcLoc(2, 21, 1)), + ), ), - ), - Instruction( - opcode = OP_RET, - args = emptyList(), - srcLoc = InstructionSrcLoc( - mnemonic = SrcLoc(3, 5, 3), + Instruction( + opcode = OP_RET, args = emptyList(), - stackArgs = emptyList(), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(3, 5, 3), + args = emptyList(), + stackArgs = emptyList(), + ), ), ), - ), - srcLoc = SegmentSrcLoc( - labels = mutableListOf(SrcLoc(1, 1, 2)) - ), + srcLoc = SegmentSrcLoc( + labels = mutableListOf(SrcLoc(1, 1, 2)) + ), + ) ) - ) - ), result.value) + ), + result.value + ) + } + + @Test + fun too_many_arguments() { + val result = assemble( + """ + 0: + ret 100 + """.trimIndent().split('\n') + ) + + assertTrue(result is Success) + assertEquals(1, result.problems.size) + + assertDeepEquals( + BytecodeIr( + listOf( + InstructionSegment( + labels = mutableListOf(0), + instructions = mutableListOf( + Instruction( + opcode = OP_RET, + args = emptyList(), + srcLoc = InstructionSrcLoc( + mnemonic = SrcLoc(2, 5, 3), + args = emptyList(), + stackArgs = emptyList(), + ), + ), + ), + srcLoc = SegmentSrcLoc( + labels = mutableListOf(SrcLoc(1, 1, 2)) + ), + ), + ), + ), + result.value + ) + + val problem = result.problems.first() + assertTrue(problem is AssemblyProblem) + assertEquals(2, problem.lineNo) + assertEquals(5, problem.col) + assertEquals(7, problem.len) + assertEquals("Expected 0 arguments, got 1. At 2:5.", problem.message) } } diff --git a/web/assembly-worker/src/main/kotlin/world/phantasmal/web/assemblyWorker/AssemblyWorker.kt b/web/assembly-worker/src/main/kotlin/world/phantasmal/web/assemblyWorker/AssemblyWorker.kt index 75040749..c1af4264 100644 --- a/web/assembly-worker/src/main/kotlin/world/phantasmal/web/assemblyWorker/AssemblyWorker.kt +++ b/web/assembly-worker/src/main/kotlin/world/phantasmal/web/assemblyWorker/AssemblyWorker.kt @@ -17,6 +17,7 @@ private val logger = KotlinLogging.logger {} class AssemblyWorker(private val sendMessage: (ServerMessage) -> Unit) { private val messageQueue: MutableList = mutableListOf() private val messageProcessingThrottle = Throttle(wait = 100) + private val tokenizer = LineTokenizer() // User input. private var inlineStackArgs: Boolean = true @@ -288,24 +289,22 @@ class AssemblyWorker(private val sendMessage: (ServerMessage) -> Unit) { var activeParam = -1 getLine(lineNo)?.let { text -> - val tokens = tokenizeLine(text) + tokenizer.tokenize(text) - tokens.find { it is Token.Ident }?.let { ident -> - ident as Token.Ident - - mnemonicToOpcode(ident.value)?.let { opcode -> - signature = getSignature(opcode) - - for (tkn in tokens) { - if (tkn.col + tkn.len > col) { - break - } else if (tkn is Token.Ident && activeParam == -1) { - activeParam = 0 - } else if (tkn is Token.ArgSeparator) { - activeParam++ - } + while (tokenizer.nextToken()) { + if (tokenizer.type === Token.Ident) { + mnemonicToOpcode(tokenizer.strValue)?.let { opcode -> + signature = getSignature(opcode) } } + + if (tokenizer.col + tokenizer.len > col) { + break + } else if (tokenizer.type === Token.Ident && activeParam == -1) { + activeParam = 0 + } else if (tokenizer.type === Token.ArgSeparator) { + activeParam++ + } } }