297 lines
9.5 KiB
Kotlin
297 lines
9.5 KiB
Kotlin
package net.sergeych.ling
|
|
|
|
val digitsSet = ('0'..'9').toSet()
|
|
val digits = { d: Char -> d in digitsSet }
|
|
val hexDigits = digitsSet + ('a'..'f') + ('A'..'F')
|
|
val idNextChars = { d: Char -> d.isLetter() || d == '_' || d.isDigit()}
|
|
|
|
@Suppress("unused")
|
|
val idFirstChars = { d: Char -> d.isLetter() || d == '_' }
|
|
|
|
fun parseLing(source: Source): List<Token> {
|
|
val p = Parser(fromPos = source.startPos)
|
|
val tokens = mutableListOf<Token>()
|
|
do {
|
|
val t = p.nextToken()
|
|
tokens += t
|
|
} while (t.type != Token.Type.EOF)
|
|
return tokens
|
|
}
|
|
|
|
private class Parser(fromPos: Pos) {
|
|
|
|
private val pos = MutablePos(fromPos)
|
|
|
|
/**
|
|
* Immutable copy of current position
|
|
*/
|
|
private val currentPos: Pos get() = pos.toPos()
|
|
|
|
private fun raise(msg: String): Nothing = throw ScriptError(currentPos, msg)
|
|
|
|
fun nextToken(): Token {
|
|
skipws()
|
|
if (pos.end) return Token("", currentPos, Token.Type.EOF)
|
|
val from = currentPos
|
|
return when (val ch = pos.currentChar.also { advance() }) {
|
|
'(' -> Token("(", from, Token.Type.LPAREN)
|
|
')' -> Token(")", from, Token.Type.RPAREN)
|
|
'{' -> Token("{", from, Token.Type.LBRACE)
|
|
'}' -> Token("}", from, Token.Type.RBRACE)
|
|
'[' -> Token("[", from, Token.Type.LBRACKET)
|
|
']' -> Token("]", from, Token.Type.RBRACKET)
|
|
',' -> Token(",", from, Token.Type.COMMA)
|
|
';' -> Token(";", from, Token.Type.SEMICOLON)
|
|
'=' -> {
|
|
if (pos.currentChar == '=') {
|
|
advance()
|
|
Token("==", from, Token.Type.EQ)
|
|
} else
|
|
Token("=", from, Token.Type.ASSIGN)
|
|
}
|
|
|
|
'+' -> {
|
|
if( currentChar == '+') {
|
|
advance()
|
|
Token("+", from, Token.Type.PLUS2)
|
|
}
|
|
else
|
|
Token("+", from, Token.Type.PLUS)
|
|
}
|
|
'-' -> {
|
|
if (currentChar == '-') {
|
|
advance()
|
|
Token("--", from, Token.Type.MINUS2)
|
|
} else
|
|
Token("-", from, Token.Type.MINUS)
|
|
}
|
|
'*' -> Token("*", from, Token.Type.STAR)
|
|
'/' -> {
|
|
if( currentChar == '/') {
|
|
advance()
|
|
Token(loadToEnd().trim(), from, Token.Type.SINLGE_LINE_COMMENT)
|
|
}
|
|
else
|
|
Token("/", from, Token.Type.SLASH)
|
|
}
|
|
'%' -> Token("%", from, Token.Type.PERCENT)
|
|
'.' -> Token(".", from, Token.Type.DOT)
|
|
'<' -> {
|
|
if(currentChar == '=') {
|
|
advance()
|
|
Token("<=", from, Token.Type.LTE)
|
|
}
|
|
else
|
|
Token("<", from, Token.Type.LT)
|
|
}
|
|
'>' -> {
|
|
if( currentChar == '=') {
|
|
advance()
|
|
Token(">=", from, Token.Type.GTE)
|
|
}
|
|
else
|
|
Token(">", from, Token.Type.GT)
|
|
}
|
|
'!' -> {
|
|
if( currentChar == '=') {
|
|
advance()
|
|
Token("!=", from, Token.Type.NEQ)
|
|
}
|
|
else
|
|
Token("!", from, Token.Type.NOT)
|
|
}
|
|
'|' -> {
|
|
if (currentChar == '|') {
|
|
advance()
|
|
Token("||", from, Token.Type.OR)
|
|
} else
|
|
Token("|", from, Token.Type.BITOR)
|
|
}
|
|
'&' -> {
|
|
if (currentChar == '&') {
|
|
advance()
|
|
Token("&&", from, Token.Type.AND)
|
|
} else
|
|
Token("&", from, Token.Type.BITAND)
|
|
}
|
|
'@' -> {
|
|
val label = loadChars(idNextChars)
|
|
if( label.isNotEmpty()) Token(label, from, Token.Type.ATLABEL)
|
|
else raise("unexpected @ character")
|
|
}
|
|
'\n' -> Token("\n", from, Token.Type.NEWLINE)
|
|
|
|
'"' -> loadStringToken()
|
|
in digitsSet -> {
|
|
pos.back()
|
|
decodeNumber(loadChars(digits), from)
|
|
}
|
|
|
|
else -> {
|
|
// Labels processing is complicated!
|
|
// some@ statement: label 'some', ID 'statement'
|
|
// statement@some: ID 'statement', LABEL 'some'!
|
|
if (ch.isLetter() || ch == '_') {
|
|
val text = ch + loadChars(idNextChars)
|
|
if( currentChar == '@') {
|
|
advance()
|
|
if( currentChar.isLetter()) {
|
|
// break@label or like
|
|
pos.back()
|
|
Token(text, from, Token.Type.ID)
|
|
}
|
|
else
|
|
Token(text, from, Token.Type.LABEL)
|
|
}
|
|
else
|
|
Token(text, from, Token.Type.ID)
|
|
}
|
|
else
|
|
raise("can't parse token")
|
|
}
|
|
}
|
|
}
|
|
|
|
private fun decodeNumber(p1: String, start: Pos): Token =
|
|
if (pos.end)
|
|
Token(p1, start, Token.Type.INT)
|
|
else if (currentChar == '.') {
|
|
// could be decimal
|
|
advance()
|
|
if (currentChar in digitsSet) {
|
|
// decimal part
|
|
val p2 = loadChars(digits)
|
|
// with exponent?
|
|
if (currentChar == 'e' || currentChar == 'E') {
|
|
advance()
|
|
var negative = false
|
|
if (currentChar == '+')
|
|
advance()
|
|
else if (currentChar == '-') {
|
|
negative = true
|
|
advance()
|
|
}
|
|
var p3 = loadChars(digits)
|
|
if (negative) p3 = "-$p3"
|
|
Token("$p1.${p2}e$p3", start, Token.Type.REAL)
|
|
} else {
|
|
// no exponent
|
|
Token("$p1.$p2", start, Token.Type.REAL)
|
|
}
|
|
} else {
|
|
// not decimal
|
|
// something like 10.times, method call on integer number
|
|
pos.back()
|
|
Token(p1, start, Token.Type.INT)
|
|
}
|
|
} else {
|
|
// could be integer, also hex:
|
|
if (currentChar == 'x' && p1 == "0") {
|
|
advance()
|
|
Token(loadChars({ it in hexDigits}), start, Token.Type.HEX).also {
|
|
if (currentChar.isLetter())
|
|
raise("invalid hex literal")
|
|
}
|
|
} else {
|
|
Token(p1, start, Token.Type.INT)
|
|
}
|
|
}
|
|
|
|
|
|
private val currentChar: Char get() = pos.currentChar
|
|
|
|
private fun loadStringToken(): Token {
|
|
var start = currentPos
|
|
|
|
if (currentChar == '"') advance()
|
|
else start = start.back()
|
|
|
|
val sb = StringBuilder()
|
|
while (currentChar != '"') {
|
|
if (pos.end) raise("unterminated string")
|
|
when (currentChar) {
|
|
'\\' -> {
|
|
advance() ?: raise("unterminated string")
|
|
when (currentChar) {
|
|
'n' -> sb.append('\n')
|
|
'r' -> sb.append('\r')
|
|
't' -> sb.append('\t')
|
|
'"' -> sb.append('"')
|
|
else -> sb.append('\\').append(currentChar)
|
|
}
|
|
}
|
|
|
|
else -> {
|
|
sb.append(currentChar)
|
|
advance()
|
|
}
|
|
}
|
|
}
|
|
advance()
|
|
return Token(sb.toString(), start, Token.Type.STRING)
|
|
}
|
|
|
|
/**
|
|
* Load characters from the set until it reaches EOF or invalid character found.
|
|
* stop at EOF on character filtered by [isValidChar].
|
|
*
|
|
* Note this function loads only on one string. Multiline texts are not supported by
|
|
* this method.
|
|
*
|
|
* @return the string of valid characters, could be empty
|
|
*/
|
|
private fun loadChars(isValidChar: (Char)->Boolean): String {
|
|
val startLine = pos.line
|
|
val result = StringBuilder()
|
|
while (!pos.end && pos.line == startLine) {
|
|
val ch = pos.currentChar
|
|
if (isValidChar(ch)) {
|
|
result.append(ch)
|
|
advance()
|
|
} else
|
|
break
|
|
}
|
|
return result.toString()
|
|
}
|
|
|
|
@Suppress("unused")
|
|
private fun loadUntil(endChars: Set<Char>): String {
|
|
return if (pos.end) ""
|
|
else {
|
|
val result = StringBuilder()
|
|
while (!pos.end) {
|
|
val ch = pos.currentChar
|
|
if (ch in endChars) break
|
|
result.append(ch)
|
|
pos.advance()
|
|
}
|
|
result.toString()
|
|
}
|
|
}
|
|
|
|
private fun loadToEnd(): String {
|
|
val result = StringBuilder()
|
|
val l = pos.line
|
|
do {
|
|
result.append(pos.currentChar)
|
|
advance()
|
|
} while (pos.line == l)
|
|
return result.toString()
|
|
}
|
|
|
|
/**
|
|
* next non-whitespace char (newline are skipped too) or null if EOF
|
|
*/
|
|
private fun skipws(): Char? {
|
|
while (!pos.end) {
|
|
val ch = pos.currentChar
|
|
if (ch.isWhitespace())
|
|
advance()
|
|
else
|
|
return ch
|
|
}
|
|
return null
|
|
}
|
|
|
|
private fun advance() = pos.advance()
|
|
} |