You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

257 lines
8.8 KiB
TypeScript

/**
 * One lexical token.
 *
 * Every token carries the position at which it starts, intersected with a
 * variant discriminated by `kind`. Only the literal and identifier variants
 * carry a `value`.
 */
export type Token = {
  /** Line on which the token starts. */
  line: number;
  /** Column on which the token starts. */
  column: number;
  /** Character offset of the token's first character in the source. */
  start: number;
} & (
  // Literals and names
  | { kind: 'int'; value: number }
  | { kind: 'float'; value: number }
  | { kind: 'string'; value: string }
  | { kind: 'ident'; value: string }
  | { kind: 'type-ident'; value: string }
  // Brackets
  | { kind: 'open-paren' }
  | { kind: 'close-paren' }
  | { kind: 'open-brace' }
  | { kind: 'close-brace' }
  | { kind: 'open-bracket' }
  | { kind: 'close-bracket' }
  // Assignment and comparison
  | { kind: 'equals' }
  | { kind: 'equals-equals' }
  | { kind: 'not-equals' }
  | { kind: 'greater-than' }
  | { kind: 'greater-equals' }
  | { kind: 'less-than' }
  | { kind: 'less-equals' }
  // Punctuation
  | { kind: 'colon' }
  | { kind: 'colon-equals' }
  | { kind: 'semicolon' }
  | { kind: 'backslash' }
  | { kind: 'pipe' }
  | { kind: 'tilde' }
  | { kind: 'comma' }
  | { kind: 'ampersand' }
  | { kind: 'underscore' }
  | { kind: 'dot' }
  | { kind: 'dot-dot-dot' }
  | { kind: 'at' }
  // Arithmetic
  | { kind: 'plus' }
  | { kind: 'minus' }
  | { kind: 'star' }
  | { kind: 'slash' }
  | { kind: 'percent' }
  | { kind: 'caret' }
  // End-of-input marker
  | { kind: 'eof' }
);
/**
 * Splits `source` into a flat list of tokens.
 *
 * Whitespace and `#` line comments are skipped. Each token records the line
 * and column (both counted from 1) and the character offset (counted from 0)
 * at which it starts. No `eof` token is appended.
 *
 * @param source - Full text to tokenize.
 * @returns The tokens in source order.
 * @throws Error `Unterminated string` when a string literal is never closed.
 * @throws Error `Unexpected character: <c>` when a character cannot start any
 *   token (this includes a `!` that is not followed by `=`).
 */
export function tokenize(source: string): Token[] {
  // Built once per call instead of once per scanned character.
  const whitespace = /\s/;
  const digit = /[0-9]/;
  const numberChar = /[0-9.]/;
  const identStart = /[A-Za-z_]/;
  const identChar = /[A-Za-z0-9_!-]/;
  const upperCase = /[A-Z]/;

  const tokens: Token[] = [];
  let i = 0;
  let line = 1;
  let column = 1;

  /** Consumes `count` characters, keeping `line` and `column` in step with `i`. */
  function advance(count = 1): void {
    for (let n = 0; n < count; n++) {
      if (source[i] === '\n') {
        line++;
        column = 1;
      } else {
        column++;
      }
      i++;
    }
  }

  while (i < source.length) {
    const char = source[i];
    const start = i;
    // Snapshot of where this token begins; spread into every token pushed below.
    const pos = { line, column, start };

    // Whitespace
    if (whitespace.test(char)) {
      advance();
      continue;
    }

    // Comments: `#` up to (not including) the newline, which is then
    // consumed as ordinary whitespace on the next iteration.
    if (char === '#') {
      while (i < source.length && source[i] !== '\n') {
        advance();
      }
      continue;
    }

    // Numbers: must start with a digit. A single `.` is accepted (even when
    // trailing, so `1.` is a float); a second `.` ends the literal.
    if (digit.test(char)) {
      let num = '';
      let hasDot = false;
      while (i < source.length && numberChar.test(source[i])) {
        if (source[i] === '.') {
          if (hasDot) break;
          hasDot = true;
        }
        num += source[i];
        advance();
      }
      tokens.push(hasDot
        ? { kind: 'float', value: parseFloat(num), ...pos }
        : { kind: 'int', value: parseInt(num, 10), ...pos });
      continue;
    }

    // Identifiers and the wildcard. After the first character, digits, `_`,
    // `!` and `-` are also part of the name.
    if (identStart.test(char)) {
      let str = '';
      while (i < source.length && identChar.test(source[i])) {
        str += source[i];
        advance();
      }
      if (str === '_') {
        // A lone underscore is the wildcard token.
        tokens.push({ kind: 'underscore', ...pos });
      } else if (upperCase.test(str[0])) {
        // A leading uppercase letter marks a type identifier.
        tokens.push({ kind: 'type-ident', value: str, ...pos });
      } else {
        tokens.push({ kind: 'ident', value: str, ...pos });
      }
      continue;
    }

    // Strings: double-quoted, with the escapes \n, \t, \" and \\.
    if (char === '"') {
      advance(); // opening quote
      let str = '';
      while (i < source.length && source[i] !== '"') {
        if (source[i] === '\\') {
          advance();
          if (i >= source.length) {
            throw new Error('Unterminated string');
          }
          switch (source[i]) {
            case 'n': str += '\n'; break;
            case '"': str += '"'; break;
            case '\\': str += '\\'; break;
            case 't': str += '\t'; break;
            // NOTE(review): any other escaped character is dropped from the
            // value without an error; confirm whether that is intended.
          }
        } else {
          str += source[i];
        }
        advance();
      }
      if (i >= source.length) {
        throw new Error('Unterminated string');
      }
      tokens.push({ kind: 'string', value: str, ...pos });
      advance(); // closing quote
      continue;
    }

    // Operators and punctuation. Each case consumes every character of the
    // token except the last; the shared advance() after the switch eats that.
    switch (char) {
      case '>': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'greater-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'greater-than', ...pos });
        }
        break;
      }
      case '<': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'less-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'less-than', ...pos });
        }
        break;
      }
      case '=': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'equals-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'equals', ...pos });
        }
        break;
      }
      case '!': {
        // `!` only exists as part of `!=`.
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'not-equals', ...pos });
          advance();
        } else {
          throw new Error(`Unexpected character: ${char}`)
        }
        break;
      }
      case '.': {
        if (source[i + 1] === '.' && source[i + 2] === '.') {
          tokens.push({ kind: 'dot-dot-dot', ...pos });
          // Go through advance() so the column stays correct for the tokens
          // that follow on this line.
          advance(2);
        } else {
          tokens.push({ kind: 'dot', ...pos });
        }
        break;
      }
      case ':': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'colon-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'colon', ...pos });
        }
        break;
      }
      case ';': tokens.push({ kind: 'semicolon', ...pos }); break;
      case '\\': tokens.push({ kind: 'backslash', ...pos }); break;
      case '~': tokens.push({ kind: 'tilde', ...pos }); break;
      case '|': tokens.push({ kind: 'pipe', ...pos }); break;
      case ',': tokens.push({ kind: 'comma', ...pos }); break;
      case '&': tokens.push({ kind: 'ampersand', ...pos }); break;
      case '@': tokens.push({ kind: 'at', ...pos }); break;
      // Arithmetic
      case '+': tokens.push({ kind: 'plus', ...pos }); break;
      case '-': tokens.push({ kind: 'minus', ...pos }); break;
      case '*': tokens.push({ kind: 'star', ...pos }); break;
      case '/': tokens.push({ kind: 'slash', ...pos }); break;
      case '^': tokens.push({ kind: 'caret', ...pos }); break;
      case '%': tokens.push({ kind: 'percent', ...pos }); break;
      // Brackets
      case '(': tokens.push({ kind: 'open-paren', ...pos }); break;
      case ')': tokens.push({ kind: 'close-paren', ...pos }); break;
      case '{': tokens.push({ kind: 'open-brace', ...pos }); break;
      case '}': tokens.push({ kind: 'close-brace', ...pos }); break;
      case '[': tokens.push({ kind: 'open-bracket', ...pos }); break;
      case ']': tokens.push({ kind: 'close-bracket', ...pos }); break;
      default:
        // Report characters that cannot start a token instead of dropping them.
        throw new Error(`Unexpected character: ${char}`);
    }
    advance();
  }
  return tokens;
}