You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
257 lines
8.8 KiB
TypeScript
257 lines
8.8 KiB
TypeScript
/**
 * A single lexical token.
 *
 * The `kind` field is the discriminant: literal and identifier kinds carry a
 * `value`, every other kind is a bare marker. All tokens additionally carry
 * the position at which they begin in the source text.
 */
export type Token = (
  // Literals
  | { kind: 'int', value: number }
  | { kind: 'float', value: number }
  | { kind: 'string', value: string }
  | { kind: 'ident', value: string }
  | { kind: 'type-ident', value: string }

  // Brackets
  | { kind: 'open-paren' }
  | { kind: 'close-paren' }
  | { kind: 'open-brace' }
  | { kind: 'close-brace' }
  | { kind: 'open-bracket' }
  | { kind: 'close-bracket' }

  // Comparison
  | { kind: 'equals' }
  | { kind: 'equals-equals' }
  | { kind: 'not-equals' }
  | { kind: 'greater-than' }
  | { kind: 'greater-equals' }
  | { kind: 'less-than' }
  | { kind: 'less-equals' }

  // Symbols
  | { kind: 'colon' }
  | { kind: 'colon-equals' }
  | { kind: 'semicolon' }
  | { kind: 'backslash' }
  | { kind: 'pipe' }
  | { kind: 'tilde' }
  | { kind: 'comma' }
  | { kind: 'ampersand' }
  | { kind: 'underscore' }
  | { kind: 'dot' }
  | { kind: 'dot-dot-dot' }
  | { kind: 'at' }

  // Arithmetic
  | { kind: 'plus' }
  | { kind: 'minus' }
  | { kind: 'star' }
  | { kind: 'slash' }
  | { kind: 'percent' }
  | { kind: 'caret' }

  | { kind: 'eof' }
) & {
  /** Line on which the token starts. */
  line: number;
  /** Column at which the token starts. */
  column: number;
  /** Character offset of the token's first character in the source. */
  start: number;
};

/**
 * Splits source text into a flat list of tokens.
 *
 * Lexical rules, in the order they are tried at each position:
 * - Whitespace is skipped.
 * - `#` starts a comment that runs to the end of the line.
 * - A digit starts a number; digits plus at most one `.` are consumed. A
 *   number containing a `.` becomes a `float`, otherwise an `int`.
 * - A letter or `_` starts an identifier made of letters, digits, `_`, `!`
 *   and `-`. A lone `_` is the `underscore` token; an identifier whose first
 *   character is an uppercase ASCII letter is a `type-ident`, anything else
 *   is an `ident`.
 * - `"` starts a string literal. The escapes `\n`, `\"`, `\\` and `\t` are
 *   decoded; any other escaped character is dropped from the value.
 * - Everything else must be one of the known operator/punctuation symbols.
 *
 * Positions are 1-based for `line`/`column` and 0-based for `start`, and
 * always refer to the first character of the token. No `eof` token is
 * appended by this function.
 *
 * @param source The complete program text to tokenize.
 * @returns The tokens in source order.
 * @throws Error `Unterminated string` if a string literal is not closed.
 * @throws Error `Unexpected character: <c>` for a `!` not followed by `=`,
 *   or for any character that does not start a known token.
 */
export function tokenize(source: string): Token[] {
  // Hoisted so the patterns are not rebuilt for every character.
  const whitespace = /\s/;
  const digit = /[0-9]/;
  const numberPart = /[0-9.]/;
  const identStart = /[A-Za-z_]/;
  const identPart = /[A-Za-z0-9_!-]/;
  const uppercase = /[A-Z]/;

  const tokens: Token[] = [];

  let i = 0;
  let line = 1;
  let column = 1;

  // Consumes the current character, keeping line/column in sync with `i`.
  function advance() {
    if (source[i] === '\n') {
      line++;
      column = 1;
    } else {
      column++;
    }
    i++;
  }

  while (i < source.length) {
    const char = source[i];
    // Position of the token's first character, captured before consuming it.
    const pos = { line, column, start: i };

    // Whitespace
    if (whitespace.test(char)) {
      advance();
      continue;
    }

    // Comments
    if (char === '#') {
      // The newline itself is left for the whitespace rule.
      while (i < source.length && source[i] !== '\n') {
        advance();
      }
      continue;
    }

    // Numbers (must start with a digit)
    if (digit.test(char)) {
      let num = '';
      let hasDot = false;

      while (i < source.length && numberPart.test(source[i])) {
        if (source[i] === '.') {
          // A second dot ends the literal and is lexed separately.
          if (hasDot) break;
          hasDot = true;
        }
        num += source[i];
        advance();
      }

      tokens.push(hasDot
        ? { kind: 'float', value: parseFloat(num), ...pos }
        : { kind: 'int', value: parseInt(num, 10), ...pos });

      continue;
    }

    // Idents & Wildcard
    if (identStart.test(char)) {
      let str = '';
      while (i < source.length && identPart.test(source[i])) {
        str += source[i];
        advance();
      }

      if (str === '_') {
        // Wildcards
        tokens.push({ kind: 'underscore', ...pos });
      } else {
        const isType = uppercase.test(str[0]);

        tokens.push(isType
          ? { kind: 'type-ident', value: str, ...pos }
          : { kind: 'ident', value: str, ...pos });
      }

      continue;
    }

    // Strings
    if (char === '"') {
      advance(); // opening quote
      let str = '';

      while (i < source.length && source[i] !== '"') {
        if (source[i] === '\\') {
          advance(); // the backslash; source[i] is now the escaped character

          if (i >= source.length) {
            throw new Error('Unterminated string');
          }

          // Unrecognised escapes contribute nothing to the value.
          switch (source[i]) {
            case 'n': str += '\n'; break;
            case '"': str += '"'; break;
            case '\\': str += '\\'; break;
            case 't': str += '\t'; break;
          }
        } else {
          str += source[i];
        }

        advance();
      }

      if (i >= source.length) {
        throw new Error('Unterminated string');
      }

      tokens.push({ kind: 'string', value: str, ...pos });

      advance(); // closing quote

      continue;
    }

    // Operators and punctuation. Multi-character tokens consume their extra
    // characters here; the final character is consumed after the switch.
    switch (char) {
      case '>': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'greater-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'greater-than', ...pos });
        }
        break;
      }
      case '<': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'less-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'less-than', ...pos });
        }
        break;
      }
      case '=': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'equals-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'equals', ...pos });
        }
        break;
      }
      case '!': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'not-equals', ...pos });
          advance();
        } else {
          throw new Error(`Unexpected character: ${char}`);
        }
        break;
      }
      case '.': {
        if (source[i + 1] === '.' && source[i + 2] === '.') {
          tokens.push({ kind: 'dot-dot-dot', ...pos });
          // Go through advance() rather than bumping `i` directly, so the
          // column of later tokens on this line stays correct.
          advance();
          advance();
        } else {
          tokens.push({ kind: 'dot', ...pos });
        }
        break;
      }
      case ':': {
        if (source[i + 1] === '=') {
          tokens.push({ kind: 'colon-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'colon', ...pos });
        }
        break;
      }
      case ';': tokens.push({ kind: 'semicolon', ...pos }); break;
      case '\\': tokens.push({ kind: 'backslash', ...pos }); break;
      case '~': tokens.push({ kind: 'tilde', ...pos }); break;
      case '|': tokens.push({ kind: 'pipe', ...pos }); break;
      case ',': tokens.push({ kind: 'comma', ...pos }); break;
      case '&': tokens.push({ kind: 'ampersand', ...pos }); break;
      case '@': tokens.push({ kind: 'at', ...pos }); break;

      // Arithmetic
      case '+': tokens.push({ kind: 'plus', ...pos }); break;
      case '-': tokens.push({ kind: 'minus', ...pos }); break;
      case '*': tokens.push({ kind: 'star', ...pos }); break;
      case '/': tokens.push({ kind: 'slash', ...pos }); break;
      case '^': tokens.push({ kind: 'caret', ...pos }); break;
      case '%': tokens.push({ kind: 'percent', ...pos }); break;

      // Brackets
      case '(': tokens.push({ kind: 'open-paren', ...pos }); break;
      case ')': tokens.push({ kind: 'close-paren', ...pos }); break;
      case '{': tokens.push({ kind: 'open-brace', ...pos }); break;
      case '}': tokens.push({ kind: 'close-brace', ...pos }); break;
      case '[': tokens.push({ kind: 'open-bracket', ...pos }); break;
      case ']': tokens.push({ kind: 'close-bracket', ...pos }); break;

      // Reject anything unrecognised instead of silently dropping it,
      // matching how a stray `!` is reported.
      default:
        throw new Error(`Unexpected character: ${char}`);
    }

    advance();
  }

  return tokens;
}