Better errors!

This commit is contained in:
Dustin Swan 2026-02-04 21:21:52 -07:00
parent c44f06268f
commit 9d1b079361
No known key found for this signature in database
GPG key ID: 30D46587E2100467
8 changed files with 305 additions and 132 deletions

View file

@ -1,4 +1,4 @@
export type Token =
export type Token = (
// Literals
| { kind: 'int', value: number }
| { kind: 'float', value: number }
@ -45,24 +45,45 @@ export type Token =
| { kind: 'caret' }
| { kind: 'eof' }
) & {
line: number;
column: number;
start: number; // char offset in source
}
export function tokenize(source: string): Token[] {
const tokens: Token[] = [];
let i = 0;
let line = 1;
let column = 1;
/**
 * Consumes the character at the current offset `i` and keeps the
 * `line` / `column` counters in step with it.
 *
 * A newline bumps `line` and resets `column` to 1; anything else moves
 * `column` one position to the right. The offset `i` always moves forward
 * by exactly one.
 */
function advance() {
  // Inspect the character before stepping past it.
  const atNewline = source[i] === '\n';
  i += 1;
  if (atNewline) {
    line += 1;
    column = 1;
    return;
  }
  column += 1;
}
while (i < source.length) {
const char = source[i];
const start = i;
const startLine = line;
const startColumn = column;
// Whitespace
if (/\s/.test(char)) {
i++;
advance();
continue;
}
// Comments
if (char === '#') {
while (i < source.length && source[i] !== '\n') {
i++;
advance();
}
continue;
}
@ -78,12 +99,12 @@ export function tokenize(source: string): Token[] {
hasDot = true;
}
num += source[i];
i++;
advance();
}
tokens.push(hasDot
? { kind: 'float', value: parseFloat(num) }
: { kind: 'int', value: parseInt(num) });
? { kind: 'float', value: parseFloat(num), line: startLine, column: startColumn, start }
: { kind: 'int', value: parseInt(num), line: startLine, column: startColumn, start });
continue;
}
@ -93,18 +114,18 @@ export function tokenize(source: string): Token[] {
let str = '';
while (i < source.length && /[A-Za-z0-9_!-]/.test(source[i])) {
str += source[i];
i++;
advance();
}
if (str === '_') {
// Wildcards
tokens.push({ kind: 'underscore' });
tokens.push({ kind: 'underscore', line: startLine, column: startColumn, start });
} else {
const isType = /[A-Z]/.test(str[0]);
tokens.push(isType
? { kind: 'type-ident', value: str }
: { kind: 'ident', value: str });
? { kind: 'type-ident', value: str, line: startLine, column: startColumn, start }
: { kind: 'ident', value: str, line: startLine, column: startColumn, start });
}
continue;
@ -112,12 +133,12 @@ export function tokenize(source: string): Token[] {
// Strings
if (char === '"') {
i++;
advance();
let str = '';
while (i < source.length && source[i] !== '"') {
if (source[i] === '\\') {
i++;
advance();
if (i >= source.length) {
throw new Error('Unterminated string');
@ -133,16 +154,16 @@ export function tokenize(source: string): Token[] {
str += source[i];
}
i++;
advance();
}
if (i >= source.length) {
throw new Error('Unterminated string');
}
tokens.push({ kind: 'string', value: str });
tokens.push({ kind: 'string', value: str, line: startLine, column: startColumn, start });
i++;
advance();
continue;
}
@ -150,35 +171,35 @@ export function tokenize(source: string): Token[] {
switch (char) {
case '>': {
if (source[i + 1] === '=') {
tokens.push({ kind: 'greater-equals' });
i++;
tokens.push({ kind: 'greater-equals', line: startLine, column: startColumn, start });
advance();
} else {
tokens.push({ kind: 'greater-than' });
tokens.push({ kind: 'greater-than', line: startLine, column: startColumn, start });
}
break;
}
case '<': {
if (source[i + 1] === '=') {
tokens.push({ kind: 'less-equals' });
i++;
tokens.push({ kind: 'less-equals', line: startLine, column: startColumn, start });
advance();
} else {
tokens.push({ kind: 'less-than' });
tokens.push({ kind: 'less-than', line: startLine, column: startColumn, start });
}
break;
}
case '=': {
if (source[i + 1] === '=') {
tokens.push({ kind: 'equals-equals' });
i++;
tokens.push({ kind: 'equals-equals', line: startLine, column: startColumn, start });
advance();
} else {
tokens.push({ kind: 'equals' });
tokens.push({ kind: 'equals', line: startLine, column: startColumn, start });
}
break;
}
case '!': {
if (source[i + 1] === '=') {
tokens.push({ kind: 'not-equals' });
i++;
tokens.push({ kind: 'not-equals', line: startLine, column: startColumn, start });
advance();
} else {
throw new Error(`Unexpected character: ${char}`)
}
@ -186,40 +207,40 @@ export function tokenize(source: string): Token[] {
}
case '.': {
if (source[i + 1] === '.' && source[i + 2] === '.') {
tokens.push({ kind: 'dot-dot-dot' })
tokens.push({ kind: 'dot-dot-dot', line: startLine, column: startColumn, start })
i += 2;
} else {
tokens.push({ kind: 'dot' });
tokens.push({ kind: 'dot', line: startLine, column: startColumn, start });
}
break;
}
case ':': tokens.push({ kind: 'colon' }); break;
case ';': tokens.push({ kind: 'semicolon' }); break;
case '\\': tokens.push({ kind: 'backslash' }); break;
case '~': tokens.push({ kind: 'tilde' }); break;
case '|': tokens.push({ kind: 'pipe' }); break;
case ',': tokens.push({ kind: 'comma' }); break;
case '&': tokens.push({ kind: 'ampersand' }); break;
case '@': tokens.push({ kind: 'at' }); break;
case ':': tokens.push({ kind: 'colon', line: startLine, column: startColumn, start }); break;
case ';': tokens.push({ kind: 'semicolon', line: startLine, column: startColumn, start }); break;
case '\\': tokens.push({ kind: 'backslash', line: startLine, column: startColumn, start }); break;
case '~': tokens.push({ kind: 'tilde', line: startLine, column: startColumn, start }); break;
case '|': tokens.push({ kind: 'pipe', line: startLine, column: startColumn, start }); break;
case ',': tokens.push({ kind: 'comma', line: startLine, column: startColumn, start }); break;
case '&': tokens.push({ kind: 'ampersand', line: startLine, column: startColumn, start }); break;
case '@': tokens.push({ kind: 'at', line: startLine, column: startColumn, start }); break;
// Arithmetic
case '+': tokens.push({ kind: 'plus' }); break;
case '-': tokens.push({ kind: 'minus' }); break;
case '*': tokens.push({ kind: 'star' }); break;
case '/': tokens.push({ kind: 'slash' }); break;
case '^': tokens.push({ kind: 'caret' }); break;
case '%': tokens.push({ kind: 'percent' }); break;
case '+': tokens.push({ kind: 'plus', line: startLine, column: startColumn, start }); break;
case '-': tokens.push({ kind: 'minus', line: startLine, column: startColumn, start }); break;
case '*': tokens.push({ kind: 'star', line: startLine, column: startColumn, start }); break;
case '/': tokens.push({ kind: 'slash', line: startLine, column: startColumn, start }); break;
case '^': tokens.push({ kind: 'caret', line: startLine, column: startColumn, start }); break;
case '%': tokens.push({ kind: 'percent', line: startLine, column: startColumn, start }); break;
// Brackets
case '(': tokens.push({ kind: 'open-paren' }); break;
case ')': tokens.push({ kind: 'close-paren' }); break;
case '{': tokens.push({ kind: 'open-brace' }); break;
case '}': tokens.push({ kind: 'close-brace' }); break;
case '[': tokens.push({ kind: 'open-bracket' }); break;
case ']': tokens.push({ kind: 'close-bracket' }); break;
case '(': tokens.push({ kind: 'open-paren', line: startLine, column: startColumn, start }); break;
case ')': tokens.push({ kind: 'close-paren', line: startLine, column: startColumn, start }); break;
case '{': tokens.push({ kind: 'open-brace', line: startLine, column: startColumn, start }); break;
case '}': tokens.push({ kind: 'close-brace', line: startLine, column: startColumn, start }); break;
case '[': tokens.push({ kind: 'open-bracket', line: startLine, column: startColumn, start }); break;
case ']': tokens.push({ kind: 'close-bracket', line: startLine, column: startColumn, start }); break;
}
i++;
advance();
}
return tokens;