Better errors!
This commit is contained in:
parent
c44f06268f
commit
9d1b079361
8 changed files with 305 additions and 132 deletions
119
src/lexer.ts
119
src/lexer.ts
|
|
@ -1,4 +1,4 @@
|
|||
export type Token =
|
||||
export type Token = (
|
||||
// Literals
|
||||
| { kind: 'int', value: number }
|
||||
| { kind: 'float', value: number }
|
||||
|
|
@ -45,24 +45,45 @@ export type Token =
|
|||
| { kind: 'caret' }
|
||||
|
||||
| { kind: 'eof' }
|
||||
) & {
|
||||
line: number;
|
||||
column: number;
|
||||
start: number; // char offset in source
|
||||
}
|
||||
|
||||
export function tokenize(source: string): Token[] {
|
||||
const tokens: Token[] = [];
|
||||
|
||||
let i = 0;
|
||||
let line = 1;
|
||||
let column = 1;
|
||||
|
||||
function advance() {
|
||||
if (source[i] === '\n') {
|
||||
line++;
|
||||
column = 1
|
||||
} else {
|
||||
column++;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
while (i < source.length) {
|
||||
const char = source[i];
|
||||
const start = i;
|
||||
const startLine = line;
|
||||
const startColumn = column;
|
||||
|
||||
// Whitespace
|
||||
if (/\s/.test(char)) {
|
||||
i++;
|
||||
advance();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Comments
|
||||
if (char === '#') {
|
||||
while (i < source.length && source[i] !== '\n') {
|
||||
i++;
|
||||
advance();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
|
@ -78,12 +99,12 @@ export function tokenize(source: string): Token[] {
|
|||
hasDot = true;
|
||||
}
|
||||
num += source[i];
|
||||
i++;
|
||||
advance();
|
||||
}
|
||||
|
||||
tokens.push(hasDot
|
||||
? { kind: 'float', value: parseFloat(num) }
|
||||
: { kind: 'int', value: parseInt(num) });
|
||||
? { kind: 'float', value: parseFloat(num), line: startLine, column: startColumn, start }
|
||||
: { kind: 'int', value: parseInt(num), line: startLine, column: startColumn, start });
|
||||
|
||||
continue;
|
||||
}
|
||||
|
|
@ -93,18 +114,18 @@ export function tokenize(source: string): Token[] {
|
|||
let str = '';
|
||||
while (i < source.length && /[A-Za-z0-9_!-]/.test(source[i])) {
|
||||
str += source[i];
|
||||
i++;
|
||||
advance();
|
||||
}
|
||||
|
||||
if (str === '_') {
|
||||
// Wildcards
|
||||
tokens.push({ kind: 'underscore' });
|
||||
tokens.push({ kind: 'underscore', line: startLine, column: startColumn, start });
|
||||
} else {
|
||||
const isType = /[A-Z]/.test(str[0]);
|
||||
|
||||
tokens.push(isType
|
||||
? { kind: 'type-ident', value: str }
|
||||
: { kind: 'ident', value: str });
|
||||
? { kind: 'type-ident', value: str, line: startLine, column: startColumn, start }
|
||||
: { kind: 'ident', value: str, line: startLine, column: startColumn, start });
|
||||
}
|
||||
|
||||
continue;
|
||||
|
|
@ -112,12 +133,12 @@ export function tokenize(source: string): Token[] {
|
|||
|
||||
// Strings
|
||||
if (char === '"') {
|
||||
i++;
|
||||
advance();
|
||||
let str = '';
|
||||
|
||||
while (i < source.length && source[i] !== '"') {
|
||||
if (source[i] === '\\') {
|
||||
i++;
|
||||
advance();
|
||||
|
||||
if (i >= source.length) {
|
||||
throw new Error('Unterminated string');
|
||||
|
|
@ -133,16 +154,16 @@ export function tokenize(source: string): Token[] {
|
|||
str += source[i];
|
||||
}
|
||||
|
||||
i++;
|
||||
advance();
|
||||
}
|
||||
|
||||
if (i >= source.length) {
|
||||
throw new Error('Unterminated string');
|
||||
}
|
||||
|
||||
tokens.push({ kind: 'string', value: str });
|
||||
tokens.push({ kind: 'string', value: str, line: startLine, column: startColumn, start });
|
||||
|
||||
i++;
|
||||
advance();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
|
@ -150,35 +171,35 @@ export function tokenize(source: string): Token[] {
|
|||
switch (char) {
|
||||
case '>': {
|
||||
if (source[i + 1] === '=') {
|
||||
tokens.push({ kind: 'greater-equals' });
|
||||
i++;
|
||||
tokens.push({ kind: 'greater-equals', line: startLine, column: startColumn, start });
|
||||
advance();
|
||||
} else {
|
||||
tokens.push({ kind: 'greater-than' });
|
||||
tokens.push({ kind: 'greater-than', line: startLine, column: startColumn, start });
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '<': {
|
||||
if (source[i + 1] === '=') {
|
||||
tokens.push({ kind: 'less-equals' });
|
||||
i++;
|
||||
tokens.push({ kind: 'less-equals', line: startLine, column: startColumn, start });
|
||||
advance();
|
||||
} else {
|
||||
tokens.push({ kind: 'less-than' });
|
||||
tokens.push({ kind: 'less-than', line: startLine, column: startColumn, start });
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '=': {
|
||||
if (source[i + 1] === '=') {
|
||||
tokens.push({ kind: 'equals-equals' });
|
||||
i++;
|
||||
tokens.push({ kind: 'equals-equals', line: startLine, column: startColumn, start });
|
||||
advance();
|
||||
} else {
|
||||
tokens.push({ kind: 'equals' });
|
||||
tokens.push({ kind: 'equals', line: startLine, column: startColumn, start });
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '!': {
|
||||
if (source[i + 1] === '=') {
|
||||
tokens.push({ kind: 'not-equals' });
|
||||
i++;
|
||||
tokens.push({ kind: 'not-equals', line: startLine, column: startColumn, start });
|
||||
advance();
|
||||
} else {
|
||||
throw new Error(`Unexpected character: ${char}`)
|
||||
}
|
||||
|
|
@ -186,40 +207,40 @@ export function tokenize(source: string): Token[] {
|
|||
}
|
||||
case '.': {
|
||||
if (source[i + 1] === '.' && source[i + 2] === '.') {
|
||||
tokens.push({ kind: 'dot-dot-dot' })
|
||||
tokens.push({ kind: 'dot-dot-dot', line: startLine, column: startColumn, start })
|
||||
i += 2;
|
||||
} else {
|
||||
tokens.push({ kind: 'dot' });
|
||||
tokens.push({ kind: 'dot', line: startLine, column: startColumn, start });
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ':': tokens.push({ kind: 'colon' }); break;
|
||||
case ';': tokens.push({ kind: 'semicolon' }); break;
|
||||
case '\\': tokens.push({ kind: 'backslash' }); break;
|
||||
case '~': tokens.push({ kind: 'tilde' }); break;
|
||||
case '|': tokens.push({ kind: 'pipe' }); break;
|
||||
case ',': tokens.push({ kind: 'comma' }); break;
|
||||
case '&': tokens.push({ kind: 'ampersand' }); break;
|
||||
case '@': tokens.push({ kind: 'at' }); break;
|
||||
case ':': tokens.push({ kind: 'colon', line: startLine, column: startColumn, start }); break;
|
||||
case ';': tokens.push({ kind: 'semicolon', line: startLine, column: startColumn, start }); break;
|
||||
case '\\': tokens.push({ kind: 'backslash', line: startLine, column: startColumn, start }); break;
|
||||
case '~': tokens.push({ kind: 'tilde', line: startLine, column: startColumn, start }); break;
|
||||
case '|': tokens.push({ kind: 'pipe', line: startLine, column: startColumn, start }); break;
|
||||
case ',': tokens.push({ kind: 'comma', line: startLine, column: startColumn, start }); break;
|
||||
case '&': tokens.push({ kind: 'ampersand', line: startLine, column: startColumn, start }); break;
|
||||
case '@': tokens.push({ kind: 'at', line: startLine, column: startColumn, start }); break;
|
||||
|
||||
// Arithmetic
|
||||
case '+': tokens.push({ kind: 'plus' }); break;
|
||||
case '-': tokens.push({ kind: 'minus' }); break;
|
||||
case '*': tokens.push({ kind: 'star' }); break;
|
||||
case '/': tokens.push({ kind: 'slash' }); break;
|
||||
case '^': tokens.push({ kind: 'caret' }); break;
|
||||
case '%': tokens.push({ kind: 'percent' }); break;
|
||||
case '+': tokens.push({ kind: 'plus', line: startLine, column: startColumn, start }); break;
|
||||
case '-': tokens.push({ kind: 'minus', line: startLine, column: startColumn, start }); break;
|
||||
case '*': tokens.push({ kind: 'star', line: startLine, column: startColumn, start }); break;
|
||||
case '/': tokens.push({ kind: 'slash', line: startLine, column: startColumn, start }); break;
|
||||
case '^': tokens.push({ kind: 'caret', line: startLine, column: startColumn, start }); break;
|
||||
case '%': tokens.push({ kind: 'percent', line: startLine, column: startColumn, start }); break;
|
||||
|
||||
// Brackets
|
||||
case '(': tokens.push({ kind: 'open-paren' }); break;
|
||||
case ')': tokens.push({ kind: 'close-paren' }); break;
|
||||
case '{': tokens.push({ kind: 'open-brace' }); break;
|
||||
case '}': tokens.push({ kind: 'close-brace' }); break;
|
||||
case '[': tokens.push({ kind: 'open-bracket' }); break;
|
||||
case ']': tokens.push({ kind: 'close-bracket' }); break;
|
||||
case '(': tokens.push({ kind: 'open-paren', line: startLine, column: startColumn, start }); break;
|
||||
case ')': tokens.push({ kind: 'close-paren', line: startLine, column: startColumn, start }); break;
|
||||
case '{': tokens.push({ kind: 'open-brace', line: startLine, column: startColumn, start }); break;
|
||||
case '}': tokens.push({ kind: 'close-brace', line: startLine, column: startColumn, start }); break;
|
||||
case '[': tokens.push({ kind: 'open-bracket', line: startLine, column: startColumn, start }); break;
|
||||
case ']': tokens.push({ kind: 'close-bracket', line: startLine, column: startColumn, start }); break;
|
||||
}
|
||||
|
||||
i++;
|
||||
advance();
|
||||
}
|
||||
|
||||
return tokens;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue