/**
 * A lexical token produced by {@link tokenize}.
 *
 * Tokens form a discriminated union on `kind`. Literal and identifier tokens
 * carry a `value`; every token carries the position of its first character:
 * `line` and `column` are 1-based, `start` is the 0-based character offset
 * into the source string.
 */
export type Token = (
  // Literals
  | { kind: 'int', value: number }
  | { kind: 'float', value: number }
  | { kind: 'string', value: string }
  | { kind: 'ident', value: string }
  | { kind: 'type-ident', value: string }

  // Brackets
  | { kind: 'open-paren' }
  | { kind: 'close-paren' }
  | { kind: 'open-brace' }
  | { kind: 'close-brace' }
  | { kind: 'open-bracket' }
  | { kind: 'close-bracket' }

  // Comparison
  | { kind: 'equals' }
  | { kind: 'equals-equals' }
  | { kind: 'not-equals' }
  | { kind: 'greater-than' }
  | { kind: 'greater-equals' }
  | { kind: 'less-than' }
  | { kind: 'less-equals' }

  // Symbols
  | { kind: 'colon' }
  | { kind: 'colon-equals' }
  | { kind: 'semicolon' }
  | { kind: 'backslash' }
  | { kind: 'pipe' }
  | { kind: 'tilde' }
  | { kind: 'comma' }
  | { kind: 'ampersand' }
  | { kind: 'underscore' }
  | { kind: 'dot' }
  | { kind: 'dot-dot-dot' }
  | { kind: 'at' }

  // Arithmetic
  | { kind: 'plus' }
  | { kind: 'minus' }
  | { kind: 'star' }
  | { kind: 'slash' }
  | { kind: 'percent' }
  | { kind: 'caret' }

  | { kind: 'eof' }
) & {
  line: number;
  column: number;
  start: number; // char offset in source
}

/**
 * Splits `source` into a flat list of tokens.
 *
 * Lexical rules, in the order they are tried at each position:
 * - whitespace is skipped;
 * - `#` starts a comment that runs to the end of the line;
 * - a digit starts a number: digits with at most one `.` (`float` when a dot
 *   was consumed, `int` otherwise);
 * - a letter or `_` starts an identifier, which may continue with letters,
 *   digits, `_`, `!` and `-`. A lone `_` is the `underscore` token; a leading
 *   uppercase letter makes a `type-ident`, anything else an `ident`;
 * - `"` starts a string literal supporting the escapes `\n`, `\t`, `\"`, `\\`;
 * - everything else must be one of the known operators / brackets.
 *
 * No `eof` token is appended; the result ends with the last real token.
 *
 * @param source - Program text to tokenize.
 * @returns The tokens in source order, each tagged with its start position.
 * @throws Error `Unterminated string` when a string literal is not closed.
 * @throws Error `Unknown escape sequence: \x` for an unsupported escape.
 * @throws Error `Unexpected character: x` for a character that starts no token
 *   (including a `!` that is not followed by `=`).
 */
export function tokenize(source: string): Token[] {
  // Hoisted so the main loop does not rebuild a RegExp per character.
  const WHITESPACE = /\s/;
  const DIGIT = /[0-9]/;
  const IDENT_START = /[A-Za-z_]/;
  const IDENT_PART = /[A-Za-z0-9_!-]/;
  const UPPERCASE = /[A-Z]/;

  const tokens: Token[] = [];
  let i = 0;
  let line = 1;
  let column = 1;

  /** Character at `i + offset`, or `''` when that is past the end of `source`. */
  function peek(offset = 0): string {
    return source.charAt(i + offset);
  }

  /** Consumes one character, keeping `line` / `column` in sync with `i`. */
  function advance(): void {
    if (source[i] === '\n') {
      line++;
      column = 1;
    } else {
      column++;
    }
    i++;
  }

  while (i < source.length) {
    const char = peek();
    const start = i;
    // Position of the token's first character, shared by every push below.
    const pos = { line, column, start };

    // Whitespace
    if (WHITESPACE.test(char)) {
      advance();
      continue;
    }

    // Comments: the newline itself is left for the whitespace rule.
    if (char === '#') {
      while (i < source.length && peek() !== '\n') {
        advance();
      }
      continue;
    }

    // Numbers (must start with a digit)
    if (DIGIT.test(char)) {
      let hasDot = false;
      while (i < source.length) {
        const c = peek();
        if (c === '.') {
          // A second '.' ends the number, and so does a '.' directly followed
          // by another '.', so that `1...` lexes as int + dot-dot-dot instead
          // of swallowing the first dot as a decimal point.
          if (hasDot || peek(1) === '.') break;
          hasDot = true;
        } else if (!DIGIT.test(c)) {
          break;
        }
        advance();
      }
      const text = source.slice(start, i);
      tokens.push(hasDot
        ? { kind: 'float', value: parseFloat(text), ...pos }
        : { kind: 'int', value: parseInt(text, 10), ...pos });
      continue;
    }

    // Idents & Wildcard
    if (IDENT_START.test(char)) {
      while (i < source.length && IDENT_PART.test(peek())) {
        advance();
      }
      const text = source.slice(start, i);
      if (text === '_') {
        // Wildcard
        tokens.push({ kind: 'underscore', ...pos });
      } else if (UPPERCASE.test(text.charAt(0))) {
        tokens.push({ kind: 'type-ident', value: text, ...pos });
      } else {
        tokens.push({ kind: 'ident', value: text, ...pos });
      }
      continue;
    }

    // Strings
    if (char === '"') {
      advance(); // opening quote
      let value = '';
      while (i < source.length && peek() !== '"') {
        const c = peek();
        if (c === '\\') {
          advance(); // the backslash
          if (i >= source.length) {
            throw new Error('Unterminated string');
          }
          const escaped = peek();
          switch (escaped) {
            case 'n': value += '\n'; break;
            case 't': value += '\t'; break;
            case '"': value += '"'; break;
            case '\\': value += '\\'; break;
            default:
              // Previously the escaped character was dropped silently.
              throw new Error(`Unknown escape sequence: \\${escaped}`);
          }
        } else {
          value += c;
        }
        advance();
      }
      if (i >= source.length) {
        throw new Error('Unterminated string');
      }
      tokens.push({ kind: 'string', value, ...pos });
      advance(); // closing quote
      continue;
    }

    // Operators and brackets. Multi-character operators consume their extra
    // characters here; the final character is consumed after the switch.
    switch (char) {
      // Comparison
      case '>':
        if (peek(1) === '=') {
          tokens.push({ kind: 'greater-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'greater-than', ...pos });
        }
        break;
      case '<':
        if (peek(1) === '=') {
          tokens.push({ kind: 'less-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'less-than', ...pos });
        }
        break;
      case '=':
        if (peek(1) === '=') {
          tokens.push({ kind: 'equals-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'equals', ...pos });
        }
        break;
      case '!':
        if (peek(1) === '=') {
          tokens.push({ kind: 'not-equals', ...pos });
          advance();
        } else {
          throw new Error(`Unexpected character: ${char}`);
        }
        break;

      // Symbols
      case '.':
        if (peek(1) === '.' && peek(2) === '.') {
          tokens.push({ kind: 'dot-dot-dot', ...pos });
          // Go through advance() (not `i += 2`) so `column` stays correct.
          advance();
          advance();
        } else {
          tokens.push({ kind: 'dot', ...pos });
        }
        break;
      case ':':
        if (peek(1) === '=') {
          tokens.push({ kind: 'colon-equals', ...pos });
          advance();
        } else {
          tokens.push({ kind: 'colon', ...pos });
        }
        break;
      case ';': tokens.push({ kind: 'semicolon', ...pos }); break;
      case '\\': tokens.push({ kind: 'backslash', ...pos }); break;
      case '~': tokens.push({ kind: 'tilde', ...pos }); break;
      case '|': tokens.push({ kind: 'pipe', ...pos }); break;
      case ',': tokens.push({ kind: 'comma', ...pos }); break;
      case '&': tokens.push({ kind: 'ampersand', ...pos }); break;
      case '@': tokens.push({ kind: 'at', ...pos }); break;

      // Arithmetic
      case '+': tokens.push({ kind: 'plus', ...pos }); break;
      case '-': tokens.push({ kind: 'minus', ...pos }); break;
      case '*': tokens.push({ kind: 'star', ...pos }); break;
      case '/': tokens.push({ kind: 'slash', ...pos }); break;
      case '^': tokens.push({ kind: 'caret', ...pos }); break;
      case '%': tokens.push({ kind: 'percent', ...pos }); break;

      // Brackets
      case '(': tokens.push({ kind: 'open-paren', ...pos }); break;
      case ')': tokens.push({ kind: 'close-paren', ...pos }); break;
      case '{': tokens.push({ kind: 'open-brace', ...pos }); break;
      case '}': tokens.push({ kind: 'close-brace', ...pos }); break;
      case '[': tokens.push({ kind: 'open-bracket', ...pos }); break;
      case ']': tokens.push({ kind: 'close-bracket', ...pos }); break;

      default:
        // Previously an unrecognised character was skipped without a trace;
        // report it the same way a stray '!' is reported.
        throw new Error(`Unexpected character: ${char}`);
    }
    advance();
  }

  return tokens;
}