diff --git a/src/lexer.ts b/src/lexer.ts index a5ff034..73261b0 100644 --- a/src/lexer.ts +++ b/src/lexer.ts @@ -1,135 +1,161 @@ export type Token = - | { kind: 'let' } - | { kind: 'in' } - - | { kind: 'number', value: number } + // Literals + | { kind: 'int', value: number } + | { kind: 'float', value: number } + | { kind: 'string', value: string } | { kind: 'ident', value: string } + | { kind: 'type-ident', value: string } - | { kind: 'equals' } - + // Brackets | { kind: 'open-paren' } | { kind: 'close-paren' } + | { kind: 'open-brace' } + | { kind: 'close-brace' } + | { kind: 'open-bracket' } + | { kind: 'close-bracket' } + // Symbols + | { kind: 'equals' } + | { kind: 'colon' } + | { kind: 'backslash' } + | { kind: 'pipe' } + | { kind: 'greater-than' } | { kind: 'comma' } - | { kind: 'arrow' } - - | { kind: 'if' } - | { kind: 'then' } - | { kind: 'else' } - - | { kind: 'true' } - | { kind: 'false' } + | { kind: 'ampersand' } + | { kind: 'underscore' } + | { kind: 'dot' } + | { kind: 'at' } + // Arithmetic | { kind: 'plus' } | { kind: 'minus' } | { kind: 'star' } | { kind: 'slash' } - | { kind: 'less-than' } - | { kind: 'greater-than' } - | { kind: 'double-equals' } + + | { kind: 'eof' } export function tokenize(source: string): Token[] { - const tokens = []; + const tokens: Token[] = []; let i = 0; while (i < source.length) { const char = source[i]; - // skip whitespace + // Whitespace if (/\s/.test(char)) { i++; continue; } - // Multi-char: numbers - if (/[0-9]/.test(char)) { - let num = ''; - while (i < source.length && /[0-9]/.test(source[i])) { - num += source[i]; + if (char === '#') { + while (i < source.length && source[i] !== '\n') { i++; } - tokens.push({ kind: 'number', value: parseInt(num) }); continue; } - // Multi-char: equals - if (char === '=') { - const nextChar = source[i + 1]; + // Numbers + if (/[0-9]/.test(char)) { // have to start with a digit (?) 
+ let num = ''; + let hasDot = false; - if (nextChar === '=') { - tokens.push({ kind: 'double-equals' }); - i++; - continue; - } else if (nextChar === '>') { - tokens.push({ kind: 'arrow' }); - i++; - continue; - } else { - tokens.push({ kind: 'equals' }); + while (i < source.length && /[0-9.]/.test(source[i])) { + if (source[i] === '.') { + if (hasDot) break; + hasDot = true; + } + num += source[i]; i++; - continue; } + + tokens.push(hasDot + ? { kind: 'float', value: parseFloat(num) } + : { kind: 'int', value: parseInt(num) }); + + continue; } - // Multi-char: strings - if (/[A-Za-z]/.test(char)) { + // Idents + if (/[A-Za-z_]/.test(char)) { let str = ''; - while (i < source.length && /[A-Za-z]/.test(source[i])) { + while (i < source.length && /[A-Za-z0-9_!-]/.test(source[i])) { str += source[i]; i++; } - if (str === 'let') { - tokens.push({ kind: 'let' }); - } else if (str === 'in') { - tokens.push({ kind: 'in' }); - } else if (str === 'if') { - tokens.push({ kind: 'if' }); - } else if (str === 'then') { - tokens.push({ kind: 'then' }); - } else if (str === 'else') { - tokens.push({ kind: 'else' }); - } else if (str === 'true') { - tokens.push({ kind: 'true' }); - } else if (str === 'false') { - tokens.push({ kind: 'false' }); - } else { - tokens.push({ kind: 'ident', value: str }); - } + const isType = /[A-Z]/.test(str[0]); + + tokens.push(isType + ? 
{ kind: 'type-ident', value: str } + : { kind: 'ident', value: str }); continue; } - // TODO: floats + // Strings + if (char === '"') { + i++; + let str = ''; + + while (i < source.length && source[i] !== '"') { + if (source[i] === '\\') { + i++; + + if (i >= source.length) { + throw new Error('Unterminated string'); + } + + switch(source[i]) { + case 'n': str += '\n'; break; + case '"': str += '"'; break; + case '\\': str += '\\'; break; + case 't': str += '\t'; break; + } + } else { + str += source[i]; + } + + i++; + } + + if (i >= source.length) { + throw new Error('Unterminated string'); + } + + tokens.push({ kind: 'string', value: str }); + + i++; + + continue; + } switch (char) { - case ',': - tokens.push({ kind: 'comma' }); - break; - case '+': - tokens.push({ kind: 'plus' }); - break; - case '-': - tokens.push({ kind: 'minus' }); - break; - case '*': - tokens.push({ kind: 'star' }); - break; - case '/': - tokens.push({ kind: 'slash' }); - break; - case '(': - tokens.push({ kind: 'open-paren' }); - break; - case ')': - tokens.push({ kind: 'close-paren' }); - break; - case '<': - tokens.push({ kind: 'less-than' }); - break; - case '>': - tokens.push({ kind: 'greater-than' }); - break; + // Brackets + case '(': tokens.push({ kind: 'open-paren' }); break; + case ')': tokens.push({ kind: 'close-paren' }); break; + case '{': tokens.push({ kind: 'open-brace' }); break; + case '}': tokens.push({ kind: 'close-brace' }); break; + case '[': tokens.push({ kind: 'open-bracket' }); break; + case ']': tokens.push({ kind: 'close-bracket' }); break; + + // Symbols + case '=': tokens.push({ kind: 'equals' }); break; + case ':': tokens.push({ kind: 'colon' }); break; + case '\\': tokens.push({ kind: 'backslash' }); break; + case '|': tokens.push({ kind: 'pipe' }); break; + // case '<': tokens.push({ kind: 'less-than' }); break; + case '>': tokens.push({ kind: 'greater-than' }); break; + case ',': tokens.push({ kind: 'comma' }); break; + case '&': tokens.push({ kind: 
'ampersand' }); break; + case '_': tokens.push({ kind: 'underscore' }); break; + case '.': tokens.push({ kind: 'dot' }); break; + case '@': tokens.push({ kind: 'at' }); break; + + // Arithmetic + case '+': tokens.push({ kind: 'plus' }); break; + case '-': tokens.push({ kind: 'minus' }); break; + case '*': tokens.push({ kind: 'star' }); break; + case '/': tokens.push({ kind: 'slash' }); break; } i++; diff --git a/src/main.ts b/src/main.ts index 0c54a72..08f3329 100644 --- a/src/main.ts +++ b/src/main.ts @@ -2,6 +2,7 @@ import { evaluate } from './interpreter' import type { AST } from './ast' import type { Env } from './env' import { tokenize } from './lexer' +import { Parser } from './parser' const ast: AST = { kind: 'binaryop', @@ -26,52 +27,28 @@ const ast: AST = { const env: Env = new Map(); -const res = evaluate(ast, env); - -console.log(res); - -const ast2: AST = { - kind: 'let', - name: 'add', - value: { - kind: 'lambda', - params: ['x', 'y'], - body: { - kind: 'binaryop', - operator: '+', - left: { - kind: 'variable', - name: 'x', - }, - right: { - kind: 'variable', - name: 'y', - } - } - }, - body: { - kind: 'apply', - func: { - kind: 'variable', - name: 'add' - }, - args: [ - { - kind: 'literal', - value: { kind: 'int', value: 2 } - }, - { - kind: 'literal', - value: { kind: 'int', value: 3 } - }, - ] - } -}; - -const env2: Env = new Map(); - -const res2 = evaluate(ast2, env2); - -console.log(res2); - -console.log(tokenize("let x = 5")); +// const res = evaluate(ast, env); + +// console.log(res); + +const str = ` +# This is a comment +double = x \\ x * 2 +read_line! 
: Unit \\ String +`; +console.log(tokenize(str)); + +// const tokens = tokenize("x"); +// const p = new Parser(tokens); +// console.log(p.parse()); + +/* +const tokens2 = tokenize("let x = (y) => 5 + y in x(3)"); +// const tokens2 = tokenize("let x = 5 in x * 4"); +// const tokens2 = tokenize("(x, y) => x + y"); +const p2 = new Parser(tokens2); +const ast3 = p2.parse(); +const env3: Env = new Map(); +console.log(ast3); +console.log(evaluate(ast3, env3)); +*/ diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 0000000..df16e4f --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,177 @@ +import type { Token } from './lexer' +import type { AST } from './ast' + +export class Parser { + private tokens: Token[] + private pos: number = 0 + + constructor(tokens: Token[]) { + this.tokens = tokens; + console.log("tokens", tokens); + } + + private current(): Token { + if (this.pos >= this.tokens.length) { + return { kind: 'eof' } as Token; + } + return this.tokens[this.pos]; + } + + private advance(): Token { + return this.tokens[this.pos++]; + } + + parse(): AST { + return this.parseExpression(); + } + + private parseExpression(): AST { + if (this.current().kind === 'let') { + return this.parseLet(); + } + return this.parseAdditive(); + } + + private parsePrimary(): AST { + const token = this.current(); + + if (token.kind === 'open-paren') { + const savedPos = this.pos; + this.advance(); + + let isLambda = false; + if (this.current().kind === 'close-paren') { + isLambda = true; + } else if (this.current().kind === 'ident') { + let tempPos = this.pos; + this.advance(); + if (this.current().kind === 'comma' || (this.current().kind === 'close-paren' && this.tokens[this.pos + 1]?.kind === 'arrow')) { + isLambda = true; + } + this.pos = tempPos; + } + + this.pos = savedPos; + + if (isLambda) { + return this.parseLambda(); + } + + this.advance(); + const expr = this.parseExpression(); + this.expect('close-paren'); + return expr; + } + + if (token.kind === 'number') { + 
this.advance(); + + return { kind: 'literal', value: { kind: 'int', value: token.value } }; + } + + if (token.kind === 'ident') { + this.advance(); + + return { kind: 'variable', name: token.value }; + } + + throw new Error(`Unexpected token: ${token.kind}`); + } + + private parseMultiplicative(): AST { + let left = this.parsePostfix(); + + while (this.current().kind === 'star' || this.current().kind === 'slash') { + const op = this.current().kind === 'star' ? '*' : '/'; + this.advance(); + const right = this.parsePostfix(); + + left = { kind: 'binaryop', operator: op, left, right }; + } + + return left; + } + + private parseAdditive(): AST { + let left = this.parseMultiplicative(); + + while (this.current().kind === 'plus' || this.current().kind === 'minus') { + const op = this.current().kind === 'plus' ? '+' : '-'; + this.advance(); + const right = this.parseMultiplicative(); + + left = { kind: 'binaryop', operator: op, left, right }; + } + + return left; + } + + private parseLet(): AST { + this.expect('let'); + const nameToken = this.expect('ident'); + const name = (nameToken as { value: string }).value; + this.expect('equals'); + const value = this.parseExpression(); + this.expect('in'); + const body = this.parseExpression(); + + return { kind: 'let', name, value, body }; + } + + private parseLambda(): AST { + this.expect('open-paren'); + + const params: string[] = []; + + if (this.current().kind !== 'close-paren') { + const first = this.expect('ident'); + params.push((first as { value: string }).value); + + while (this.current().kind === 'comma') { + this.advance(); + const param = this.expect('ident'); + params.push((param as { value: string }).value); + } + } + + this.expect('close-paren'); + this.expect('arrow'); + + const body = this.parseExpression(); + + return { kind: 'lambda', params, body } + } + + private parsePostfix(): AST { + let expr = this.parsePrimary(); + + while (this.current().kind === 'open-paren') { + this.advance(); + + const args: AST[] 
= []; + if (this.current().kind !== 'close-paren') { + args.push(this.parseExpression()); + + while (this.current().kind === 'comma') { + this.advance(); + args.push(this.parseExpression()); + } + } + + this.expect('close-paren'); + + expr = { kind: 'apply', func: expr, args }; + } + + return expr; + } + + private expect(kind: Token['kind']): Token { + const token = this.current(); + if (token.kind !== kind) { + throw new Error(`Expected ${kind}, got ${token.kind}`); + } + + return this.advance(); + } +}