Got the parser working. Now I'm switching from the toy language to CG syntax. The lexer is... done? Maybe.

master
Dustin Swan 7 days ago
parent f74d374555
commit 71237d0307
Signed by: dustinswan
GPG Key ID: 30D46587E2100467

@ -1,135 +1,161 @@
export type Token = export type Token =
| { kind: 'let' } // Literals
| { kind: 'in' } | { kind: 'int', value: number }
| { kind: 'float', value: number }
| { kind: 'number', value: number } | { kind: 'string', value: string }
| { kind: 'ident', value: string } | { kind: 'ident', value: string }
| { kind: 'type-ident', value: string }
| { kind: 'equals' } // Brackets
| { kind: 'open-paren' } | { kind: 'open-paren' }
| { kind: 'close-paren' } | { kind: 'close-paren' }
| { kind: 'open-brace' }
| { kind: 'close-brace' }
| { kind: 'open-bracket' }
| { kind: 'close-bracket' }
// Symbols
| { kind: 'equals' }
| { kind: 'colon' }
| { kind: 'backslash' }
| { kind: 'pipe' }
| { kind: 'greater-than' }
| { kind: 'comma' } | { kind: 'comma' }
| { kind: 'arrow' } | { kind: 'ampersand' }
| { kind: 'underscore' }
| { kind: 'if' } | { kind: 'dot' }
| { kind: 'then' } | { kind: 'at' }
| { kind: 'else' }
| { kind: 'true' }
| { kind: 'false' }
// Arithmetic
| { kind: 'plus' } | { kind: 'plus' }
| { kind: 'minus' } | { kind: 'minus' }
| { kind: 'star' } | { kind: 'star' }
| { kind: 'slash' } | { kind: 'slash' }
| { kind: 'less-than' }
| { kind: 'greater-than' } | { kind: 'eof' }
| { kind: 'double-equals' }
export function tokenize(source: string): Token[] { export function tokenize(source: string): Token[] {
const tokens = []; const tokens: Token[] = [];
let i = 0; let i = 0;
while (i < source.length) { while (i < source.length) {
const char = source[i]; const char = source[i];
// skip whitespace // Whitespace
if (/\s/.test(char)) { if (/\s/.test(char)) {
i++; i++;
continue; continue;
} }
// Multi-char: numbers if (char === '#') {
if (/[0-9]/.test(char)) { while (i < source.length && source[i] !== '\n') {
let num = '';
while (i < source.length && /[0-9]/.test(source[i])) {
num += source[i];
i++; i++;
} }
tokens.push({ kind: 'number', value: parseInt(num) });
continue; continue;
} }
// Multi-char: equals // Numbers
if (char === '=') { if (/[0-9]/.test(char)) { // have to start with a digit (?)
const nextChar = source[i + 1]; let num = '';
let hasDot = false;
if (nextChar === '=') { while (i < source.length && /[0-9.]/.test(source[i])) {
tokens.push({ kind: 'double-equals' }); if (source[i] === '.') {
i++; if (hasDot) break;
continue; hasDot = true;
} else if (nextChar === '>') { }
tokens.push({ kind: 'arrow' }); num += source[i];
i++; i++;
}
tokens.push(hasDot
? { kind: 'float', value: parseFloat(num) }
: { kind: 'int', value: parseInt(num) });
continue; continue;
} else { }
tokens.push({ kind: 'equals' });
// Idents
if (/[A-Za-z_]/.test(char)) {
let str = '';
while (i < source.length && /[A-Za-z0-9_!-]/.test(source[i])) {
str += source[i];
i++; i++;
continue;
} }
const isType = /[A-Z]/.test(str[0]);
tokens.push(isType
? { kind: 'type-ident', value: str }
: { kind: 'ident', value: str });
continue;
} }
// Multi-char: strings // Strings
if (/[A-Za-z]/.test(char)) { if (char === '"') {
i++;
let str = ''; let str = '';
while (i < source.length && /[A-Za-z]/.test(source[i])) {
str += source[i]; while (i < source.length && source[i] !== '"') {
if (source[i] === '\\') {
i++; i++;
if (i >= source.length) {
throw new Error('Unterminated string');
} }
if (str === 'let') { switch(source[i]) {
tokens.push({ kind: 'let' }); case 'n': str += '\n'; break;
} else if (str === 'in') { case '"': str += '"'; break;
tokens.push({ kind: 'in' }); case '\\': str += '\\'; break;
} else if (str === 'if') { case 't': str += '\t'; break;
tokens.push({ kind: 'if' }); }
} else if (str === 'then') {
tokens.push({ kind: 'then' });
} else if (str === 'else') {
tokens.push({ kind: 'else' });
} else if (str === 'true') {
tokens.push({ kind: 'true' });
} else if (str === 'false') {
tokens.push({ kind: 'false' });
} else { } else {
tokens.push({ kind: 'ident', value: str }); str += source[i];
} }
continue; i++;
}
if (i >= source.length) {
throw new Error('Unterminated string');
} }
// TODO: floats tokens.push({ kind: 'string', value: str });
i++;
continue;
}
switch (char) { switch (char) {
case ',': // Brackets
tokens.push({ kind: 'comma' }); case '(': tokens.push({ kind: 'open-paren' }); break;
break; case ')': tokens.push({ kind: 'close-paren' }); break;
case '+': case '{': tokens.push({ kind: 'open-brace' }); break;
tokens.push({ kind: 'plus' }); case '}': tokens.push({ kind: 'close-brace' }); break;
break; case '[': tokens.push({ kind: 'open-bracket' }); break;
case '-': case ']': tokens.push({ kind: 'close-bracket' }); break;
tokens.push({ kind: 'minus' });
break; // Symbols
case '*': case '=': tokens.push({ kind: 'equals' }); break;
tokens.push({ kind: 'star' }); case ':': tokens.push({ kind: 'colon' }); break;
break; case '\\': tokens.push({ kind: 'backslash' }); break;
case '/': case '|': tokens.push({ kind: 'pipe' }); break;
tokens.push({ kind: 'slash' }); // case '<': tokens.push({ kind: 'less-than' }); break;
break; case '>': tokens.push({ kind: 'greater-than' }); break;
case '(': case ',': tokens.push({ kind: 'comma' }); break;
tokens.push({ kind: 'open-paren' }); case '&': tokens.push({ kind: 'ampersand' }); break;
break; case '_': tokens.push({ kind: 'underscore' }); break;
case ')': case '.': tokens.push({ kind: 'dot' }); break;
tokens.push({ kind: 'close-paren' }); case '@': tokens.push({ kind: 'at' }); break;
break;
case '<': // Arithmetic
tokens.push({ kind: 'less-than' }); case '+': tokens.push({ kind: 'plus' }); break;
break; case '-': tokens.push({ kind: 'minus' }); break;
case '>': case '*': tokens.push({ kind: 'star' }); break;
tokens.push({ kind: 'greater-than' }); case '/': tokens.push({ kind: 'slash' }); break;
break;
} }
i++; i++;

@ -2,6 +2,7 @@ import { evaluate } from './interpreter'
import type { AST } from './ast' import type { AST } from './ast'
import type { Env } from './env' import type { Env } from './env'
import { tokenize } from './lexer' import { tokenize } from './lexer'
import { Parser } from './parser'
const ast: AST = { const ast: AST = {
kind: 'binaryop', kind: 'binaryop',
@ -26,52 +27,28 @@ const ast: AST = {
const env: Env = new Map(); const env: Env = new Map();
const res = evaluate(ast, env); // const res = evaluate(ast, env);
console.log(res); // console.log(res);
const ast2: AST = { const str = `
kind: 'let', # This is a comment
name: 'add', double = x \\ x * 2
value: { read_line! : Unit \\ String
kind: 'lambda', `;
params: ['x', 'y'], console.log(tokenize(str));
body: {
kind: 'binaryop', // const tokens = tokenize("x");
operator: '+', // const p = new Parser(tokens);
left: { // console.log(p.parse());
kind: 'variable',
name: 'x', /*
}, const tokens2 = tokenize("let x = (y) => 5 + y in x(3)");
right: { // const tokens2 = tokenize("let x = 5 in x * 4");
kind: 'variable', // const tokens2 = tokenize("(x, y) => x + y");
name: 'y', const p2 = new Parser(tokens2);
} const ast3 = p2.parse();
} const env3: Env = new Map();
}, console.log(ast3);
body: { console.log(evaluate(ast3, env3));
kind: 'apply', */
func: {
kind: 'variable',
name: 'add'
},
args: [
{
kind: 'literal',
value: { kind: 'int', value: 2 }
},
{
kind: 'literal',
value: { kind: 'int', value: 3 }
},
]
}
};
const env2: Env = new Map();
const res2 = evaluate(ast2, env2);
console.log(res2);
console.log(tokenize("let x = 5"));

@ -0,0 +1,177 @@
import type { Token } from './lexer'
import type { AST } from './ast'
/**
 * Recursive-descent parser that turns a flat token list into an AST.
 *
 * Grammar handled here, lowest precedence first:
 *
 *   expression     := let | additive
 *   let            := 'let' ident 'equals' expression 'in' expression
 *   additive       := multiplicative (('plus' | 'minus') multiplicative)*
 *   multiplicative := postfix (('star' | 'slash') postfix)*
 *   postfix        := primary ('(' (expression (',' expression)*)? ')')*
 *   primary        := lambda | '(' expression ')' | number | ident
 *   lambda         := '(' (ident (',' ident)*)? ')' 'arrow' expression
 *
 * Binary operators are left-associative.
 *
 * NOTE(review): this parser matches the token kinds 'let', 'in', 'number' and
 * 'arrow'. Confirm that the lexer it is paired with still emits those kinds.
 */
export class Parser {
    private readonly tokens: Token[];
    private pos = 0;

    /**
     * @param tokens Tokens to parse, in source order. A trailing 'eof' token
     *               is optional; running off the end is treated as 'eof'.
     */
    constructor(tokens: Token[]) {
        this.tokens = tokens;
    }

    /**
     * Parses the whole token list as a single expression.
     *
     * @returns The AST of the expression.
     * @throws Error if a token is unexpected, or if tokens remain after the
     *         expression has been parsed.
     */
    parse(): AST {
        const ast = this.parseExpression();
        // Refuse leftovers instead of silently dropping them (e.g. `1 2`).
        const trailing = this.current();
        if (trailing.kind !== 'eof') {
            throw new Error(`Unexpected token after expression: ${trailing.kind}`);
        }
        return ast;
    }

    /** Returns the token `offset` positions ahead, or a synthetic 'eof' past the end. */
    private peek(offset = 0): Token {
        // The cast keeps this compiling whether or not Token declares 'eof'.
        return this.tokens[this.pos + offset] ?? ({ kind: 'eof' } as Token);
    }

    /** Returns the token at the current position without consuming it. */
    private current(): Token {
        return this.peek(0);
    }

    /**
     * Consumes and returns the current token. At the end of input it returns
     * the synthetic 'eof' token and leaves the position where it is.
     */
    private advance(): Token {
        const token = this.current();
        if (this.pos < this.tokens.length) {
            this.pos++;
        }
        return token;
    }

    /**
     * Consumes the current token if it has the given kind.
     *
     * @throws Error naming the expected and the actual kind on mismatch.
     */
    private expect(kind: Token['kind']): Token {
        const token = this.current();
        if (token.kind !== kind) {
            throw new Error(`Expected ${kind}, got ${token.kind}`);
        }
        return this.advance();
    }

    /** Consumes an 'ident' token and returns its name. */
    private expectIdent(): string {
        const token = this.current();
        if (token.kind !== 'ident') {
            throw new Error(`Expected ident, got ${token.kind}`);
        }
        this.advance();
        return token.value;
    }

    /** expression := let | additive */
    private parseExpression(): AST {
        if (this.current().kind === 'let') {
            return this.parseLet();
        }
        return this.parseAdditive();
    }

    /** let := 'let' ident 'equals' expression 'in' expression */
    private parseLet(): AST {
        this.expect('let');
        const name = this.expectIdent();
        this.expect('equals');
        const value = this.parseExpression();
        this.expect('in');
        const body = this.parseExpression();
        return { kind: 'let', name, value, body };
    }

    /** additive := multiplicative (('plus' | 'minus') multiplicative)* */
    private parseAdditive(): AST {
        let left = this.parseMultiplicative();
        while (this.current().kind === 'plus' || this.current().kind === 'minus') {
            const op = this.current().kind === 'plus' ? '+' : '-';
            this.advance();
            const right = this.parseMultiplicative();
            left = { kind: 'binaryop', operator: op, left, right };
        }
        return left;
    }

    /** multiplicative := postfix (('star' | 'slash') postfix)* */
    private parseMultiplicative(): AST {
        let left = this.parsePostfix();
        while (this.current().kind === 'star' || this.current().kind === 'slash') {
            const op = this.current().kind === 'star' ? '*' : '/';
            this.advance();
            const right = this.parsePostfix();
            left = { kind: 'binaryop', operator: op, left, right };
        }
        return left;
    }

    /** postfix := primary followed by zero or more parenthesised argument lists. */
    private parsePostfix(): AST {
        let expr = this.parsePrimary();
        while (this.current().kind === 'open-paren') {
            this.advance();
            const args: AST[] = [];
            if (this.current().kind !== 'close-paren') {
                args.push(this.parseExpression());
                while (this.current().kind === 'comma') {
                    this.advance();
                    args.push(this.parseExpression());
                }
            }
            this.expect('close-paren');
            expr = { kind: 'apply', func: expr, args };
        }
        return expr;
    }

    /**
     * primary := lambda | '(' expression ')' | number | ident
     *
     * @throws Error if the current token cannot start a primary expression.
     */
    private parsePrimary(): AST {
        const token = this.current();

        if (token.kind === 'open-paren') {
            if (this.looksLikeLambda()) {
                return this.parseLambda();
            }
            this.advance();
            const expr = this.parseExpression();
            this.expect('close-paren');
            return expr;
        }

        if (token.kind === 'number') {
            this.advance();
            return { kind: 'literal', value: { kind: 'int', value: token.value } };
        }

        if (token.kind === 'ident') {
            this.advance();
            return { kind: 'variable', name: token.value };
        }

        throw new Error(`Unexpected token: ${token.kind}`);
    }

    /**
     * Decides, with the position on an 'open-paren', whether the parenthesis
     * opens a lambda parameter list. It does not consume anything.
     *
     * Treated as a lambda: `()`, `(ident ,` and `(ident) =>`.
     * Everything else is a parenthesised expression.
     */
    private looksLikeLambda(): boolean {
        const first = this.peek(1).kind;
        if (first === 'close-paren') {
            return true;
        }
        if (first !== 'ident') {
            return false;
        }
        const second = this.peek(2).kind;
        if (second === 'comma') {
            return true;
        }
        return second === 'close-paren' && this.peek(3).kind === 'arrow';
    }

    /** lambda := '(' (ident (',' ident)*)? ')' 'arrow' expression */
    private parseLambda(): AST {
        this.expect('open-paren');
        const params: string[] = [];
        if (this.current().kind !== 'close-paren') {
            params.push(this.expectIdent());
            while (this.current().kind === 'comma') {
                this.advance();
                params.push(this.expectIdent());
            }
        }
        this.expect('close-paren');
        this.expect('arrow');
        const body = this.parseExpression();
        return { kind: 'lambda', params, body };
    }
}
Loading…
Cancel
Save