cg/src/parser.ts
2026-03-28 21:39:53 -06:00

738 lines
24 KiB
TypeScript

import type { Token } from './lexer'
import type { AST, MatchCase, Pattern, Definition, TypeAST, TypeDefinition, TypeConstructor, Annotation, ClassDefinition, InstanceDeclaration } from './ast'
import { ParseError } from './error'
/**
 * Recursive-descent parser. Consumes a token array and produces the program's
 * top-level definitions, type definitions, class definitions and instance
 * declarations. Syntax errors are reported by throwing the value returned from
 * ParseError.
 */
export class Parser {
  /** Token stream being parsed. */
  private readonly tokens: Token[];
  /** Index into `tokens` of the next token to consume. */
  private pos = 0;
  /** Source text, forwarded to ParseError when an error is reported. */
  private readonly source: string;
  /**
   * Constraints parsed by the most recently completed outermost parseType()
   * call. parseDefinition() reads this right after parseType() returns.
   */
  private parsedConstraints: { className: string, typeVar: string }[] = [];

  constructor(tokens: Token[], source: string) {
    this.tokens = tokens;
    this.source = source;
  }

  /** Returns the token at the cursor, or a synthetic `eof` token past the end. */
  private current(): Token {
    if (this.pos >= this.tokens.length) {
      return { kind: 'eof' } as Token;
    }
    return this.tokens[this.pos];
  }

  /**
   * Returns the token at the cursor and moves the cursor forward by one.
   * Past the end of input this yields the synthetic `eof` token instead of
   * `undefined`, so callers can always inspect `.kind`.
   */
  private advance(): Token {
    const token = this.current();
    this.pos++;
    return token;
  }

  /** Returns the token `offset` positions after the cursor without consuming anything. */
  private peek(offset = 1): Token {
    const pos = this.pos + offset;
    if (pos >= this.tokens.length) {
      return { kind: 'eof' } as Token;
    }
    return this.tokens[pos];
  }

  /**
   * Consumes and returns the current token if it has the given kind.
   * @throws ParseError when the current token has a different kind.
   */
  private expect(kind: Token['kind']): Token {
    const token = this.current();
    if (token.kind !== kind) {
      throw ParseError(`Expected ${kind}, got ${token.kind}`, token.line, token.column, this.source);
    }
    return this.advance();
  }

  /**
   * Consumes and returns a token usable as a name: `ident`, `type-ident` or `string`.
   * @throws ParseError (with the offending token's position) otherwise.
   */
  private expectIdent(): Token {
    const token = this.current();
    if (token.kind === 'ident' || token.kind === 'type-ident' || token.kind === 'string') {
      this.advance();
      return token;
    }
    // Pass position and source like every other error site so the error can be located.
    throw ParseError(`Expected identifier, got ${token.kind}`, token.line, token.column, this.source);
  }

  /** Extracts the position fields of a token for spreading into AST nodes. */
  private getPos(token: Token) {
    return {
      line: token.line,
      column: token.column,
      start: token.start
    };
  }

  /** True when the current token is an infix operator, i.e. has a non-zero precedence. */
  private isInfixOp(): boolean {
    // precOf() is the single source of truth for which tokens are operators.
    return this.precOf(this.current()) > 0;
  }

  /**
   * Maps an operator token to the name of the function it desugars to.
   * `tilde` is not handled here; parseInfix() treats it separately.
   * @throws ParseError for any token that is not a named operator.
   */
  private tokenToOpName(token: Token): string {
    switch (token.kind) {
      case 'ampersand': return 'cat';
      // Arithmetic
      case 'plus': return 'add';
      case 'minus': return 'sub';
      case 'star': return 'mul';
      case 'slash': return 'div';
      case 'percent': return 'mod';
      case 'caret': return 'pow';
      // Comparison
      case 'equals-equals': return 'eq';
      case 'not-equals': return 'neq';
      case 'greater-than': return 'gt';
      case 'greater-equals': return 'gte';
      case 'less-than': return 'lt';
      case 'less-equals': return 'lte';
      default: throw ParseError(`Not an operator: ${token.kind}`, token.line, token.column, this.source);
    }
  }

  /** Returns the precedence of an operator token (higher binds tighter), or 0 for non-operators. */
  private precOf(token: Token): number {
    switch (token.kind) {
      case 'ampersand':
        return 1;
      case 'equals-equals':
      case 'not-equals':
      case 'greater-than':
      case 'greater-equals':
      case 'less-than':
      case 'less-equals':
        return 2;
      case 'plus':
      case 'minus':
        return 3;
      case 'star':
      case 'slash':
      case 'percent':
        return 4;
      case 'caret':
        return 5;
      case 'tilde':
        return 6;
      default:
        return 0;
    }
  }

  /** True when the current token can begin a primary expression (and thus an application argument). */
  private canStartPrimary(): boolean {
    const kind = this.current().kind;
    return kind === 'ident' || kind === 'type-ident' ||
      kind === 'int' || kind === 'float' || kind === 'string' ||
      kind === 'open-paren' || kind === 'open-bracket' ||
      kind === 'open-brace';
  }

  /**
   * True when the cursor is at a lambda: either a bare backslash, or a run of
   * `ident` / `underscore` parameters that is immediately followed by a backslash.
   */
  private isLambdaStart(): boolean {
    const kind = this.current().kind;
    if (kind === 'backslash') {
      return true;
    }
    if (kind !== 'ident' && kind !== 'underscore') {
      return false;
    }
    // Scan over the parameter run; peek() yields `eof` past the end, so this terminates.
    let offset = 1;
    while (true) {
      const token = this.peek(offset);
      if (token.kind === 'backslash') return true;
      if (token.kind !== 'ident' && token.kind !== 'underscore') return false;
      offset++;
    }
  }

  /**
   * Parses the whole token stream.
   *
   * A top-level item starting with a `type-ident` is classified by the token
   * that follows the type name and any `ident` tokens after it: `open-brace`
   * means a class definition, `colon` an instance declaration, anything else a
   * type definition. Every other item is parsed as a value definition.
   *
   * @returns the parsed items grouped by category, in source order within each group.
   * @throws ParseError on the first syntax error.
   */
  parse(): { definitions: Definition[], typeDefinitions: TypeDefinition[], classDefinitions: ClassDefinition[], instanceDeclarations: InstanceDeclaration[] } {
    const definitions: Definition[] = [];
    const typeDefinitions: TypeDefinition[] = [];
    const classDefinitions: ClassDefinition[] = [];
    const instanceDeclarations: InstanceDeclaration[] = [];
    while (this.current().kind !== 'eof') {
      if (this.current().kind === 'type-ident') {
        let offset = 1;
        while (this.peek(offset).kind === 'ident') offset++;
        const after = this.peek(offset);
        if (after.kind === 'open-brace') {
          classDefinitions.push(this.parseClassDefinition());
        } else if (after.kind === 'colon') {
          instanceDeclarations.push(this.parseInstanceDeclaration());
        } else {
          typeDefinitions.push(this.parseTypeDefinition());
        }
        continue;
      }
      definitions.push(this.parseDefinition());
    }
    return { definitions, typeDefinitions, classDefinitions, instanceDeclarations };
  }

  /**
   * Parses `name [: type] ;` (declaration only) or `name [: type] = expr ;`.
   * The semicolon after a body may be omitted at end of input.
   */
  private parseDefinition(): Definition {
    const nameToken = this.expect('ident');
    const name = (nameToken as { value: string }).value;
    let annotation: Annotation | undefined;
    if (this.current().kind === 'colon') {
      this.advance();
      // parseType() must run BEFORE parsedConstraints is read: object-literal
      // properties are evaluated left to right, so reading it inline would
      // capture the constraints left over from an earlier parse.
      const type = this.parseType();
      annotation = { constraints: this.parsedConstraints, type };
      // Declaration only
      if (this.current().kind === 'semicolon') {
        this.advance();
        return { kind: 'definition', name, annotation, ...this.getPos(nameToken) };
      }
    }
    this.expect('equals');
    const body = this.parseExpression();
    if (this.current().kind !== 'eof') {
      this.expect('semicolon');
    }
    return { kind: 'definition', name, body, annotation, ...this.getPos(nameToken) };
  }

  /** Parses an expression, optionally followed by `| pattern \ result` match cases. */
  private parseExpression(): AST {
    const expr = this.parseExpressionNoMatch();
    // Match
    if (this.current().kind === 'pipe') {
      return this.parseMatch(expr);
    }
    return expr;
  }

  /**
   * Parses an expression that does not itself consume trailing match cases.
   * Used for match-case results so a following `|` starts the next case
   * instead of a nested match.
   */
  private parseExpressionNoMatch(): AST {
    // Lambda
    if (this.isLambdaStart()) {
      return this.parseLambda();
    }
    // Let
    if ((this.current().kind === 'ident' || this.current().kind === 'underscore') && this.peek().kind === 'equals') {
      return this.parseLet();
    }
    const expr = this.parseInfix();
    // Rebind
    if (this.current().kind === 'colon-equals') {
      const token = this.current();
      this.advance();
      const value = this.parseExpressionNoMatch();
      return { kind: 'rebind', target: expr, value, ...this.getPos(token) };
    }
    return expr;
  }

  /**
   * Parses comma-separated items up to (but not consuming) `closeToken`.
   * A trailing comma before the closing token is allowed.
   */
  private parseCommaSeparated<T>(closeToken: Token['kind'], parseItem: () => T): T[] {
    const items: T[] = [];
    let first = true;
    while (this.current().kind !== closeToken) {
      if (!first) {
        this.expect('comma');
        if (this.current().kind === closeToken) break; // trailing commas
      }
      first = false;
      items.push(parseItem());
    }
    return items;
  }

  /**
   * Parses one or more `| pattern \ result` cases matching on `expr`.
   * Every case carries the position of the first `|` token.
   */
  private parseMatch(expr: AST): AST {
    const token = this.current();
    const cases: MatchCase[] = [];
    while (this.current().kind === 'pipe') {
      this.advance();
      const pattern = this.parsePattern();
      this.expect('backslash');
      const result = this.parseExpressionNoMatch();
      cases.push({ pattern, result, ...this.getPos(token) });
    }
    return { kind: 'match', expr, cases, ...this.getPos(token) };
  }

  /**
   * Parses a pattern: wildcard, literal, variable, constructor with argument
   * patterns, list (optionally ending in `...name`), record, or a parenthesised pattern.
   * @throws ParseError when the current token cannot start a pattern.
   */
  private parsePattern(): Pattern {
    const token = this.current();
    // Wildcard
    if (token.kind === 'underscore') {
      this.advance();
      return { kind: 'wildcard' };
    }
    // Literal
    if (token.kind === 'int' || token.kind === 'float' || token.kind === 'string') {
      this.advance();
      return { kind: 'literal', value: token.value };
    }
    // Variable
    if (token.kind === 'ident') {
      this.advance();
      return { kind: 'var', name: token.value };
    }
    // Constructor
    if (token.kind === 'type-ident') {
      this.advance();
      const name = token.value;
      const args: Pattern[] = [];
      while (this.canStartPattern()) {
        args.push(this.parsePattern());
      }
      return { kind: 'constructor', name, args };
    }
    // List
    if (token.kind === 'open-bracket') {
      this.advance();
      const elements: Pattern[] = [];
      let first = true;
      let spreadName: string | null = null;
      while (this.current().kind !== 'close-bracket') {
        if (!first) {
          this.expect('comma');
          if (this.current().kind === 'close-bracket') break; // trailing commas
        }
        first = false;
        // Spread: must be the last element, so stop collecting after it.
        if (this.current().kind === 'dot-dot-dot') {
          this.advance();
          const nameToken = this.expectIdent();
          spreadName = (nameToken as { value: string }).value;
          break;
        }
        elements.push(this.parsePattern());
      }
      this.expect('close-bracket');
      if (spreadName !== null) {
        return { kind: 'list-spread', head: elements, spread: spreadName, ...this.getPos(token) };
      }
      return { kind: 'list', elements };
    }
    // Record
    if (token.kind === 'open-brace') {
      this.advance();
      const items = this.parseCommaSeparated('close-brace', () => {
        const keyToken = this.expect('ident');
        const key = (keyToken as { value: string }).value;
        this.expect('equals');
        return { key, pattern: this.parsePattern() };
      });
      this.expect('close-brace');
      const fields: { [key: string]: Pattern } = {};
      // A repeated key keeps the last pattern given for it.
      for (const item of items) fields[item.key] = item.pattern;
      return { kind: 'record', fields };
    }
    // Parens
    if (token.kind === 'open-paren') {
      this.advance();
      const pattern = this.parsePattern();
      this.expect('close-paren');
      return pattern;
    }
    throw ParseError(`Unexpected token in pattern: ${token.kind}`, token.line, token.column, this.source);
  }

  /** True when the current token can begin a pattern. */
  private canStartPattern(): boolean {
    const kind = this.current().kind;
    return kind === 'underscore' || kind === 'ident' ||
      kind === 'type-ident' || kind === 'int' ||
      kind === 'float' || kind === 'string' ||
      kind === 'open-paren' || kind === 'open-brace' ||
      kind === 'open-bracket';
  }

  /** Parses `param* \ body`; each parameter is an identifier or `_`. */
  private parseLambda(): AST {
    const token = this.current();
    const params: string[] = [];
    while (this.current().kind === 'ident' || this.current().kind === 'underscore') {
      const param = this.advance();
      if (param.kind === 'underscore') {
        params.push('_');
      } else {
        params.push((param as { value: string }).value);
      }
    }
    this.expect('backslash');
    const body = this.parseExpression();
    return { kind: 'lambda', params, body, ...this.getPos(token) };
  }

  /**
   * Parses `name = value ; body`, where `name` is an identifier or `_`.
   * @throws ParseError when the cursor is not at an identifier or underscore.
   */
  private parseLet(): AST {
    const nameToken = this.current();
    let name: string;
    if (nameToken.kind === 'underscore') {
      name = '_';
      this.advance();
    } else if (nameToken.kind === 'ident') {
      name = (nameToken as { value: string }).value;
      this.advance();
    } else {
      throw ParseError(`Expected ident or underscore, got ${nameToken.kind}`, nameToken.line, nameToken.column, this.source);
    }
    this.expect('equals');
    const value = this.parseExpression();
    this.expect('semicolon');
    const body = this.parseExpression();
    return { kind: 'let', name, value, body, ...this.getPos(nameToken) };
  }

  /**
   * Parses a chain of infix operators by precedence climbing.
   *
   * The right operand is parsed with `prec + 1`, so operators of equal
   * precedence group to the left. Named operators desugar to an application
   * of the function from tokenToOpName(); `left ~ right` becomes an
   * application of `right` to `left`. All nodes built here carry the position
   * of the first token of the leftmost operand.
   *
   * @param minPrec lowest operator precedence this call is allowed to consume.
   */
  private parseInfix(minPrec: number = 0): AST {
    const token = this.current();
    let left = this.parseApplication();
    while (this.isInfixOp() && this.precOf(this.current()) >= minPrec) {
      const opToken = this.advance();
      const prec = this.precOf(opToken);
      if (opToken.kind === 'tilde') {
        // function application operator
        const right = this.parseInfix(prec + 1);
        left = {
          kind: 'apply',
          func: right,
          args: [left],
          ...this.getPos(token)
        };
      } else {
        // operators desugar to function calls
        const opName = this.tokenToOpName(opToken);
        const right = this.parseInfix(prec + 1);
        left = {
          kind: 'apply',
          func: { kind: 'variable', name: opName, ...this.getPos(token) },
          args: [left, right],
          ...this.getPos(token)
        };
      }
    }
    return left;
  }

  /** Parses juxtaposition application `f a b` as nested single-argument applications. */
  private parseApplication(): AST {
    const token = this.current();
    let func = this.parsePostfix();
    while (this.canStartPrimary()) {
      const arg = this.parsePostfix();
      func = { kind: 'apply', func, args: [arg], ...this.getPos(token) };
    }
    return func;
  }

  /**
   * Parses a primary expression followed by any number of `.field` accesses
   * or `.{ key = value, ... }` record updates.
   */
  private parsePostfix(): AST {
    const token = this.current();
    let expr = this.parsePrimary();
    while (this.current().kind === 'dot') {
      this.advance();
      if (this.current().kind === 'open-brace') {
        // Record update
        this.advance();
        const items = this.parseCommaSeparated('close-brace', () => {
          const keyToken = this.expectIdent();
          const key = (keyToken as { value: string }).value;
          this.expect('equals');
          return { key, value: this.parseExpression() };
        });
        this.expect('close-brace');
        const updates: { [key: string]: AST } = {};
        for (const item of items) updates[item.key] = item.value;
        expr = { kind: 'record-update', record: expr, updates, ...this.getPos(token) };
      } else {
        // Record access
        const fieldToken = this.expectIdent();
        const field = (fieldToken as { value: string }).value;
        expr = { kind: 'record-access', record: expr, field, ...this.getPos(token) };
      }
    }
    return expr;
  }

  /**
   * Parses a primary expression: parenthesised expression, list literal
   * (elements may be `...expr` spreads), record literal (entries are
   * `key = value` or `...expr`), int / float / string literal, variable, or constructor.
   * @throws ParseError when the current token cannot start a primary expression.
   */
  private parsePrimary(): AST {
    const token = this.current();
    if (token.kind === 'open-paren') {
      this.advance();
      const expr = this.parseExpression();
      this.expect('close-paren');
      return expr;
    }
    if (token.kind === 'open-bracket') {
      this.advance();
      const items = this.parseCommaSeparated('close-bracket', () => {
        // Spread
        if (this.current().kind === 'dot-dot-dot') {
          const spreadToken = this.current();
          this.advance();
          return { kind: 'list-spread' as const, spread: this.parseExpression(), ...this.getPos(spreadToken) };
        }
        return this.parseExpression();
      });
      this.expect('close-bracket');
      return { kind: 'list', elements: items, ...this.getPos(token) };
    }
    if (token.kind === 'open-brace') {
      this.advance();
      const entries = this.parseCommaSeparated('close-brace', () => {
        if (this.current().kind === 'dot-dot-dot') {
          this.advance();
          return { kind: 'spread' as const, expr: this.parseExpression() };
        }
        const keyToken = this.expectIdent();
        const key = (keyToken as { value: string }).value;
        this.expect('equals');
        return { kind: 'field' as const, key, value: this.parseExpression() };
      });
      this.expect('close-brace');
      return { kind: 'record', entries, ...this.getPos(token) };
    }
    if (token.kind === 'int') {
      this.advance();
      return { kind: 'literal', value: { kind: 'int', value: token.value }, ...this.getPos(token) };
    }
    if (token.kind === 'float') {
      this.advance();
      return { kind: 'literal', value: { kind: 'float', value: token.value }, ...this.getPos(token) };
    }
    if (token.kind === 'string') {
      this.advance();
      return { kind: 'literal', value: { kind: 'string', value: token.value }, ...this.getPos(token) };
    }
    if (token.kind === 'ident') {
      this.advance();
      return { kind: 'variable', name: token.value, ...this.getPos(token) };
    }
    if (token.kind === 'type-ident') {
      this.advance();
      return { kind: 'constructor', name: token.value, ...this.getPos(token) };
    }
    throw ParseError(`Unexpected token: ${token.kind}`, token.line, token.column, this.source);
  }

  /**
   * Parses an atomic type: a type name, a type variable, a parenthesised
   * type, or a record type `{ name: type, ... }` (commas between fields are optional).
   * @throws ParseError when the current token cannot start a type.
   */
  private parseTypeAtom(): TypeAST {
    const token = this.current();
    if (token.kind === 'type-ident') {
      this.advance();
      return { kind: 'type-name', name: token.value };
    }
    if (token.kind === 'ident') {
      this.advance();
      return { kind: 'type-var', name: token.value };
    }
    if (token.kind === 'open-paren') {
      this.advance();
      const type = this.parseType();
      this.expect('close-paren');
      return type;
    }
    if (token.kind === 'open-brace') {
      this.advance();
      const fields: { name: string, type: TypeAST }[] = [];
      while (this.current().kind !== 'close-brace') {
        const name = this.expectIdent() as { value: string };
        this.expect('colon');
        const type = this.parseType();
        fields.push({ name: name.value, type });
        if (this.current().kind === 'comma') this.advance();
      }
      this.expect('close-brace');
      return { kind: 'type-record', fields };
    }
    throw ParseError(`Expected type, got ${token.kind}`, token.line, token.column, this.source);
  }

  /** Parses a type atom followed by zero or more argument atoms (type application). */
  private parseTypeApply(): TypeAST {
    const base = this.parseTypeAtom();
    const args: TypeAST[] = [];
    while (this.canStartTypeAtom()) {
      args.push(this.parseTypeAtom());
    }
    if (args.length === 0) return base;
    return { kind: 'type-apply', constructor: base, args };
  }

  /** True when the current token can begin a type atom. */
  private canStartTypeAtom(): boolean {
    const kind = this.current().kind;
    return kind === 'type-ident' || kind === 'ident' ||
      kind === 'open-paren' || kind === 'open-brace';
  }

  /**
   * Parses a type, optionally prefixed by constraints (`Num a, Eq b :: <type>`).
   * `left \ right` is a function type whose result is parsed recursively.
   *
   * Side effect: on return, `parsedConstraints` holds the constraints parsed by
   * THIS call. Nested calls (function results, parenthesised and record field
   * types) overwrite the field while they run, so it is assigned last.
   */
  private parseType(): TypeAST {
    // Check for constraints: Num a, Eq b :: <type>
    const constraints = this.tryParseConstraints();
    let type: TypeAST = this.parseTypeApply();
    if (this.current().kind === 'backslash') {
      this.advance();
      const result = this.parseType();
      type = { kind: 'type-function', param: type, result };
    }
    this.parsedConstraints = constraints;
    return type;
  }

  /**
   * Parses a leading constraint list `ClassName var (, ClassName var)* ::`
   * if one is present, consuming it including the `::`.
   * @returns the parsed constraints, or an empty array (consuming nothing)
   *          when the upcoming tokens are not a constraint list.
   * @throws ParseError when a `::` is reached but the tokens before it are malformed.
   */
  private tryParseConstraints(): { className: string, typeVar: string }[] {
    // Look ahead for `::`. A constraint list consists solely of type
    // identifiers, identifiers and commas, so any other token means there is
    // no constraint list here. Stopping at that token keeps the scan from
    // running into a `::` that belongs to a later definition.
    let offset = 0;
    while (true) {
      const kind = this.peek(offset).kind;
      if (kind === 'double-colon') break;
      if (kind !== 'type-ident' && kind !== 'ident' && kind !== 'comma') return [];
      offset++;
    }
    // Parse constraints: ClassName varName (, ClassName varName)*
    const constraints: { className: string, typeVar: string }[] = [];
    while (true) {
      const className = (this.expect('type-ident') as { value: string }).value;
      const typeVar = (this.expect('ident') as { value: string }).value;
      constraints.push({ className, typeVar });
      if (this.current().kind === 'comma') {
        this.advance();
      } else {
        break;
      }
    }
    this.expect('double-colon');
    return constraints;
  }

  /**
   * Parses `Name param* = Constructor (| Constructor)*` with an optional
   * trailing semicolon. The caller has verified the cursor is at a `type-ident`.
   */
  private parseTypeDefinition(): TypeDefinition {
    const nameToken = this.advance();
    const name = (nameToken as { value: string }).value;
    // Collect type params
    const params: string[] = [];
    while (this.current().kind === 'ident') {
      params.push((this.advance() as { value: string }).value);
    }
    this.expect('equals');
    // Parse constructors separated by |
    const constructors: TypeConstructor[] = [];
    constructors.push(this.parseTypeConstructor());
    while (this.current().kind === 'pipe') {
      this.advance();
      constructors.push(this.parseTypeConstructor());
    }
    if (this.current().kind === 'semicolon') this.advance();
    return { kind: 'type-definition', name, params, constructors, ...this.getPos(nameToken) };
  }

  /** Parses a constructor name followed by zero or more argument type atoms. */
  private parseTypeConstructor(): TypeConstructor {
    const name = (this.expect('type-ident') as { value: string }).value;
    const args: TypeAST[] = [];
    while (this.canStartTypeAtom()) {
      args.push(this.parseTypeAtom());
    }
    return { name, args };
  }

  /**
   * Parses `ClassName param { method : type ; ... }` with optional semicolons
   * after each method and after the closing brace. The caller has verified
   * the cursor is at a `type-ident`.
   */
  private parseClassDefinition(): ClassDefinition {
    const nameToken = this.advance();
    const param = (this.expect('ident') as { value: string }).value;
    this.expect('open-brace');
    const methods: { name: string, type: TypeAST }[] = [];
    while (this.current().kind !== 'close-brace') {
      const methodName = (this.expect('ident') as { value: string }).value;
      this.expect('colon');
      const type = this.parseType();
      methods.push({ name: methodName, type });
      if (this.current().kind === 'semicolon') this.advance();
    }
    this.expect('close-brace');
    if (this.current().kind === 'semicolon') this.advance();
    return { kind: 'class-definition', name: (nameToken as { value: string }).value, param, methods, ...this.getPos(nameToken) };
  }

  /**
   * Parses `TypeName : ClassName` with an optional trailing semicolon.
   * The caller has verified the cursor is at a `type-ident`.
   */
  private parseInstanceDeclaration(): InstanceDeclaration {
    const typeToken = this.advance();
    this.expect('colon');
    const classToken = this.expect('type-ident');
    if (this.current().kind === 'semicolon') this.advance();
    return { kind: 'instance-declaration', typeName: (typeToken as { value: string }).value, className: (classToken as { value: string }).value, ...this.getPos(typeToken) };
  }
}