···11+#![cfg_attr(not(feature = "std"), no_std)]
22+33+extern crate alloc;
44+55+pub mod ast;
66+pub mod error;
77+pub mod lexer;
88+pub mod parser;
99+pub mod span;
1010+pub mod validate;
1111+pub mod workspace;
1212+1313+pub use ast::Lexicon;
1414+pub use error::{ParseError, ValidationError, ValidationErrors};
1515+pub use parser::parse_lexicon;
1616+pub use validate::validate_lexicon;
1717+pub use workspace::Workspace;

/// Text of `../../resources/prelude.mlf` (path relative to this source file),
/// embedded into the crate at compile time via `include_str!`.
pub const PRELUDE: &str = include_str!("../../resources/prelude.mlf");
+1249
mlf-lang/src/parser.rs
···11+use alloc::vec::Vec;
22+use crate::{Lexicon, ParseError, ast::*, lexer::{tokenize, SpannedToken}, span::{Span, Spanned}};
33+use crate::lexer::Token as LexToken;
44+55+struct Parser {
66+ tokens: Vec<SpannedToken>,
77+ pos: usize,
88+}
99+1010+impl Parser {
1111+ fn new(tokens: Vec<SpannedToken>) -> Self {
1212+ Parser { tokens, pos: 0 }
1313+ }
1414+1515+ fn current(&self) -> &SpannedToken {
1616+ &self.tokens[self.pos]
1717+ }
1818+1919+ fn peek(&self, offset: usize) -> Option<&SpannedToken> {
2020+ self.tokens.get(self.pos + offset)
2121+ }
2222+2323+ fn is_eof(&self) -> bool {
2424+ matches!(self.current().token, LexToken::Eof)
2525+ }
2626+2727+ fn advance(&mut self) -> &SpannedToken {
2828+ let current = &self.tokens[self.pos];
2929+ if !self.is_eof() {
3030+ self.pos += 1;
3131+ }
3232+ current
3333+ }
3434+3535+ fn expect(&mut self, expected: LexToken) -> Result<Span, ParseError> {
3636+ let current = self.current();
3737+ if current.token == expected {
3838+ Ok(self.advance().span)
3939+ } else {
4040+ Err(ParseError::Syntax {
4141+ message: alloc::format!("Expected {}, found {}", expected, current.token),
4242+ span: current.span,
4343+ })
4444+ }
4545+ }
4646+4747+ fn parse_ident(&mut self) -> Result<Ident, ParseError> {
4848+ let current = self.current();
4949+ if let LexToken::Ident(name) = ¤t.token {
5050+ let ident = Ident {
5151+ name: name.clone(),
5252+ span: current.span,
5353+ };
5454+ self.advance();
5555+ Ok(ident)
5656+ } else {
5757+ Err(ParseError::Syntax {
5858+ message: alloc::format!("Expected identifier, found {}", current.token),
5959+ span: current.span,
6060+ })
6161+ }
6262+ }
6363+6464+ fn parse_path(&mut self) -> Result<Path, ParseError> {
6565+ let mut segments = Vec::new();
6666+ let start = self.current().span.start;
6767+6868+ segments.push(self.parse_ident()?);
6969+7070+ while matches!(self.current().token, LexToken::Dot) {
7171+ self.advance();
7272+ segments.push(self.parse_ident()?);
7373+ }
7474+7575+ let end = segments.last().unwrap().span.end;
7676+ Ok(Path {
7777+ segments,
7878+ span: Span::new(start, end),
7979+ })
8080+ }
8181+}
8282+8383+pub fn parse_lexicon(input: &str) -> Result<Lexicon, ParseError> {
8484+ let tokens = tokenize(input)?;
8585+ let mut parser = Parser::new(tokens);
8686+ let start = parser.current().span.start;
8787+8888+ let mut items = Vec::new();
8989+9090+ while !parser.is_eof() {
9191+ items.push(parser.parse_item()?);
9292+ }
9393+9494+ let end = if let Some(last) = items.last() {
9595+ last.span().end
9696+ } else {
9797+ start
9898+ };
9999+100100+ Ok(Lexicon {
101101+ items,
102102+ span: Span::new(start, end),
103103+ })
104104+}
105105+106106+impl Parser {
107107+ fn parse_item(&mut self) -> Result<Item, ParseError> {
108108+ while matches!(self.current().token, LexToken::DocComment(_)) {
109109+ self.advance();
110110+ }
111111+112112+ let annotations = self.parse_annotations()?;
113113+114114+ match &self.current().token {
115115+ LexToken::Record => self.parse_record(annotations),
116116+ LexToken::Alias => self.parse_alias(annotations),
117117+ LexToken::Token => self.parse_token(annotations),
118118+ LexToken::Query => self.parse_query(annotations),
119119+ LexToken::Procedure => self.parse_procedure(annotations),
120120+ LexToken::Subscription => self.parse_subscription(annotations),
121121+ LexToken::Namespace => self.parse_namespace(),
122122+ LexToken::Use => self.parse_use(),
123123+ _ => Err(ParseError::Syntax {
124124+ message: alloc::format!("Expected item definition, found {}", self.current().token),
125125+ span: self.current().span,
126126+ }),
127127+ }
128128+ }
129129+130130+ fn parse_annotations(&mut self) -> Result<Vec<Annotation>, ParseError> {
131131+ let mut annotations = Vec::new();
132132+133133+ while matches!(self.current().token, LexToken::At) {
134134+ annotations.push(self.parse_annotation()?);
135135+ }
136136+137137+ Ok(annotations)
138138+ }
139139+140140+ fn parse_annotation(&mut self) -> Result<Annotation, ParseError> {
141141+ let start = self.expect(LexToken::At)?;
142142+ let name = self.parse_ident()?;
143143+144144+ let mut args = Vec::new();
145145+146146+ if matches!(self.current().token, LexToken::LeftParen) {
147147+ self.advance();
148148+149149+ while !matches!(self.current().token, LexToken::RightParen) {
150150+ args.push(self.parse_annotation_arg()?);
151151+152152+ if matches!(self.current().token, LexToken::Comma) {
153153+ self.advance();
154154+ } else {
155155+ break;
156156+ }
157157+ }
158158+159159+ self.expect(LexToken::RightParen)?;
160160+ }
161161+162162+ let end = if args.is_empty() {
163163+ name.span.end
164164+ } else {
165165+ self.tokens[self.pos - 1].span.end
166166+ };
167167+168168+ Ok(Annotation {
169169+ name,
170170+ args,
171171+ span: Span::new(start.start, end),
172172+ })
173173+ }
174174+175175+ fn parse_annotation_arg(&mut self) -> Result<AnnotationArg, ParseError> {
176176+ if let Some(next) = self.peek(1) {
177177+ if matches!(next.token, LexToken::Colon) {
178178+ let name = self.parse_ident()?;
179179+ self.expect(LexToken::Colon)?;
180180+ let value = self.parse_annotation_value()?;
181181+ return Ok(AnnotationArg::Named { name, value });
182182+ }
183183+ }
184184+185185+ let value = self.parse_annotation_value()?;
186186+ Ok(AnnotationArg::Positional(value))
187187+ }
188188+189189+ fn parse_annotation_value(&mut self) -> Result<AnnotationValue, ParseError> {
190190+ let current = self.current();
191191+ match ¤t.token {
192192+ LexToken::StringLit(s) => {
193193+ let value = AnnotationValue::String(s.clone());
194194+ self.advance();
195195+ Ok(value)
196196+ }
197197+ LexToken::IntLit(i) => {
198198+ let value = AnnotationValue::Number(*i as f64);
199199+ self.advance();
200200+ Ok(value)
201201+ }
202202+ LexToken::FloatLit(f) => {
203203+ let value = AnnotationValue::Number(*f);
204204+ self.advance();
205205+ Ok(value)
206206+ }
207207+ LexToken::True => {
208208+ self.advance();
209209+ Ok(AnnotationValue::Boolean(true))
210210+ }
211211+ LexToken::False => {
212212+ self.advance();
213213+ Ok(AnnotationValue::Boolean(false))
214214+ }
215215+ _ => Err(ParseError::Syntax {
216216+ message: alloc::format!("Expected annotation value, found {}", current.token),
217217+ span: current.span,
218218+ }),
219219+ }
220220+ }
221221+222222+ fn parse_record(&mut self, annotations: Vec<Annotation>) -> Result<Item, ParseError> {
223223+ let start = self.expect(LexToken::Record)?;
224224+ let name = self.parse_ident()?;
225225+ self.expect(LexToken::LeftBrace)?;
226226+227227+ let mut fields = Vec::new();
228228+ let mut doc_comments = Vec::new();
229229+230230+ while !matches!(self.current().token, LexToken::RightBrace) {
231231+ if let LexToken::DocComment(comment) = &self.current().token {
232232+ let span = self.current().span;
233233+ doc_comments.push(DocComment {
234234+ text: comment.clone(),
235235+ span,
236236+ });
237237+ self.advance();
238238+ } else {
239239+ fields.push(self.parse_field(doc_comments.clone())?);
240240+ doc_comments.clear();
241241+ }
242242+ }
243243+244244+ let end = self.expect(LexToken::RightBrace)?;
245245+ self.expect(LexToken::Semicolon)?;
246246+247247+ Ok(Item::Record(Record {
248248+ docs: Vec::new(),
249249+ annotations,
250250+ name,
251251+ fields,
252252+ span: Span::new(start.start, end.end),
253253+ }))
254254+ }
255255+256256+ fn parse_field(&mut self, docs: Vec<DocComment>) -> Result<Field, ParseError> {
257257+ let annotations = self.parse_annotations()?;
258258+ let name = self.parse_ident()?;
259259+260260+ let optional = if matches!(self.current().token, LexToken::Question) {
261261+ self.advance();
262262+ true
263263+ } else {
264264+ false
265265+ };
266266+267267+ self.expect(LexToken::Colon)?;
268268+ let ty = self.parse_type()?;
269269+ self.expect(LexToken::Comma)?;
270270+271271+ let span = Span::new(name.span.start, ty.span().end);
272272+273273+ Ok(Field {
274274+ docs,
275275+ annotations,
276276+ name,
277277+ ty,
278278+ optional,
279279+ span,
280280+ })
281281+ }
282282+283283+ fn parse_alias(&mut self, annotations: Vec<Annotation>) -> Result<Item, ParseError> {
284284+ let start = self.expect(LexToken::Alias)?;
285285+ let name = self.parse_ident()?;
286286+ self.expect(LexToken::Equals)?;
287287+ let ty = self.parse_type()?;
288288+ let end = self.expect(LexToken::Semicolon)?;
289289+290290+ Ok(Item::Alias(Alias {
291291+ docs: Vec::new(),
292292+ annotations,
293293+ name,
294294+ ty,
295295+ span: Span::new(start.start, end.end),
296296+ }))
297297+ }
298298+299299+ fn parse_token(&mut self, annotations: Vec<Annotation>) -> Result<Item, ParseError> {
300300+ let start = self.expect(LexToken::Token)?;
301301+ let name = self.parse_ident()?;
302302+ let end = self.expect(LexToken::Semicolon)?;
303303+304304+ Ok(Item::Token(Token {
305305+ docs: Vec::new(),
306306+ annotations,
307307+ name,
308308+ span: Span::new(start.start, end.end),
309309+ }))
310310+ }
311311+312312+ fn parse_query(&mut self, annotations: Vec<Annotation>) -> Result<Item, ParseError> {
313313+ let start = self.expect(LexToken::Query)?;
314314+ let name = self.parse_ident()?;
315315+ self.expect(LexToken::LeftParen)?;
316316+317317+ let params = self.parse_params()?;
318318+319319+ self.expect(LexToken::RightParen)?;
320320+ self.expect(LexToken::Colon)?;
321321+322322+ let output = self.parse_base_type()?;
323323+324324+ let returns = if matches!(self.current().token, LexToken::Pipe) {
325325+ self.advance();
326326+ if matches!(self.current().token, LexToken::Error) {
327327+ self.advance();
328328+ let errors = self.parse_errors()?;
329329+ let error_span = errors.last().map(|e| e.span).unwrap_or(output.span());
330330+ ReturnType::TypeWithErrors {
331331+ success: output,
332332+ errors,
333333+ span: error_span,
334334+ }
335335+ } else {
336336+ let mut types = alloc::vec![output];
337337+ types.push(self.parse_base_type()?);
338338+339339+ while matches!(self.current().token, LexToken::Pipe) {
340340+ self.advance();
341341+ types.push(self.parse_base_type()?);
342342+ }
343343+344344+ let span = Span::new(types[0].span().start, types.last().unwrap().span().end);
345345+ ReturnType::Type(Type::Union { types, span })
346346+ }
347347+ } else {
348348+ ReturnType::Type(output)
349349+ };
350350+351351+ let end = self.expect(LexToken::Semicolon)?;
352352+353353+ Ok(Item::Query(Query {
354354+ docs: Vec::new(),
355355+ annotations,
356356+ name,
357357+ params,
358358+ returns,
359359+ span: Span::new(start.start, end.end),
360360+ }))
361361+ }
362362+363363+ fn parse_procedure(&mut self, annotations: Vec<Annotation>) -> Result<Item, ParseError> {
364364+ let start = self.expect(LexToken::Procedure)?;
365365+ let name = self.parse_ident()?;
366366+ self.expect(LexToken::LeftParen)?;
367367+368368+ let params = self.parse_params()?;
369369+370370+ self.expect(LexToken::RightParen)?;
371371+ self.expect(LexToken::Colon)?;
372372+373373+ let output = self.parse_base_type()?;
374374+375375+ let returns = if matches!(self.current().token, LexToken::Pipe) {
376376+ self.advance();
377377+ if matches!(self.current().token, LexToken::Error) {
378378+ self.advance();
379379+ let errors = self.parse_errors()?;
380380+ let error_span = errors.last().map(|e| e.span).unwrap_or(output.span());
381381+ ReturnType::TypeWithErrors {
382382+ success: output,
383383+ errors,
384384+ span: error_span,
385385+ }
386386+ } else {
387387+ let mut types = alloc::vec![output];
388388+ types.push(self.parse_base_type()?);
389389+390390+ while matches!(self.current().token, LexToken::Pipe) {
391391+ self.advance();
392392+ types.push(self.parse_base_type()?);
393393+ }
394394+395395+ let span = Span::new(types[0].span().start, types.last().unwrap().span().end);
396396+ ReturnType::Type(Type::Union { types, span })
397397+ }
398398+ } else {
399399+ ReturnType::Type(output)
400400+ };
401401+402402+ let end = self.expect(LexToken::Semicolon)?;
403403+404404+ Ok(Item::Procedure(Procedure {
405405+ docs: Vec::new(),
406406+ annotations,
407407+ name,
408408+ params,
409409+ returns,
410410+ span: Span::new(start.start, end.end),
411411+ }))
412412+ }
413413+414414+ fn parse_subscription(&mut self, annotations: Vec<Annotation>) -> Result<Item, ParseError> {
415415+ let start = self.expect(LexToken::Subscription)?;
416416+ let name = self.parse_ident()?;
417417+ self.expect(LexToken::LeftParen)?;
418418+419419+ let params = self.parse_params()?;
420420+421421+ self.expect(LexToken::RightParen)?;
422422+ self.expect(LexToken::Colon)?;
423423+424424+ let messages = self.parse_type()?;
425425+426426+ let end = self.expect(LexToken::Semicolon)?;
427427+428428+ Ok(Item::Subscription(Subscription {
429429+ docs: Vec::new(),
430430+ annotations,
431431+ name,
432432+ params,
433433+ messages,
434434+ span: Span::new(start.start, end.end),
435435+ }))
436436+ }
437437+438438+ fn parse_namespace(&mut self) -> Result<Item, ParseError> {
439439+ let start = self.expect(LexToken::Namespace)?;
440440+ let path = self.parse_path()?;
441441+442442+ // Convert path to a single identifier with dotted name
443443+ let name = Ident {
444444+ name: path.segments.iter().map(|s| s.name.as_str()).collect::<Vec<_>>().join("."),
445445+ span: path.span,
446446+ };
447447+448448+ let end = self.expect(LexToken::Semicolon)?;
449449+450450+ Ok(Item::Namespace(Namespace {
451451+ name,
452452+ items: Vec::new(),
453453+ span: Span::new(start.start, end.end),
454454+ }))
455455+ }
456456+457457+ fn parse_use(&mut self) -> Result<Item, ParseError> {
458458+ let start = self.expect(LexToken::Use)?;
459459+ let path = self.parse_path()?;
460460+461461+ let imports = if matches!(self.current().token, LexToken::As) {
462462+ self.advance();
463463+ let alias = self.parse_ident()?;
464464+ UseImports::Items(alloc::vec![UseItem {
465465+ name: path.segments.last().unwrap().clone(),
466466+ alias: Some(alias),
467467+ }])
468468+ } else {
469469+ UseImports::All
470470+ };
471471+472472+ let end = self.expect(LexToken::Semicolon)?;
473473+474474+ Ok(Item::Use(Use {
475475+ path,
476476+ imports,
477477+ span: Span::new(start.start, end.end),
478478+ }))
479479+ }
480480+481481+ fn parse_params(&mut self) -> Result<Vec<Field>, ParseError> {
482482+ let mut params = Vec::new();
483483+484484+ while !matches!(self.current().token, LexToken::RightParen) {
485485+ let annotations = self.parse_annotations()?;
486486+ let name = self.parse_ident()?;
487487+488488+ let optional = if matches!(self.current().token, LexToken::Question) {
489489+ self.advance();
490490+ true
491491+ } else {
492492+ false
493493+ };
494494+495495+ self.expect(LexToken::Colon)?;
496496+ let ty = self.parse_type()?;
497497+498498+ let span = Span::new(name.span.start, ty.span().end);
499499+500500+ params.push(Field {
501501+ docs: Vec::new(),
502502+ annotations,
503503+ name,
504504+ ty,
505505+ optional,
506506+ span,
507507+ });
508508+509509+ if matches!(self.current().token, LexToken::Comma) {
510510+ self.advance();
511511+ } else {
512512+ break;
513513+ }
514514+ }
515515+516516+ Ok(params)
517517+ }
518518+519519+ fn parse_errors(&mut self) -> Result<Vec<ErrorDef>, ParseError> {
520520+ self.expect(LexToken::LeftBrace)?;
521521+522522+ let mut errors = Vec::new();
523523+ let mut doc_comments = Vec::new();
524524+525525+ while !matches!(self.current().token, LexToken::RightBrace) {
526526+ if let LexToken::DocComment(comment) = &self.current().token {
527527+ let span = self.current().span;
528528+ doc_comments.push(DocComment {
529529+ text: comment.clone(),
530530+ span,
531531+ });
532532+ self.advance();
533533+ } else {
534534+ let name = self.parse_ident()?;
535535+ let span = name.span;
536536+ self.expect(LexToken::Comma)?;
537537+ errors.push(ErrorDef {
538538+ docs: doc_comments.clone(),
539539+ name,
540540+ span,
541541+ });
542542+ doc_comments.clear();
543543+ }
544544+ }
545545+546546+ self.expect(LexToken::RightBrace)?;
547547+548548+ Ok(errors)
549549+ }
550550+551551+ fn parse_type(&mut self) -> Result<Type, ParseError> {
552552+ let base = self.parse_base_type()?;
553553+554554+ if matches!(self.current().token, LexToken::Pipe) {
555555+ let mut types = alloc::vec![base];
556556+557557+ while matches!(self.current().token, LexToken::Pipe) {
558558+ self.advance();
559559+ if matches!(self.current().token, LexToken::Error) {
560560+ break;
561561+ }
562562+ types.push(self.parse_base_type()?);
563563+ }
564564+565565+ let span = Span::new(types[0].span().start, types.last().unwrap().span().end);
566566+ return Ok(Type::Union { types, span });
567567+ }
568568+569569+ Ok(base)
570570+ }
571571+572572+ fn parse_base_type(&mut self) -> Result<Type, ParseError> {
573573+ let current = self.current();
574574+ let start = current.span.start;
575575+576576+ let mut ty = match ¤t.token {
577577+ LexToken::String => {
578578+ let span = self.advance().span;
579579+ Type::Primitive {
580580+ kind: PrimitiveType::String,
581581+ span,
582582+ }
583583+ }
584584+ LexToken::Integer => {
585585+ let span = self.advance().span;
586586+ Type::Primitive {
587587+ kind: PrimitiveType::Integer,
588588+ span,
589589+ }
590590+ }
591591+ LexToken::Number => {
592592+ let span = self.advance().span;
593593+ Type::Primitive {
594594+ kind: PrimitiveType::Number,
595595+ span,
596596+ }
597597+ }
598598+ LexToken::Boolean => {
599599+ let span = self.advance().span;
600600+ Type::Primitive {
601601+ kind: PrimitiveType::Boolean,
602602+ span,
603603+ }
604604+ }
605605+ LexToken::Blob => {
606606+ let span = self.advance().span;
607607+ Type::Primitive {
608608+ kind: PrimitiveType::Blob,
609609+ span,
610610+ }
611611+ }
612612+ LexToken::Bytes => {
613613+ let span = self.advance().span;
614614+ Type::Primitive {
615615+ kind: PrimitiveType::Bytes,
616616+ span,
617617+ }
618618+ }
619619+ LexToken::Null => {
620620+ let span = self.advance().span;
621621+ Type::Primitive {
622622+ kind: PrimitiveType::Null,
623623+ span,
624624+ }
625625+ }
626626+ LexToken::Unknown => {
627627+ let span = self.advance().span;
628628+ Type::Unknown { span }
629629+ }
630630+ LexToken::Ident(_) => {
631631+ let path = self.parse_path()?;
632632+ let span = path.span;
633633+ Type::Reference { path, span }
634634+ }
635635+ LexToken::LeftBracket => {
636636+ self.advance();
637637+ let inner = self.parse_type()?;
638638+ let end = self.expect(LexToken::RightBracket)?;
639639+ Type::Array {
640640+ inner: alloc::boxed::Box::new(inner),
641641+ span: Span::new(start, end.end),
642642+ }
643643+ }
644644+ LexToken::LeftBrace => {
645645+ self.advance();
646646+ let mut fields = Vec::new();
647647+648648+ while !matches!(self.current().token, LexToken::RightBrace) {
649649+ fields.push(self.parse_field(Vec::new())?);
650650+ }
651651+652652+ let end = self.expect(LexToken::RightBrace)?;
653653+ Type::Object {
654654+ fields,
655655+ span: Span::new(start, end.end),
656656+ }
657657+ }
658658+ _ => {
659659+ return Err(ParseError::Syntax {
660660+ message: alloc::format!("Expected type, found {}", current.token),
661661+ span: current.span,
662662+ });
663663+ }
664664+ };
665665+666666+ // Handle array suffix: Type[]
667667+ if matches!(self.current().token, LexToken::LeftBracket) {
668668+ self.advance();
669669+ let end = self.expect(LexToken::RightBracket)?;
670670+ ty = Type::Array {
671671+ inner: alloc::boxed::Box::new(ty),
672672+ span: Span::new(start, end.end),
673673+ };
674674+ }
675675+676676+ if matches!(self.current().token, LexToken::Constrained) {
677677+ self.advance();
678678+ self.expect(LexToken::LeftBrace)?;
679679+680680+ let mut constraints = Vec::new();
681681+682682+ while !matches!(self.current().token, LexToken::RightBrace) {
683683+ constraints.push(self.parse_constraint()?);
684684+685685+ if matches!(self.current().token, LexToken::Comma) {
686686+ self.advance();
687687+ } else {
688688+ break;
689689+ }
690690+ }
691691+692692+ let end = self.expect(LexToken::RightBrace)?;
693693+694694+ ty = Type::Constrained {
695695+ base: alloc::boxed::Box::new(ty),
696696+ constraints,
697697+ span: Span::new(start, end.end),
698698+ };
699699+ }
700700+701701+ Ok(ty)
702702+ }
703703+704704+ fn parse_constraint(&mut self) -> Result<Constraint, ParseError> {
705705+ let name = self.parse_ident()?;
706706+ self.expect(LexToken::Colon)?;
707707+708708+ let start = name.span.start;
709709+ let current = self.current();
710710+ let current_span = current.span;
711711+712712+ let constraint = match name.name.as_str() {
713713+ "minLength" => {
714714+ if let LexToken::IntLit(i) = current.token {
715715+ let value = i;
716716+ self.advance();
717717+ Constraint::MinLength {
718718+ value: value as usize,
719719+ span: Span::new(start, current_span.end),
720720+ }
721721+ } else {
722722+ return Err(ParseError::Syntax {
723723+ message: alloc::format!("Expected integer for minLength"),
724724+ span: current_span,
725725+ });
726726+ }
727727+ }
728728+ "maxLength" => {
729729+ if let LexToken::IntLit(i) = current.token {
730730+ let value = i;
731731+ self.advance();
732732+ Constraint::MaxLength {
733733+ value: value as usize,
734734+ span: Span::new(start, current_span.end),
735735+ }
736736+ } else {
737737+ return Err(ParseError::Syntax {
738738+ message: alloc::format!("Expected integer for maxLength"),
739739+ span: current_span,
740740+ });
741741+ }
742742+ }
743743+ "minimum" => {
744744+ if let LexToken::IntLit(i) = current.token {
745745+ let value = i;
746746+ self.advance();
747747+ Constraint::Minimum {
748748+ value,
749749+ span: Span::new(start, current_span.end),
750750+ }
751751+ } else {
752752+ return Err(ParseError::Syntax {
753753+ message: alloc::format!("Expected integer for minimum"),
754754+ span: current_span,
755755+ });
756756+ }
757757+ }
758758+ "maximum" => {
759759+ if let LexToken::IntLit(i) = current.token {
760760+ let value = i;
761761+ self.advance();
762762+ Constraint::Maximum {
763763+ value,
764764+ span: Span::new(start, current_span.end),
765765+ }
766766+ } else {
767767+ return Err(ParseError::Syntax {
768768+ message: alloc::format!("Expected integer for maximum"),
769769+ span: current_span,
770770+ });
771771+ }
772772+ }
773773+ "enum" => {
774774+ if matches!(current.token, LexToken::LeftBracket) {
775775+ self.advance();
776776+ let mut values = Vec::new();
777777+778778+ while !matches!(self.current().token, LexToken::RightBracket) {
779779+ let current = self.current();
780780+ match ¤t.token {
781781+ LexToken::StringLit(s) => {
782782+ values.push(s.clone());
783783+ self.advance();
784784+ }
785785+ _ => {
786786+ return Err(ParseError::Syntax {
787787+ message: alloc::format!("Expected string literal in enum"),
788788+ span: current.span,
789789+ });
790790+ }
791791+ }
792792+793793+ if matches!(self.current().token, LexToken::Comma) {
794794+ self.advance();
795795+ } else {
796796+ break;
797797+ }
798798+ }
799799+800800+ let end = self.expect(LexToken::RightBracket)?;
801801+ Constraint::Enum {
802802+ values,
803803+ span: Span::new(start, end.end),
804804+ }
805805+ } else {
806806+ return Err(ParseError::Syntax {
807807+ message: alloc::format!("Expected array for enum"),
808808+ span: current_span,
809809+ });
810810+ }
811811+ }
812812+ "format" => {
813813+ if let LexToken::StringLit(s) = ¤t.token {
814814+ let value = s.clone();
815815+ self.advance();
816816+ Constraint::Format {
817817+ value,
818818+ span: Span::new(start, current_span.end),
819819+ }
820820+ } else {
821821+ return Err(ParseError::Syntax {
822822+ message: alloc::format!("Expected string for format"),
823823+ span: current_span,
824824+ });
825825+ }
826826+ }
827827+ "minGraphemes" => {
828828+ if let LexToken::IntLit(i) = current.token {
829829+ let value = i;
830830+ self.advance();
831831+ Constraint::MinGraphemes {
832832+ value: value as usize,
833833+ span: Span::new(start, current_span.end),
834834+ }
835835+ } else {
836836+ return Err(ParseError::Syntax {
837837+ message: alloc::format!("Expected integer for minGraphemes"),
838838+ span: current_span,
839839+ });
840840+ }
841841+ }
842842+ "maxGraphemes" => {
843843+ if let LexToken::IntLit(i) = current.token {
844844+ let value = i;
845845+ self.advance();
846846+ Constraint::MaxGraphemes {
847847+ value: value as usize,
848848+ span: Span::new(start, current_span.end),
849849+ }
850850+ } else {
851851+ return Err(ParseError::Syntax {
852852+ message: alloc::format!("Expected integer for maxGraphemes"),
853853+ span: current_span,
854854+ });
855855+ }
856856+ }
857857+ "maxSize" => {
858858+ if let LexToken::IntLit(i) = current.token {
859859+ let value = i;
860860+ self.advance();
861861+ Constraint::MaxSize {
862862+ value: value as usize,
863863+ span: Span::new(start, current_span.end),
864864+ }
865865+ } else {
866866+ return Err(ParseError::Syntax {
867867+ message: alloc::format!("Expected integer for maxSize"),
868868+ span: current_span,
869869+ });
870870+ }
871871+ }
872872+ "accept" => {
873873+ if matches!(current.token, LexToken::LeftBracket) {
874874+ self.advance();
875875+ let mut mimes = Vec::new();
876876+877877+ while !matches!(self.current().token, LexToken::RightBracket) {
878878+ let current = self.current();
879879+ match ¤t.token {
880880+ LexToken::StringLit(s) => {
881881+ mimes.push(s.clone());
882882+ self.advance();
883883+ }
884884+ _ => {
885885+ return Err(ParseError::Syntax {
886886+ message: alloc::format!("Expected string literal in accept"),
887887+ span: current.span,
888888+ });
889889+ }
890890+ }
891891+892892+ if matches!(self.current().token, LexToken::Comma) {
893893+ self.advance();
894894+ } else {
895895+ break;
896896+ }
897897+ }
898898+899899+ let end = self.expect(LexToken::RightBracket)?;
900900+ Constraint::Accept {
901901+ mimes,
902902+ span: Span::new(start, end.end),
903903+ }
904904+ } else {
905905+ return Err(ParseError::Syntax {
906906+ message: alloc::format!("Expected array for accept"),
907907+ span: current_span,
908908+ });
909909+ }
910910+ }
911911+ "knownValues" => {
912912+ if matches!(current.token, LexToken::LeftBracket) {
913913+ self.advance();
914914+ let mut values = Vec::new();
915915+916916+ while !matches!(self.current().token, LexToken::RightBracket) {
917917+ values.push(self.parse_path()?);
918918+919919+ if matches!(self.current().token, LexToken::Comma) {
920920+ self.advance();
921921+ } else {
922922+ break;
923923+ }
924924+ }
925925+926926+ let end = self.expect(LexToken::RightBracket)?;
927927+ Constraint::KnownValues {
928928+ values,
929929+ span: Span::new(start, end.end),
930930+ }
931931+ } else {
932932+ return Err(ParseError::Syntax {
933933+ message: alloc::format!("Expected array for knownValues"),
934934+ span: current_span,
935935+ });
936936+ }
937937+ }
938938+ "default" => {
939939+ use crate::ast::ConstraintValue;
940940+ let end_span = current_span.end;
941941+ let value = match ¤t.token {
942942+ LexToken::StringLit(s) => {
943943+ let v = ConstraintValue::String(s.clone());
944944+ self.advance();
945945+ v
946946+ }
947947+ LexToken::IntLit(i) => {
948948+ let v = ConstraintValue::Integer(*i);
949949+ self.advance();
950950+ v
951951+ }
952952+ LexToken::True => {
953953+ let v = ConstraintValue::Boolean(true);
954954+ self.advance();
955955+ v
956956+ }
957957+ LexToken::False => {
958958+ let v = ConstraintValue::Boolean(false);
959959+ self.advance();
960960+ v
961961+ }
962962+ _ => {
963963+ return Err(ParseError::Syntax {
964964+ message: alloc::format!("Expected string, integer, or boolean for default"),
965965+ span: current_span,
966966+ });
967967+ }
968968+ };
969969+ Constraint::Default {
970970+ value,
971971+ span: Span::new(start, end_span),
972972+ }
973973+ }
974974+ _ => {
975975+ return Err(ParseError::Syntax {
976976+ message: alloc::format!("Unknown constraint: {}", name.name),
977977+ span: name.span,
978978+ });
979979+ }
980980+ };
981981+982982+ Ok(constraint)
983983+ }
984984+}
985985+986986+#[cfg(test)]
987987+mod tests {
988988+ use super::*;
989989+990990+ #[test]
991991+ fn test_parse_record() {
992992+ let input = r#"record user {
993993+ name: string,
994994+ age: integer,
995995+ };"#;
996996+ let result = parse_lexicon(input);
997997+ assert!(result.is_ok());
998998+ let lexicon = result.unwrap();
999999+ assert_eq!(lexicon.items.len(), 1);
10001000+ match &lexicon.items[0] {
10011001+ Item::Record(r) => {
10021002+ assert_eq!(r.name.name, "user");
10031003+ assert_eq!(r.fields.len(), 2);
10041004+ }
10051005+ _ => panic!("Expected record"),
10061006+ }
10071007+ }
10081008+10091009+ #[test]
10101010+ fn test_parse_alias() {
10111011+ let input = "alias userId = string;";
10121012+ let result = parse_lexicon(input);
10131013+ assert!(result.is_ok());
10141014+ let lexicon = result.unwrap();
10151015+ assert_eq!(lexicon.items.len(), 1);
10161016+ match &lexicon.items[0] {
10171017+ Item::Alias(a) => {
10181018+ assert_eq!(a.name.name, "userId");
10191019+ }
10201020+ _ => panic!("Expected alias"),
10211021+ }
10221022+ }
10231023+10241024+ #[test]
10251025+ fn test_parse_token() {
10261026+ let input = "token like;";
10271027+ let result = parse_lexicon(input);
10281028+ assert!(result.is_ok());
10291029+ let lexicon = result.unwrap();
10301030+ assert_eq!(lexicon.items.len(), 1);
10311031+ match &lexicon.items[0] {
10321032+ Item::Token(t) => {
10331033+ assert_eq!(t.name.name, "like");
10341034+ }
10351035+ _ => panic!("Expected token"),
10361036+ }
10371037+ }
10381038+10391039+ #[test]
10401040+ fn test_parse_query() {
10411041+ let input = "query getUser(id: string,): user;";
10421042+ let result = parse_lexicon(input);
10431043+ assert!(result.is_ok());
10441044+ let lexicon = result.unwrap();
10451045+ assert_eq!(lexicon.items.len(), 1);
10461046+ match &lexicon.items[0] {
10471047+ Item::Query(q) => {
10481048+ assert_eq!(q.name.name, "getUser");
10491049+ assert_eq!(q.params.len(), 1);
10501050+ }
10511051+ _ => panic!("Expected query"),
10521052+ }
10531053+ }
10541054+10551055+ #[test]
10561056+ fn test_parse_query_with_errors() {
10571057+ let input = r#"query getUser(id: string,): user | error {
10581058+ /// User not found
10591059+ NotFound,
10601060+ };"#;
10611061+ let result = parse_lexicon(input);
10621062+ if let Err(e) = &result {
10631063+ eprintln!("Parse error: {:?}", e);
10641064+ }
10651065+ assert!(result.is_ok());
10661066+ let lexicon = result.unwrap();
10671067+ assert_eq!(lexicon.items.len(), 1);
10681068+ match &lexicon.items[0] {
10691069+ Item::Query(q) => {
10701070+ assert_eq!(q.name.name, "getUser");
10711071+ match &q.returns {
10721072+ ReturnType::TypeWithErrors { errors, .. } => {
10731073+ assert_eq!(errors.len(), 1);
10741074+ assert_eq!(errors[0].name.name, "NotFound");
10751075+ }
10761076+ _ => panic!("Expected TypeWithErrors"),
10771077+ }
10781078+ }
10791079+ _ => panic!("Expected query"),
10801080+ }
10811081+ }
/// A `subscription` with one optional parameter (`cursor?`) parses to a single
/// `Item::Subscription` whose parameter is flagged as optional.
#[test]
fn test_parse_subscription() {
    let source = "subscription subscribeRepos(cursor?: integer,): commit | identity;";
    let parsed = parse_lexicon(source);
    assert!(parsed.is_ok());
    let lexicon = parsed.unwrap();
    assert_eq!(lexicon.items.len(), 1);

    let subscription = match &lexicon.items[0] {
        Item::Subscription(subscription) => subscription,
        _ => panic!("Expected subscription"),
    };
    assert_eq!(subscription.name.name, "subscribeRepos");
    assert_eq!(subscription.params.len(), 1);
    assert!(subscription.params[0].optional);
}
/// A bare `namespace actor;` declaration parses to one `Item::Namespace` named `actor`.
#[test]
fn test_parse_namespace() {
    let source = "namespace actor;";
    let parsed = parse_lexicon(source);
    assert!(parsed.is_ok());
    let lexicon = parsed.unwrap();
    assert_eq!(lexicon.items.len(), 1);

    if let Item::Namespace(namespace) = &lexicon.items[0] {
        assert_eq!(namespace.name.name, "actor");
    } else {
        panic!("Expected namespace");
    }
}
/// An alias of `string constrained { maxLength: 100, }` parses to `Type::Constrained`
/// holding exactly one constraint.
#[test]
fn test_parse_constrained_type() {
    let source = r#"alias shortString = string constrained {
        maxLength: 100,
    };"#;
    let parsed = parse_lexicon(source);
    assert!(parsed.is_ok());
    let lexicon = parsed.unwrap();
    assert_eq!(lexicon.items.len(), 1);

    let alias = match &lexicon.items[0] {
        Item::Alias(alias) => alias,
        _ => panic!("Expected alias"),
    };
    if let Type::Constrained { constraints, .. } = &alias.ty {
        assert_eq!(constraints.len(), 1);
    } else {
        panic!("Expected constrained type");
    }
}
/// `success | failure` on the right-hand side of an alias parses to `Type::Union`
/// with two member types.
#[test]
fn test_parse_union_type() {
    let source = "alias result = success | failure;";
    let parsed = parse_lexicon(source);
    assert!(parsed.is_ok());
    let lexicon = parsed.unwrap();
    assert_eq!(lexicon.items.len(), 1);

    let alias = match &lexicon.items[0] {
        Item::Alias(alias) => alias,
        _ => panic!("Expected alias"),
    };
    if let Type::Union { types, .. } = &alias.ty {
        assert_eq!(types.len(), 2);
    } else {
        panic!("Expected union type");
    }
}
/// The bracketed form `[user]` on the right-hand side of an alias parses to `Type::Array`.
#[test]
fn test_parse_array_type() {
    let source = "alias userList = [user];";
    let parsed = parse_lexicon(source);
    assert!(parsed.is_ok());
    let lexicon = parsed.unwrap();
    assert_eq!(lexicon.items.len(), 1);

    let alias = match &lexicon.items[0] {
        Item::Alias(alias) => alias,
        _ => panic!("Expected alias"),
    };
    // Only the variant matters here; the element type is not inspected.
    if !matches!(&alias.ty, Type::Array { .. }) {
        panic!("Expected array type");
    }
}
/// An argument-less `@deprecated` annotation on the line before a record is attached
/// to that record.
#[test]
fn test_parse_annotation() {
    let source = "@deprecated\nrecord old {};";
    let parsed = parse_lexicon(source);
    assert!(parsed.is_ok());
    let lexicon = parsed.unwrap();
    assert_eq!(lexicon.items.len(), 1);

    if let Item::Record(record) = &lexicon.items[0] {
        assert_eq!(record.annotations.len(), 1);
        assert_eq!(record.annotations[0].name.name, "deprecated");
    } else {
        panic!("Expected record");
    }
}
/// An annotation with two named arguments (`min`, `max`) keeps both arguments.
#[test]
fn test_parse_annotation_with_args() {
    let source = "@validate(min: 0, max: 100)\nrecord data {};";
    let parsed = parse_lexicon(source);
    assert!(parsed.is_ok());
    let lexicon = parsed.unwrap();
    assert_eq!(lexicon.items.len(), 1);

    let record = match &lexicon.items[0] {
        Item::Record(record) => record,
        _ => panic!("Expected record"),
    };
    assert_eq!(record.annotations.len(), 1);
    assert_eq!(record.annotations[0].args.len(), 2);
}
/// A record field written as `name?: string` is parsed with its `optional` flag set.
#[test]
fn test_parse_optional_field() {
    let input = r#"record user {
        name?: string,
    };"#;
    let result = parse_lexicon(input);
    assert!(result.is_ok());
    let lexicon = result.unwrap();
    // Pin the item count before indexing: a wrong count now fails with a clear
    // assertion (instead of an out-of-bounds panic) and stray extra items are rejected.
    assert_eq!(lexicon.items.len(), 1);
    match &lexicon.items[0] {
        Item::Record(r) => {
            assert_eq!(r.fields.len(), 1);
            assert!(r.fields[0].optional);
        }
        _ => panic!("Expected record"),
    }
}
/// An `enum: [...]` entry inside a `constrained` block parses to `Constraint::Enum`
/// holding both listed values.
#[test]
fn test_parse_enum_constraint() {
    let input = r#"alias status = string constrained {
        enum: ["active", "inactive"],
    };"#;
    let result = parse_lexicon(input);
    assert!(result.is_ok());
    let lexicon = result.unwrap();
    // Pin the item count before indexing: a wrong count now fails with a clear
    // assertion (instead of an out-of-bounds panic) and stray extra items are rejected.
    assert_eq!(lexicon.items.len(), 1);
    match &lexicon.items[0] {
        Item::Alias(a) => {
            match &a.ty {
                Type::Constrained { constraints, .. } => {
                    // The input declares exactly one constraint; check that before
                    // indexing into the list.
                    assert_eq!(constraints.len(), 1);
                    match &constraints[0] {
                        Constraint::Enum { values, .. } => {
                            assert_eq!(values.len(), 2);
                        }
                        _ => panic!("Expected enum constraint"),
                    }
                }
                _ => panic!("Expected constrained type"),
            }
        }
        _ => panic!("Expected alias"),
    }
}
12491249+}
···11+fn main() {
22+ let src_dir = std::path::Path::new("../../src");
33+44+ let mut c_config = cc::Build::new();
55+ c_config.include(src_dir);
66+ c_config
77+ .flag_if_supported("-Wno-unused-parameter")
88+ .flag_if_supported("-Wno-unused-but-set-variable")
99+ .flag_if_supported("-Wno-trigraphs");
1010+ let parser_path = src_dir.join("parser.c");
1111+ c_config.file(&parser_path);
1212+1313+ // If your language uses an external scanner written in C,
1414+ // then include this block of code:
1515+1616+ /*
1717+ let scanner_path = src_dir.join("scanner.c");
1818+ c_config.file(&scanner_path);
1919+ println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
2020+ */
2121+2222+ c_config.compile("tree-sitter-mlf");
2323+ println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
2424+}
+56
tree-sitter-mlf/bindings/rust/lib.rs
···11+//! This crate provides MLF language support for the [tree-sitter][] parsing library.
22+//!
33+//! Typically, you will use the [language][language func] function to add this language to a
44+//! tree-sitter [Parser][], and then use the parser to parse some code:
55+//!
66+//! ```
77+//! let code = r#"
88+//! record post {
99+//! text: string,
1010+//! };
1111+//! "#;
1212+//! let mut parser = tree_sitter::Parser::new();
1313+//! parser.set_language(&tree_sitter_mlf::language()).expect("Error loading MLF grammar");
1414+//! let tree = parser.parse(code, None).unwrap();
1515+//! assert!(!tree.root_node().has_error());
1616+//! ```
1717+//!
1818+//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
1919+//! [language func]: fn.language.html
2020+//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
2121+//! [tree-sitter]: https://tree-sitter.github.io/
2222+2323+use tree_sitter::Language;
2424+2525+extern "C" {
2626+ fn tree_sitter_mlf() -> Language;
2727+}
2828+2929+/// Get the tree-sitter [Language][] for this grammar.
3030+///
3131+/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
3232+pub fn language() -> Language {
3333+ unsafe { tree_sitter_mlf() }
3434+}
3535+3636+/// The content of the [`node-types.json`][] file for this grammar.
3737+///
3838+/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
3939+pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
4040+4141+// Uncomment these to include any queries that your grammar contains
4242+// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
4343+// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
4444+// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
4545+// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
    /// Smoke test: a fresh `tree_sitter::Parser` accepts the grammar returned by
    /// `language()`.
    #[test]
    fn test_can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        let grammar = super::language();
        let loaded = parser.set_language(&grammar);
        loaded.expect("Error loading MLF language");
    }
}
···11++++
22+title = "Documentation"
33+description = "Complete guide to MLF"
44+sort_by = "weight"
55+template = "section.html"
66++++
77+88+MLF (Matt's Lexicon Format) is a human-friendly DSL for writing ATProto Lexicons. This documentation will help you learn the language, use the CLI tools, and integrate MLF into your projects.
99+1010+## Quick Links
1111+1212+Start with the **Getting Started** guide to install MLF and write your first lexicon, then explore the other sections to learn more about the language and tools.
+145
website/content/docs/cli.md
···11++++
22+title = "CLI Reference"
33+description = "Command-line tool documentation"
44+weight = 3
55++++
66+77+## Installation
88+99+Clone and build from source:
1010+1111+```bash
1212+git clone https://tangled.org/@stavola.xyz/mlf
1313+cd mlf
1414+cargo build --release
1515+```
1616+1717+The binary will be at `target/release/mlf`.
1818+1919+Optionally, install to your PATH:
2020+2121+```bash
2222+# Option 1: Use cargo install
2323+cargo install --path mlf-cli
2424+2525+# Option 2: Manually copy the binary
2626+cp target/release/mlf /usr/local/bin/
2727+```
2828+2929+## Commands
3030+3131+### `mlf check`
3232+3333+Validate MLF lexicon files for syntax and type errors.
3434+3535+```bash
3636+mlf check [INPUT]...
3737+```
3838+3939+**Arguments:**
4040+- `[INPUT]...` - MLF lexicon file(s) to validate (glob patterns supported)
4141+4242+**Examples:**
4343+4444+```bash
4545+# Check a single file
4646+mlf check thread.mlf
4747+4848+# Check multiple files
4949+mlf check thread.mlf profile.mlf reply.mlf
5050+5151+# Check with glob patterns
5252+mlf check "lexicons/**/*.mlf"
5353+```
5454+5555+**Output:**
5656+- ✓ Success message if valid
5757+- Detailed error messages with source context if invalid
5858+5959+---
6060+6161+### `mlf validate`
6262+6363+Validate a JSON record against an MLF lexicon.
6464+6565+```bash
6666+mlf validate <LEXICON> <RECORD>
6767+```
6868+6969+**Arguments:**
7070+- `<LEXICON>` - MLF lexicon file
7171+- `<RECORD>` - JSON record file to validate against the lexicon
7272+7373+**Example:**
7474+7575+```bash
7676+mlf validate thread.mlf record.json
7777+```
7878+7979+**Output:**
8080+- ✓ Success if record is valid
8181+- Detailed validation errors if invalid
8282+8383+---
8484+8585+### `mlf generate lexicon`
8686+8787+Generate ATProto JSON lexicons from MLF files.
8888+8989+```bash
9090+mlf generate lexicon --output <OUTPUT> [OPTIONS]
9191+```
9292+9393+**Options:**
9494+- `-i, --input <INPUT>` - Input MLF files (glob patterns supported, can be specified multiple times)
9595+- `-o, --output <OUTPUT>` - Output directory (required)
9696+- `--flat` - Use flat file structure (e.g., `com.example.thread.json`)
9797+9898+**Examples:**
9999+100100+```bash
101101+# Generate with folder structure
102102+mlf generate lexicon -i thread.mlf -o lexicons/
103103+# Creates: lexicons/com/example/thread.json
104104+105105+# Generate with flat structure
106106+mlf generate lexicon -i thread.mlf -o lexicons/ --flat
107107+# Creates: lexicons/com.example.thread.json
108108+109109+# Generate from multiple files
110110+mlf generate lexicon -i thread.mlf -i reply.mlf -o lexicons/
111111+112112+# Generate from glob pattern
113113+mlf generate lexicon -i "src/**/*.mlf" -o dist/lexicons/
114114+```
115115+116116+---
117117+118118+## Error Messages
119119+120120+MLF provides rich error messages with:
121121+122122+- Source code context
123123+- Labeled spans showing exact error locations
124124+- Helpful suggestions for fixing errors
125125+- Error codes for categorization
126126+127127+**Example error:**
128128+129129+```
130130+ × Undefined reference to 'ProfileView'
131131+ ╭─[profile.mlf:5:12]
132132+ 5 │ author: ProfileView,
133133+ · ^^^^^^^^^^^ 'ProfileView' is not defined
134134+ ╰────
135135+ help: Make sure this type is defined in the same file or imported via 'use'.
136136+```
137137+138138+## Environment Variables
139139+140140+None currently used.
141141+142142+## Exit Codes
143143+144144+- `0` - Success
145145+- `1` - Error (syntax, validation, or runtime error)
+88
website/content/docs/getting-started.md
···11++++
22+title = "Getting Started"
33+description = "Install MLF and write your first lexicon"
44+weight = 1
55++++
66+77+## Installation
88+99+Clone and build from source:
1010+1111+```bash
1212+git clone https://tangled.org/@stavola.xyz/mlf
1313+cd mlf
1414+cargo build --release
1515+```
1616+1717+The binary will be at `target/release/mlf`.
1818+1919+Optionally, install to your PATH:
2020+2121+```bash
2222+# Option 1: Use cargo install
2323+cargo install --path mlf-cli
2424+2525+# Option 2: Manually copy the binary
2626+cp target/release/mlf /usr/local/bin/
2727+```
2828+2929+## Your First Lexicon
3030+3131+Create a file `thread.mlf`:
3232+3333+```mlf
3434+/// A forum thread
3535+record thread {
3636+ /// Thread title
3737+ title: string constrained {
3838+ maxLength: 200,
3939+ minLength: 1,
4040+ },
4141+ /// Thread body
4242+ body: string constrained {
4343+ maxLength: 10000,
4444+ },
4545+ /// Thread creation timestamp
4646+ createdAt: Datetime,
4747+};
4848+```
4949+5050+## Generate JSON Lexicon
5151+5252+```bash
5353+mlf generate lexicon -i thread.mlf -o lexicons/
5454+```
5555+5656+This creates `lexicons/com/example/thread.json` with the ATProto JSON lexicon.
5757+5858+## Validate MLF Files
5959+6060+```bash
6161+mlf check thread.mlf
6262+```
6363+6464+This checks your MLF file for syntax and validation errors.
6565+6666+## Validate Records
6767+6868+Given a JSON record file `record.json`:
6969+7070+```json
7171+{
7272+ "title": "Welcome to the forums!",
7373+ "body": "This is my first thread. Looking forward to discussions!",
7474+ "createdAt": "2024-01-15T10:30:00Z"
7575+}
7676+```
7777+7878+Validate it against your lexicon:
7979+8080+```bash
8181+mlf validate thread.mlf record.json
8282+```
8383+8484+## Next Steps
- Read the [Syntax Guide](@/docs/syntax.md) to learn the MLF language
- Check out the [CLI Reference](@/docs/cli.md) for all commands
- Try the [WASM API](@/docs/wasm.md) to use MLF in the browser
+550
website/content/docs/syntax.md
···11++++
22+title = "Language Syntax"
33+description = "Complete reference for MLF syntax and features"
44+weight = 2
55++++
66+77+## File Structure
88+99+### File Extension
1010+- `.mlf` - MLF source files
1111+1212+### Shebang (Optional)
1313+```mlf
1414+#!/usr/bin/env mlf
1515+```
1616+1717+### File Naming Convention
1818+Files should follow the lexicon NSID:
1919+- `com.example.forum.thread.mlf` → Lexicon NSID: `com.example.forum.thread`
2020+- `com.example.user.profile.mlf` → Lexicon NSID: `com.example.user.profile`
2121+2222+## Basic Structure
2323+2424+Every MLF file can contain:
2525+2626+- Namespace declarations
2727+- Use statements (imports)
2828+- Type definitions (record, alias, token, query, procedure, subscription)
2929+3030+## Primitive Types
3131+3232+- `null` - Null value
3333+- `boolean` - True/false
3434+- `integer` - 64-bit integer
3535+- `number` - Double-precision float
3636+- `string` - UTF-8 string
3737+- `bytes` - Byte array
3838+- `blob` - Binary large object with metadata
3939+- `unknown` - Any value (for forward compatibility)
4040+4141+## Special String Formats
4242+4343+These are defined in the prelude and available everywhere:
4444+4545+- `Did` - Decentralized Identifier (did:*)
4646+- `AtUri` - AT-URI (at://...)
4747+- `AtIdentifier` - Either a DID or Handle
4848+- `Handle` - Handle identifier (domain name)
4949+- `Datetime` - ISO 8601 datetime
5050+- `Uri` - Generic URI
5151+- `Cid` - Content Identifier
5252+- `Nsid` - Namespaced Identifier
5353+- `Tid` - Timestamp Identifier
5454+- `RecordKey` - Record key
5555+- `Language` - BCP 47 language code
5656+5757+## Records
5858+5959+Records define structured data types stored in repositories:
6060+6161+```mlf
6262+/// A forum thread
6363+record thread {
6464+ /// Thread title
6565+ title: string constrained {
6666+ maxLength: 200,
6767+ minLength: 1,
6868+ },
6969+ /// Thread body
7070+ body?: string, // Optional field
7171+ /// Thread creation timestamp
7272+ createdAt: Datetime,
7373+};
7474+```
7575+7676+## Aliases

Type aliases give a reusable name to a type, most commonly an object shape:
7979+8080+```mlf
8181+alias replyRef = {
8282+ root: AtUri,
8383+ parent: AtUri,
8484+};
8585+8686+record thread {
8787+ reply?: replyRef,
8888+};
8989+```

When an alias is referenced in multiple places, it is hoisted into its own named definition (a `def`) in the generated lexicon. When it is referenced only once, it is inlined at the point of use.
9292+9393+## Tokens
9494+9595+Tokens are named constants used in enums and unions:
9696+9797+```mlf
9898+/// Open state
9999+token open;
100100+101101+/// Closed state
102102+token closed;
103103+104104+record issue {
105105+ state: string constrained {
106106+ knownValues: [open, closed],
107107+ default: "open",
108108+ },
109109+};
110110+```
111111+112112+Tokens must have doc comments describing their purpose.
113113+114114+## Constrained Types
115115+116116+Add validation constraints to types:
117117+118118+```mlf
119119+title: string constrained {
120120+ maxLength: 200,
121121+ minLength: 1,
122122+};
123123+124124+age: integer constrained {
125125+ minimum: 0,
126126+ maximum: 150,
127127+};
128128+129129+status: string constrained {
130130+ enum: ["draft", "published", "archived"],
131131+};
132132+```
133133+134134+### String Constraints
135135+136136+- `maxLength` / `minLength` - Length in bytes
137137+- `maxGraphemes` / `minGraphemes` - Length in grapheme clusters
138138+- `format` - Format validation (datetime, uri, did, handle, etc.)
139139+- `enum` - Allowed values (closed set)
140140+- `knownValues` - Known values (extensible set, can reference tokens)
141141+- `default` - Default value
142142+143143+### Integer Constraints
144144+145145+- `minimum` / `maximum` - Min/max values
146146+- `enum` - Allowed values
147147+- `default` - Default value
148148+149149+### Array Constraints
150150+151151+```mlf
152152+tags: string[] constrained {
153153+ minLength: 1,
154154+ maxLength: 10,
155155+}
156156+```
157157+158158+### Blob Constraints
159159+160160+```mlf
161161+avatar: blob constrained {
162162+ accept: ["image/png", "image/jpeg"],
163163+ maxSize: 1000000, // bytes
164164+}
165165+```
166166+167167+### Boolean Constraints
168168+169169+```mlf
170170+field: boolean constrained {
171171+ default: false,
172172+}
173173+```
174174+175175+### Constraint Refinement
176176+177177+Constraints can only make types **more restrictive**, never less restrictive:
178178+179179+```mlf
180180+alias shortString = string constrained {
181181+ maxLength: 100,
182182+};
183183+184184+record post {
185185+ // Valid: 50 is more restrictive than 100
186186+ title: shortString constrained {
187187+ maxLength: 50,
188188+ },
189189+};
190190+```
191191+192192+**Refinement rules:**
193193+- Numeric bounds: `minimum` can only increase, `maximum` can only decrease
194194+- Length bounds: `minLength`/`minGraphemes` can only increase, `maxLength`/`maxGraphemes` can only decrease
195195+- Enums: Can only restrict to a subset
196196+- Format: Cannot change once specified
197197+198198+## Arrays
199199+200200+```mlf
201201+tags: string[]
202202+203203+items: string[] constrained {
204204+ minLength: 1,
205205+ maxLength: 10,
206206+}
207207+```
208208+209209+## Unions
210210+211211+Use the pipe operator `|`:
212212+213213+```mlf
214214+// Closed union (only these types)
215215+content: text | image | video
216216+217217+// Union of tokens
218218+state: open | closed | pending
219219+```
220220+221221+Open unions (allowing unknown types) use `_`:
222222+223223+```mlf
224224+// Open union (can include unknown types)
225225+content: text | image | _
226226+```
227227+228228+## Objects
229229+230230+Inline object types:
231231+232232+```mlf
233233+metadata: {
234234+ version: integer,
235235+ timestamp: Datetime,
236236+}
237237+```
238238+239239+## Queries
240240+241241+Queries are read-only HTTP endpoints (GET):
242242+243243+```mlf
244244+/// Get a user profile
245245+query getProfile(
246246+ /// The actor's DID or handle
247247+ actor: AtIdentifier,
248248+): profile;
249249+```
250250+251251+With errors:
252252+253253+```mlf
254254+query getThread(
255255+ uri: AtUri,
256256+): thread | error {
257257+ /// Thread not found
258258+ NotFound,
259259+ /// Invalid request
260260+ BadRequest,
261261+};
262262+```
263263+264264+## Procedures
265265+266266+Procedures are write operations (POST):
267267+268268+```mlf
269269+/// Create a new thread
270270+procedure createThread(
271271+ title: string,
272272+ body: string,
273273+): {
274274+ uri: AtUri,
275275+ cid: Cid,
276276+} | error {
277277+ /// Title too long
278278+ TitleTooLong,
279279+};
280280+```
281281+282282+## Subscriptions
283283+284284+Subscriptions are WebSocket-based event streams:
285285+286286+```mlf
287287+/// Subscribe to repository events
288288+subscription subscribeRepos(
289289+ /// Optional cursor for resuming
290290+ cursor?: integer,
291291+): commit | identity | handle;
292292+```
293293+294294+Message types must be defined as aliases or records:
295295+296296+```mlf
297297+/// Commit message
298298+alias commit = {
299299+ seq: integer,
300300+ repo: Did,
301301+ commit: Cid,
302302+ time: Datetime,
303303+};
304304+305305+/// Identity message
306306+alias identity = {
307307+ did: Did,
308308+ handle: Handle,
309309+};
310310+```
311311+312312+## Comments
313313+314314+### Documentation Comments
315315+316316+Use `///` for documentation (appears in generated docs/code):
317317+318318+```mlf
319319+/// A forum thread
320320+record thread {
321321+ /// Thread title
322322+ title: string,
323323+};
324324+```
325325+326326+### Regular Comments
327327+328328+Use `//` for comments that won't appear in output:
329329+330330+```mlf
331331+// This is a regular comment
332332+record example {
333333+ field: string, // inline comment
334334+};
335335+```
336336+337337+## Annotations
338338+339339+Annotations use `@` and provide metadata for external tooling:
340340+341341+### Simple Annotation
342342+```mlf
343343+@deprecated
344344+record oldRecord {
345345+ field: string,
346346+}
347347+```
348348+349349+### Positional Arguments
350350+```mlf
351351+@since(1, 2, 0)
352352+@doc("https://example.com/docs")
353353+record example {
354354+ field: string,
355355+}
356356+```
357357+358358+### Named Arguments
359359+```mlf
360360+@validate(min: 0, max: 100, strict: true)
361361+@table(name: "threads", indexes: "did,createdAt")
362362+record thread {
363363+ @indexed
364364+ did: Did,
365365+366366+ @sensitive(pii: true)
367367+ title: string,
368368+}
369369+```
370370+371371+Annotations can be placed on records, aliases, tokens, queries, procedures, subscriptions, and fields.
372372+373373+## Imports
374374+375375+Import definitions from other lexicons:
376376+377377+```mlf
378378+// Single import
379379+use com.example.user.profile;
380380+381381+// Multiple imports
382382+use com.example.forum.{thread, reply};
383383+384384+// Alias import
385385+use com.example.user as User;
386386+387387+// Wildcard import
388388+use com.example.forum.*;
389389+390390+// Import with alias
391391+use com.example.forum.{thread as ForumThread};
392392+```
393393+394394+After importing, use the short name:
395395+396396+```mlf
397397+use com.example.user.profile;
398398+399399+record thread {
400400+ author: profile, // Instead of com.example.user.profile
401401+}
402402+```
403403+404404+## Namespaces
405405+406406+Organize related definitions:
407407+408408+```mlf
409409+namespace com.example.forum.thread;
410410+411411+record thread {
412412+ title: string,
413413+};
414414+```
415415+416416+Or use nested namespaces:
417417+418418+```mlf
419419+namespace .forum {
420420+ record thread {
421421+ title: string,
422422+ }
423423+424424+ query getThread(
425425+ uri: AtUri,
426426+ ): thread;
427427+}
428428+429429+namespace .user {
430430+ record profile {
431431+ displayName: string,
432432+ }
433433+}
434434+```
435435+436436+## References
437437+438438+Reference local or external definitions:
439439+440440+```mlf
441441+// Local reference (same file)
442442+record thread {
443443+ author: author, // References 'alias author' in same file
444444+}
445445+446446+// Cross-file reference
447447+record thread {
448448+ profile: com.example.user.profile, // References com/example/user/profile.mlf
449449+}
450450+```
451451+452452+**Note:** All references use dotted notation. The `#` character is NOT used for references.
453453+454454+## Optional Fields
455455+456456+Use `?` to mark fields as optional:
457457+458458+```mlf
459459+record thread {
460460+ title: string, // Required
461461+ body?: string, // Optional
462462+ tags?: string[], // Optional array
463463+}
464464+```
465465+466466+## Raw Identifiers
467467+468468+Use backticks to escape reserved keywords when you need to use them as identifiers:
469469+470470+```mlf
471471+alias `record` = {
472472+ `record`: com.atproto.repo.strongRef,
473473+ `error`: string,
474474+};
475475+```
476476+477477+This is useful when working with existing schemas that use MLF keywords as field or type names.
478478+479479+## Format Strings
480480+481481+Available format strings for constrained strings:
482482+483483+- `datetime` - ISO 8601 datetime
484484+- `uri` - URI (RFC 3986)
485485+- `at-uri` - AT-URI (ATProto)
486486+- `did` - Decentralized Identifier
487487+- `handle` - ATProto handle
488488+- `nsid` - Namespaced Identifier
489489+- `cid` - Content Identifier
490490+- `at-identifier` - DID or handle
491491+- `language` - BCP 47 language tag
492492+- `tid` - Timestamp ID
493493+- `record-key` - Record key
494494+495495+## Complete Example
496496+497497+```mlf
498498+#!/usr/bin/env mlf
499499+500500+use com.example.user.profile;
501501+502502+/// Open state
503503+token open;
504504+505505+/// Closed state
506506+token closed;
507507+508508+/// A forum thread
509509+record thread {
510510+ /// Thread title
511511+ title: string constrained {
512512+ minGraphemes: 1,
513513+ maxGraphemes: 200,
514514+ },
515515+ /// Thread body (markdown)
516516+ body?: string constrained {
517517+ maxGraphemes: 10000,
518518+ },
519519+ /// Thread state
520520+ state: string constrained {
521521+ knownValues: [open, closed],
522522+ default: "open",
523523+ },
524524+ /// Author profile
525525+ author: profile,
526526+ /// Creation timestamp
527527+ createdAt: Datetime,
528528+};
529529+530530+/// Get a thread by URI
531531+query getThread(
532532+ /// Thread AT-URI
533533+ uri: AtUri,
534534+): thread | error {
535535+ /// Thread not found
536536+ NotFound,
537537+};
538538+539539+/// Create a new thread
540540+procedure createThread(
541541+ title: string,
542542+ body?: string,
543543+): {
544544+ uri: AtUri,
545545+ cid: Cid,
546546+} | error {
547547+ /// Title too long
548548+ TitleTooLong,
549549+};
550550+```
+206
website/content/docs/wasm.md
···11++++
22+title = "WASM API"
33+description = "Using MLF in the browser with WebAssembly"
44+weight = 4
55++++
66+77+## Installation
88+99+Build from source:
1010+1111+```bash
1212+# Clone the repository
1313+git clone https://tangled.org/@stavola.xyz/mlf
1414+cd mlf
1515+1616+# Install wasm-pack if you don't have it
1717+cargo install wasm-pack
1818+1919+# Build the WASM module
2020+wasm-pack build mlf-wasm --target web
2121+```
2222+2323+This generates:
2424+- `pkg/mlf_wasm.js` - JavaScript bindings
2525+- `pkg/mlf_wasm_bg.wasm` - WebAssembly binary
2626+- `pkg/mlf_wasm.d.ts` - TypeScript definitions
2727+2828+## Usage
2929+3030+### Loading the Module
3131+3232+```javascript
3333+import init, * as mlf from './pkg/mlf_wasm.js';
3434+3535+// Initialize the WASM module
3636+await init();
3737+3838+// Now you can use MLF functions
3939+```
4040+4141+### API Functions
4242+4343+#### `parse(source: string) -> ParseResult`
4444+4545+Parse MLF source and check for syntax errors.
4646+4747+```javascript
4848+const result = mlf.parse(`
4949+ record post {
5050+ text: string,
5151+ };
5252+`);
5353+5454+if (result.success) {
5555+ console.log('Valid MLF!');
5656+} else {
5757+ console.error('Parse error:', result.error);
5858+}
5959+```
6060+6161+**Returns:**
6262+```typescript
6363+{
6464+ success: boolean,
6565+ error?: string
6666+}
6767+```
6868+6969+---
7070+7171+#### `check(source: string) -> CheckResult`
7272+7373+Perform full validation (parse + type checking).
7474+7575+```javascript
7676+const result = mlf.check(mlfSource);
7777+7878+if (result.success) {
7979+ console.log('Lexicon is valid!');
8080+} else {
8181+ console.error('Errors:', result.errors);
8282+}
8383+```
8484+8585+**Returns:**
8686+```typescript
8787+{
8888+ success: boolean,
8989+ errors: string[]
9090+}
9191+```
9292+9393+---
9494+9595+#### `generate_lexicon(source: string, namespace: string) -> GenerateResult`
9696+9797+Generate a JSON lexicon from MLF source.
9898+9999+```javascript
100100+const result = mlf.generate_lexicon(mlfSource, 'com.example.thread');
101101+102102+if (result.success) {
103103+ const lexicon = JSON.parse(result.lexicon);
104104+ console.log(lexicon);
105105+} else {
106106+ console.error('Generation error:', result.error);
107107+}
108108+```
109109+110110+**Returns:**
111111+```typescript
112112+{
113113+ success: boolean,
114114+ lexicon?: string, // JSON string
115115+ error?: string
116116+}
117117+```
118118+119119+---
120120+121121+#### `validate_record(lexicon: string, record: string) -> ValidateResult`
122122+123123+Validate a JSON record against an MLF lexicon.
124124+125125+```javascript
126126+const record = JSON.stringify({
127127+ text: "Hello, world!",
128128+ createdAt: "2024-01-15T10:30:00Z"
129129+});
130130+131131+const result = mlf.validate_record(mlfSource, record);
132132+133133+if (result.success) {
134134+ console.log('Record is valid!');
135135+} else {
136136+ console.error('Validation errors:', result.errors);
137137+}
138138+```
139139+140140+**Returns:**
141141+```typescript
142142+{
143143+ success: boolean,
144144+ errors: string[]
145145+}
146146+```
147147+148148+---
149149+150150+## Example: Live Editor
151151+152152+```html
153153+<!DOCTYPE html>
154154+<html>
155155+<head>
156156+ <title>MLF Editor</title>
157157+</head>
158158+<body>
159159+ <textarea id="editor"></textarea>
160160+ <pre id="output"></pre>
161161+162162+ <script type="module">
163163+ import init, * as mlf from './pkg/mlf_wasm.js';
164164+165165+ await init();
166166+167167+ const editor = document.getElementById('editor');
168168+ const output = document.getElementById('output');
169169+170170+ editor.addEventListener('input', () => {
171171+ const source = editor.value;
172172+ const result = mlf.generate_lexicon(source, 'app.example');
173173+174174+ if (result.success) {
175175+ output.textContent = JSON.stringify(
176176+ JSON.parse(result.lexicon),
177177+ null,
178178+ 2
179179+ );
180180+ } else {
181181+ output.textContent = `Error: ${result.error}`;
182182+ }
183183+ });
184184+ </script>
185185+</body>
186186+</html>
187187+```
188188+189189+## Browser Compatibility
190190+191191+Requires:
192192+- WebAssembly support
193193+- ES6 modules
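- Async/await (the examples on this page also use top-level `await` inside module scripts, which requires a newer browser than plain async/await does)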
195195+196196+Supported browsers:
197197+- Chrome/Edge 61+
198198+- Firefox 60+
199199+- Safari 11+
200200+201201+## Bundle Size
202202+203203+Typical sizes (with wasm-opt):
204204+- WASM: ~80KB
205205+- JS: ~5KB
206206+- Gzipped: ~30KB total
+9
website/content/playground.md
···11++++
22+title = "Playground"
33+description = "Try MLF in your browser"
44+template = "playground.html"
55++++
66+77+# MLF Playground
88+99+Try MLF directly in your browser. The editor uses WebAssembly to provide instant feedback.
+22
website/justfile
···11+# Build development version (faster, no optimizations)
22+build-dev:
33+ #!/usr/bin/env bash
44+ cd ..
55+ wasm-pack build mlf-wasm --target web --out-dir ../website/static/js/pkg
66+ cd website
77+ zola build
88+99+# Build release version (optimized)
1010+build-release:
1111+ #!/usr/bin/env bash
1212+ cd ..
1313+ wasm-pack build mlf-wasm --target web --out-dir ../website/static/js/pkg --release
1414+ cd website
1515+ if command -v wasm-opt >/dev/null 2>&1; then
1616+ wasm-opt -Oz static/js/pkg/mlf_wasm_bg.wasm -o static/js/pkg/mlf_wasm_bg.wasm
1717+ fi
1818+ zola build
1919+2020+# Start development server with live reload
2121+serve:
2222+ zola serve