the next generation of the in-browser educational proof assistant
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #8 from joshcbrown/push-urxktozzxxlq

Implement parsing library

authored by

Liam O'Connor and committed by
GitHub
26358dcb 5b716ec3

+342 -174
+237
src/Parser.res
··· 1 + type sourceloc = { 2 + line: int, 3 + col: int, 4 + idx: int, 5 + } 6 + 7 + type info = { 8 + msg?: string, 9 + expects: array<string>, 10 + } 11 + 12 + type err = { 13 + info: info, 14 + pos: sourceloc, 15 + } 16 + 17 + type state = {pos: sourceloc} 18 + 19 + type t<'a> = (string, state) => result<('a, state), err> 20 + 21 + let infoPretty = (info: info) => { 22 + let expectedMsg = switch info.expects { 23 + | [] => None 24 + | [expect] => Some(`expected ${expect}`) 25 + | _ => { 26 + let n = Array.length(info.expects) 27 + let first = info.expects->Array.slice(~start=0, ~end=n - 1)->Array.join(", ") 28 + Some(`expected ${first}, or ${info.expects[n - 1]->Option.getUnsafe}`) 29 + } 30 + } 31 + switch (info.msg, expectedMsg) { 32 + | (Some(infoMsg), Some(expectedMsg)) => `parse failed: ${infoMsg}\n ${expectedMsg}` 33 + | (None, Some(msg)) | (Some(msg), None) => `parse failed: ${msg}` 34 + | (None, None) => `parse failed` 35 + } 36 + } 37 + let initialState = {pos: {line: 1, col: 1, idx: 0}} 38 + let runParser = (p: t<'a>, str: string): result<('a, string), string> => { 39 + p(str, initialState) 40 + ->Result.map(((res, state)) => (res, str->String.sliceToEnd(~start=state.pos.idx))) 41 + ->Result.mapError(err => { 42 + `around ${str->String.slice(~start=err.pos.idx, ~end=err.pos.idx + 5)}:\n${infoPretty( 43 + err.info, 44 + )}` 45 + }) 46 + } 47 + 48 + let map = (p: t<'a>, f: 'a => 'b): t<'b> => 49 + (str, state) => p(str, state)->Result.map(((res, state)) => (f(res), state)) 50 + let mapError = (p: t<'a>, f: err => err) => (str, state) => p(str, state)->Result.mapError(f) 51 + let label = (p: t<'a>, label: string) => 52 + p->mapError(err => {...err, info: {...err.info, expects: [label]}}) 53 + let pure = (a): t<'a> => (_, state) => Ok((a, state)) 54 + let bind = (p1: t<'a>, p2: 'a => t<'b>): t<'b> => 55 + (str, state) => p1(str, state)->Result.flatMap(((res, state)) => p2(res)(str, state)) 56 + let apply = (p: t<'a>, pf: t<'a => 'b>): t<'b> => p->bind(a => pf->map(f => f(a))) 57 + let then = (p1: t<'a>, p2: t<'b>): t<'b> => p1->bind(_ => p2) 58 + let thenIgnore = (p1: t<'a>, p2: t<'b>): t<'a> => p1->bind(res => p2->map(_ => res)) 59 + 60 + let fail = (info): t<'a> => (_, state) => Error({info: {msg: info, expects: []}, pos: state.pos}) 61 + let expected = (expects: array<string>, ~msg=?): t<'a> => 62 + (_, state) => Error({info: {?msg, expects}, pos: state.pos}) 63 + let void = (p: t<'a>): t<unit> => p->map(_ => ()) 64 + // backtracks by default 65 + let or = (p1: t<'a>, p2: t<'a>): t<'a> => 66 + (str, state) => 67 + switch p1(str, state) { 68 + | Ok(r) => Ok(r) 69 + | Error(e1) => 70 + p2(str, state)->Result.mapError(e2 => { 71 + ...e2, 72 + info: {expects: Array.concat(e1.info.expects, e2.info.expects)}, 73 + }) 74 + } 75 + let optional = (p: t<'a>): t<option<'a>> => p->map(a => Some(a))->or(pure(None)) 76 + let choice = (ps: array<t<'a>>): t<'a> => { 77 + ps->Array.reduce(fail("no matches"), or) 78 + } 79 + 80 + let getState: t<state> = (_, state) => Ok((state, state)) 81 + let setState = (newState: state): t<unit> => (_, _) => Ok(((), newState)) 82 + let modifyState = (f: state => state) => getState->bind(state => setState(f(state))) 83 + let readStr: t<string> = (str, state) => Ok((str, state)) 84 + let getCurrentStr: t<string> = (str, state) => Ok(( 85 + str->String.sliceToEnd(~start=state.pos.idx), 86 + state, 87 + )) 88 + 89 + let eof: t<bool> = getState->bind(state => readStr->map(str => state.pos.idx >= str->String.length)) 90 + 91 + // fixpoints using references as a layer of indirection are necessary because 92 + // the compiler complains very ambiguously about not knowing the size of 93 + // (directly) recursive combinators 94 + let fix = (f: t<'a> => t<'a>): t<'a> => { 95 + let pRef = ref(fail("umm")) 96 + pRef := f((str, state) => pRef.contents(str, state)) 97 + pRef.contents 98 + } 99 + 100 + let many = p => 101 + fix(f => 102 + optional(p)->bind(o => 103 + switch o { 104 + | Some(res) => f->map(l => l->List.add(res)) 105 + | None => pure(list{}) 106 + } 107 + ) 108 + )->map(List.toArray) 109 + let between = (inner: t<'a>, o1: t<string>, o2: t<string>): t<'a> => o1->then(inner)->thenIgnore(o2) 110 + let consume = (l: int): t<string> => 111 + getCurrentStr->bind(str => { 112 + if str->String.length < l { 113 + fail("tried to consume too much") 114 + } else { 115 + let consumed = str->String.slice(~start=0, ~end=l) 116 + let vOffset = consumed->String.split("\n")->Array.length - 1 117 + let hOffset = consumed->String.length - consumed->String.lastIndexOfOpt("\n")->Option.getOr(0) 118 + modifyState(state => { 119 + pos: { 120 + col: vOffset > 0 ? hOffset : state.pos.col + hOffset, 121 + line: state.pos.line + vOffset, 122 + idx: state.pos.idx + l, 123 + }, 124 + })->then(pure(consumed)) 125 + } 126 + }) 127 + 128 + let execRe = (re, str) => { 129 + re 130 + ->RegExp.exec(str) 131 + ->Option.map(result => { 132 + open RegExp.Result 133 + (matches(result), fullMatch(result)->String.length) 134 + }) 135 + } 136 + 137 + // this will be released in Core in 12.0.0.4, remove then 138 + @get external regexpFlags: RegExp.t => string = "flags" 139 + 140 + let string = s => 141 + getCurrentStr->bind(str => 142 + if str->String.startsWith(s) { 143 + consume(String.length(s))->then(pure(s)) 144 + } else { 145 + expected([`literal ${s}`]) 146 + } 147 + ) 148 + 149 + let regex = (re: RegExp.t): t<array<string>> => { 150 + let wrapped = { 151 + let source = re->RegExp.source 152 + if source->String.startsWith("^") { 153 + re 154 + } else { 155 + RegExp.fromStringWithFlags(`^${source}`, ~flags=re->regexpFlags) 156 + } 157 + } 158 + getCurrentStr->bind(str => 159 + switch execRe(wrapped, str) { 160 + | Some((matches, l)) => consume(l)->then(pure(matches)) 161 + | None => expected([`regex pattern ${re->RegExp.source}`]) 162 + } 163 + ) 164 + } 165 + 166 + let regex1 = (re: RegExp.t): t<string> => 167 + regex(re)->bind(matches => 168 + switch matches { 169 + | [x] => pure(x) 170 + | _ => fail("more than one match") 171 + } 172 + ) 173 + 174 + let peek = (n): t<string> => 175 + (str, state) => { 176 + let res = str->String.slice(~start=state.pos.idx, ~end=state.pos.idx + n) 177 + Ok((res, state)) 178 + } 179 + 180 + type length = int 181 + let takeWhileMany = (f: string => option<length>) => 182 + fix(p => 183 + getCurrentStr->bind(str => 184 + switch f(str) { 185 + | Some(length) => consume(length)->bind(s => p->map(s' => String.concat(s, s'))) 186 + | None => pure("") 187 + } 188 + ) 189 + ) 190 + 191 + let takeWhile = (f: string => bool) => 192 + takeWhileMany(s => 193 + if f(s) { 194 + Some(1) 195 + } else { 196 + None 197 + } 198 + ) 199 + 200 + let dbg = (p: t<'a>, label): t<'a> => { 201 + let dbgInfo = title => 202 + getCurrentStr->bind(str => 203 + getState->map(state => { 204 + let currSurr = `${str->String.slice(~start=0, ~end=10)}...` 205 + let stateStr = `${state.pos.line->Int.toString}:${state.pos.col->Int.toString}` 206 + Console.log(`${title} ${label} at:\n${currSurr}\nstate: ${stateStr}`) 207 + }) 208 + ) 209 + dbgInfo("enter") 210 + ->then(p) 211 + ->bind(res => { 212 + Console.log(("successfully parsed", res)) 213 + dbgInfo("exit")->map(_ => res) 214 + }) 215 + } 216 + 217 + let lexeme = p => p->thenIgnore(regex(/^\s*/)->void) 218 + let token = s => string(s)->lexeme 219 + 220 + let decimal = regex1(/(\d+)/)->map(xStr => xStr->Int.fromString->Option.getExn) 221 + let whitespace = regex(/\s*/)->void 222 + 223 + let lift = (f: string => result<('a, string), string>): t<'a> => 224 + getCurrentStr->bind(str => 225 + switch f(str) { 226 + | Ok((res, remaining)) => { 227 + let length = String.length(str) - String.length(remaining) 228 + consume(length)->map(_ => res) 229 + } 230 + | Error(msg) => fail(msg) 231 + } 232 + ) 233 + let liftParse = ( 234 + f: (string, ~scope: array<'m>, ~gen: 'g=?) => result<('a, string), string>, 235 + ~scope: array<'m>, 236 + ~gen: option<'g>=?, 237 + ): t<'a> => lift(s => f(s, ~scope, ~gen?))
+1 -1
src/Rule.res
··· 2 2 // the tree sitter plugin hates backslashes in string literals unless they're on the top 3 3 // level.. 4 4 let ruleNamePattern = "^[^|()\\s\\-—][^()\\s]*" 5 - let vinculumRES = "^\s*\\n\\s*[-—][-—][\\-—]+[ \t]*([^()|\\s\\-—][^()\\s]*)?" 5 + let vinculumRES = "^\\n?\\s*[-—][-—][\\-—]+[ \t]*([^()|\\s\\-—][^()\\s]*)?" 6 6 module Make = (Term: TERM, Judgment: JUDGMENT with module Term := Term) => { 7 7 type rec t = {vars: array<Term.meta>, premises: array<t>, conclusion: Judgment.t} 8 8 let rec substitute = (rule: t, subst: Term.subst) => {
+40 -145
src/SExp.res
··· 250 250 251 251 let prettyPrintSubst = (sub, ~scope) => 252 252 Util.prettyPrintMap(sub, ~showV=t => prettyPrint(t, ~scope)) 253 - let symbolRegexpString = `^([^\\s()\\[\\]]+)` 254 - let varRegexpString = "^\\\\([0-9]+)$" 255 - let schematicRegexpString = "^\\?([0-9]+)$" 256 - type lexeme = LParen | RParen | VarT(int) | AtomT(Atom.t) | SchematicT(int) 257 253 let nameRES = "^([^\\s.\\[\\]()]+)\\." 258 254 let prettyPrintMeta = (str: string) => { 259 255 String.concat(str, ".") ··· 269 265 } 270 266 } 271 267 } 272 - let parse = (str: string, ~scope: array<string>, ~gen=?) => { 273 - let cur = ref(String.make(str)) 274 - let lex: unit => option<lexeme> = () => { 275 - let str = String.trim(cur.contents) 276 - cur := str 277 - let checkVariable = (candidate: string) => { 278 - let varRegexp = RegExp.fromString(varRegexpString) 279 - switch Array.indexOf(scope, candidate) { 280 - | -1 => 281 - switch varRegexp->RegExp.exec(candidate) { 282 - | Some(res') => 283 - switch RegExp.Result.matches(res') { 284 - | [idx] => Some(idx->Int.fromString->Option.getUnsafe) 285 - | _ => None 286 - } 287 - | None => None 268 + let mkParser = (~scope: array<string>, ~gen=?): Parser.t<t> => { 269 + open Parser 270 + let varLit = string("\\")->then(decimal) 271 + let ident = regex1(/([^\s\(\)]+)/) 272 + let varIdx = 273 + varLit 274 + ->or( 275 + ident->bind(id => 276 + switch scope->Array.indexOfOpt(id) { 277 + | Some(idx) => pure(idx) 278 + | None => fail("expected variable") 288 279 } 289 - | idx => Some(idx) 290 - } 291 - } 292 - if String.get(str, 0) == Some("(") { 293 - cur := String.sliceToEnd(str, ~start=1) 294 - Some(LParen) 295 - } else if String.get(str, 0) == Some(")") { 296 - cur := String.sliceToEnd(str, ~start=1) 297 - Some(RParen) 298 - } else { 299 - let symbolRegexp = RegExp.fromStringWithFlags(symbolRegexpString, ~flags="y") 300 - switch symbolRegexp->RegExp.exec(str) { 301 - | None => None 302 - | Some(res) => 303 - switch RegExp.Result.matches(res) { 304 - | [symb] => { 305 - let specialSymb = tok => { 306 - cur := String.sliceToEnd(str, ~start=RegExp.lastIndex(symbolRegexp)) 307 - Some(tok) 308 - } 309 - let regularSymb = () => { 310 - // FIX: not ideal to throw away symbol error message 311 - Console.log(("current", cur.contents)) 312 - Atom.parse(cur.contents, ~scope) 313 - ->Util.Result.ok 314 - ->Option.map(((s, rest)) => { 315 - cur := rest 316 - Console.log(("parsed", s, cur.contents)) 317 - AtomT(s) 318 - }) 319 - } 320 - switch checkVariable(symb) { 321 - | Some(idx) => specialSymb(VarT(idx)) 322 - | None => { 323 - let schematicRegexp = RegExp.fromString(schematicRegexpString) 324 - switch schematicRegexp->RegExp.exec(symb) { 325 - | None => regularSymb() 326 - | Some(res') => 327 - switch RegExp.Result.matches(res') { 328 - | [s] => specialSymb(SchematicT(s->Int.fromString->Option.getUnsafe)) 329 - | _ => regularSymb() 330 - } 331 - } 332 - } 333 - } 334 - } 335 - | _ => None 336 - } 337 - } 338 - } 339 - } 280 + ), 281 + ) 282 + ->lexeme 283 + let var = varIdx->map(idx => Var({idx: idx})) 340 284 341 - let peek = () => { 342 - // a bit slow, better would be to keep a backlog of lexed tokens.. 343 - let str = String.make(cur.contents) 344 - let tok = lex() 345 - cur := str 346 - tok 347 - } 348 - exception ParseError(string) 349 - let rec parseExp = () => { 350 - let tok = peek() 351 - switch tok { 352 - | Some(AtomT(s)) => { 353 - let _ = lex() 354 - Some(Atom(s)) 355 - } 356 - | Some(VarT(idx)) => { 357 - let _ = lex() 358 - Some(Var({idx: idx})) 359 - } 360 - | Some(SchematicT(num)) => { 361 - let _ = lex() 362 - switch lex() { 363 - | Some(LParen) => { 364 - let it = ref(None) 365 - let bits = [] 366 - let getVar = (t: option<lexeme>) => 367 - switch t { 368 - | Some(VarT(idx)) => Some(idx) 369 - | _ => None 370 - } 371 - while { 372 - it := lex() 373 - it.contents->getVar->Option.isSome 374 - } { 375 - Array.push(bits, it.contents->getVar->Option.getUnsafe) 376 - } 377 - switch it.contents { 378 - | Some(RParen) => 379 - switch gen { 380 - | Some(g) => { 381 - seen(g, num) 382 - Some(Schematic({schematic: num, allowed: bits})) 383 - } 384 - | None => throw(ParseError("Schematics not allowed here")) 385 - } 386 - | _ => throw(ParseError("Expected closing parenthesis")) 387 - } 388 - } 389 - | _ => throw(ParseError("Expected opening parenthesis")) 390 - } 391 - } 392 - | Some(LParen) => { 393 - let _ = lex() 394 - let bits = [] 395 - let it = ref(None) 396 - while { 397 - it := parseExp() 398 - it.contents->Option.isSome 399 - } { 400 - Array.push(bits, it.contents->Option.getUnsafe) 401 - } 402 - switch lex() { 403 - | Some(RParen) => Some(Compound({subexps: bits})) 404 - | _ => throw(ParseError("Expected closing parenthesis")) 405 - } 406 - } 407 - | _ => None 408 - } 409 - } 410 - switch parseExp() { 411 - | exception ParseError(s) => Error(s) 412 - | None => Error("No expression to parse") 413 - | Some(e) => Ok((e, cur.contents)) 414 - } 285 + let schemaLit = 286 + string("?") 287 + ->then(decimal) 288 + ->bind(schematic => 289 + many(varIdx) 290 + ->between(token("("), token(")")) 291 + ->map(allowed => { 292 + gen->Option.map(g => allowed->Array.forEach(n => seen(g, n)))->ignore 293 + Schematic({schematic, allowed}) 294 + }) 295 + ) 296 + let inner = fix(f => 297 + choice([ 298 + schemaLit->label("schematic"), 299 + var->label("variable"), 300 + liftParse(Atom.parse, ~scope, ~gen?)->map(a => Atom(a))->label("atom"), 301 + many(f) 302 + ->between(token("(")->label("open expression"), token(")")->label("close expression")) 303 + ->map(subexps => Compound({subexps: subexps})), 304 + ])->lexeme 305 + ) 306 + whitespace->then(inner) 415 307 } 308 + 309 + let parse = (str: string, ~scope: array<string>, ~gen=?) => 310 + Parser.runParser(mkParser(~scope, ~gen?), str) 416 311 417 312 let rec concrete = t => 418 313 switch t {
+5 -5
src/StringA.res
··· 479 479 } 480 480 481 481 let parenthesise = f => 482 - [ 483 - <span className="symbol" key={"-1"}> {React.string("(")} </span>, 484 - ...f, 485 - <span className="symbol" key={"-2"}> {React.string(")")} </span>, 486 - ] 482 + Array.flat([ 483 + [<span className="symbol" key={"-1"}> {React.string("(")} </span>], 484 + f, 485 + [<span className="symbol" key={"-2"}> {React.string(")")} </span>], 486 + ]) 487 487 488 488 let intersperse = a => Util.intersperse(a, ~with=React.string(" ")) 489 489
+23
tests/ParserTests.res
··· 1 + open Zora 2 + module P = Parser 3 + 4 + let testParse = (t: Zora.t, p, str, ~expect=?) => { 5 + switch P.runParser(p, str) { 6 + | Ok((res, "")) => expect->Option.map(expect => t->equal(res, expect))->ignore 7 + | Ok((_, rem)) => t->fail(~msg=`failed to consume remaining input: ${rem}`) 8 + | Error(e) => t->fail(~msg=`parse failed`) 9 + } 10 + } 11 + 12 + zoraBlock("parse", t => { 13 + open Parser 14 + t->testParse(P.string("a"), "a", ~expect="a") 15 + t->testParse(P.token("a"), "a \n\r", ~expect="a") 16 + t->testParse(P.string("a")->P.or(P.string("b")), "b", ~expect="b") 17 + let abs = 18 + P.takeWhile(s => s->String.startsWith("a"))->P.bind(res => P.string("b")->P.map(b => (res, b))) 19 + t->testParse(abs, "aaaab", ~expect=("aaaa", "b")) 20 + let brackets = many(lexeme(decimal))->between(token("("), token(")")) 21 + t->testParse(brackets, "(1)", ~expect=[1]) 22 + t->testParse(brackets, "(1 2 3)", ~expect=[1, 2, 3]) 23 + })
+36 -23
tests/RuleTest.res
··· 31 31 } 32 32 } 33 33 34 - // zoraBlock("string terms", t => { 35 - // module T = MakeTest(StringSExp, StringSExpJ) 36 - // t->T.testParseInner( 37 - // `[s1. ("$s1" p) |- ("($s1)" p)]`, 38 - // { 39 - // vars: ["s1"], 40 - // premises: [ 41 - // { 42 - // vars: [], 43 - // premises: [], 44 - // conclusion: StringSExp.Compound( 45 - // [StringA.Atom.Var({idx: 0})], 46 - // SExp.pAtom("p")->StringA.AtomJudgment.ConstS->StringSymbolic.Atom, 47 - // ), 48 - // }, 49 - // ], 50 - // conclusion: ( 51 - // [StringA.Atom.String("("), StringA.Atom.Var({idx: 0}), StringA.Atom.String(")")], 52 - // SExp.pAtom("p")->StringA.AtomJudgment.ConstS->StringSymbolic.Atom, 53 - // ), 54 - // }, 55 - // ) 56 - // }) 34 + zoraBlock("string terms", t => { 35 + module StringSymbol = AtomDef.MakeAtomAndView( 36 + Coercible.StringA, 37 + StringA.AtomView, 38 + Coercible.Symbolic, 39 + Symbolic.AtomView, 40 + ) 41 + module StringSExp = SExp.Make(StringSymbol.Atom) 42 + module T = MakeTest(StringSExp, StringSExp) 43 + t->T.testParseInner( 44 + `[s1. ("$s1" p) |- ("($s1)" p)]`, 45 + { 46 + vars: ["s1"], 47 + premises: [ 48 + { 49 + vars: [], 50 + premises: [], 51 + conclusion: StringSExp.Compound({ 52 + subexps: [ 53 + [StringA.Var({idx: 0})]->StringSymbol.Atom.Left->StringSExp.Atom, 54 + "p"->StringSymbol.Atom.Right->StringSExp.Atom, 55 + ], 56 + }), 57 + }, 58 + ], 59 + conclusion: StringSExp.Compound({ 60 + subexps: [ 61 + [StringA.String("("), StringA.Var({idx: 0}), StringA.String(")")] 62 + ->StringSymbol.Atom.Left 63 + ->StringSExp.Atom, 64 + "p"->StringSymbol.Atom.Right->StringSExp.Atom, 65 + ], 66 + }), 67 + }, 68 + ) 69 + })