the next generation of the in-browser educational proof assistant
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

lay foundations for parser

+169
+150
src/Parser.res
··· 1 + type sourceloc = { 2 + line: int, 3 + col: int, 4 + idx: int, 5 + } 6 + 7 + type parserError<'info> = { 8 + message: 'info, 9 + pos: sourceloc, 10 + } 11 + 12 + type parserState = {pos: sourceloc} 13 + 14 + type t<'a> = (string, parserState) => result<('a, parserState), parserError<string>> 15 + 16 + let initialState = {pos: {line: 1, col: 1, idx: 0}} 17 + let runParser = (p: t<'a>, str: string): result<('a, string), parserError<string>> => { 18 + p(str, initialState)->Result.map(((res, state)) => ( 19 + res, 20 + str->String.sliceToEnd(~start=state.pos.idx), 21 + )) 22 + } 23 + 24 + let map = (p: t<'a>, f: 'a => 'b): t<'b> => (str, state) => 25 + p(str, state)->Result.map(((res, state)) => (f(res), state)) 26 + let pure = (a): t<'a> => (_, state) => Ok((a, state)) 27 + let bind = (p1: t<'a>, p2: 'a => t<'b>): t<'b> => (str, state) => 28 + p1(str, state)->Result.flatMap(((res, state)) => p2(res)(str, state)) 29 + let apply = (p: t<'a>, pf: t<'a => 'b>): t<'b> => p->bind(a => pf->map(f => f(a))) 30 + 31 + let then = (p1: t<'a>, p2: t<'b>): t<'b> => p1->bind(_ => p2) 32 + let thenIgnore = (p1: t<'a>, p2: t<'b>): t<'a> => p1->bind(res => p2->map(_ => res)) 33 + let or = (p1: t<'a>, p2: t<'a>): t<'a> => (str, state) => 34 + switch p1(str, state) { 35 + | Ok(r) => Ok(r) 36 + | Error(_) => p2(str, state) 37 + } 38 + let fail = (info): t<'a> => (_, state) => Error({message: info, pos: state.pos}) 39 + let void = (p: t<'a>): t<unit> => p->map(_ => ()) 40 + 41 + let getState: t<parserState> = (_, state) => Ok((state, state)) 42 + let setState = (newState: parserState): t<unit> => (_, _) => Ok(((), newState)) 43 + let modifyState = (f: parserState => parserState) => getState->bind(state => setState(f(state))) 44 + let readStr: t<string> = (str, state) => Ok((str, state)) 45 + let getCurrentStr: t<string> = (str, state) => Ok(( 46 + str->String.sliceToEnd(~start=state.pos.idx), 47 + state, 48 + )) 49 + 50 + let eof: t<bool> = getState->bind(state => readStr->map(str => state.pos.idx >= str->String.length)) 51 + 52 + // fixpoints are necessary because rescript compiler complains very ambiguously 53 + // about not knowing the size of recursive combinators 54 + let fix = (f: t<'a> => t<'a>): t<'a> => { 55 + let pRef = ref(fail("umm")) 56 + pRef := f((str, state) => pRef.contents(str, state)) 57 + pRef.contents 58 + } 59 + 60 + let consume = (l: int): t<string> => 61 + getCurrentStr->bind(str => { 62 + if str->String.length < l { 63 + fail("tried to consume too much") 64 + } else { 65 + let consumed = str->String.slice(~start=0, ~end=l) 66 + let vOffset = consumed->String.split("\n")->Array.length - 1 67 + let hOffset = consumed->String.length - consumed->String.lastIndexOfOpt("\n")->Option.getOr(0) 68 + modifyState(state => { 69 + pos: { 70 + col: vOffset > 0 ? hOffset : state.pos.col + hOffset, 71 + line: state.pos.line + vOffset, 72 + idx: state.pos.idx + l, 73 + }, 74 + })->then(pure(consumed)) 75 + } 76 + }) 77 + 78 + let execRe = (re, str) => { 79 + re 80 + ->RegExp.exec(str) 81 + ->Option.map(result => { 82 + open RegExp.Result 83 + (matches(result), fullMatch(result)->String.length) 84 + }) 85 + } 86 + 87 + // this will be released in Core in 12.0.0.4, remove then 88 + @get external regexpFlags: RegExp.t => string = "flags" 89 + 90 + let string = s => 91 + getCurrentStr->bind(str => 92 + if str->String.startsWith(s) { 93 + consume(String.length(s))->then(pure(s)) 94 + } else { 95 + fail("doesn't start with string") 96 + } 97 + ) 98 + 99 + let regex = (re: RegExp.t): t<array<string>> => { 100 + let wrapped = { 101 + let source = re->RegExp.source 102 + if source->String.startsWith("^") { 103 + re 104 + } else { 105 + RegExp.fromStringWithFlags(`^${source}`, ~flags=re->regexpFlags) 106 + } 107 + } 108 + getCurrentStr->bind(str => 109 + switch execRe(wrapped, str) { 110 + | Some((matches, l)) => consume(l)->then(pure(matches)) 111 + | None => fail("regex failed") 112 + } 113 + ) 114 + } 115 + 116 + let regex1 = (re: RegExp.t): t<string> => 117 + regex(re)->bind(matches => 118 + switch matches { 119 + | [x] => pure(x) 120 + | _ => fail("more than one match") 121 + } 122 + ) 123 + 124 + let peek = (n): t<string> => (str, state) => { 125 + let res = str->String.slice(~start=state.pos.idx, ~end=state.pos.idx + n) 126 + Ok((res, state)) 127 + } 128 + 129 + type length = int 130 + let takeWhileMany = (f: string => option<length>) => 131 + fix(p => 132 + getCurrentStr->bind(str => 133 + switch f(str) { 134 + | Some(length) => consume(length)->bind(s => p->map(s' => String.concat(s, s'))) 135 + | None => pure("") 136 + } 137 + ) 138 + ) 139 + 140 + let takeWhile = (f: string => bool) => 141 + takeWhileMany(s => 142 + if f(s) { 143 + Some(1) 144 + } else { 145 + None 146 + } 147 + ) 148 + 149 + let lexeme = p => p->thenIgnore(regex(%re(`/^\s*/`))->void) 150 + let token = s => string(s)->lexeme
+19
tests/ParserTests.res
··· 1 + open Zora 2 + module P = Parser 3 + 4 + let testParse = (t: Zora.t, p, str, ~expect=?) => { 5 + switch P.runParser(p, str) { 6 + | Ok((res, "")) => expect->Option.map(expect => t->equal(res, expect))->ignore 7 + | Ok((_, rem)) => t->fail(~msg=`failed to consume remaining input: ${rem}`) 8 + | Error(e) => t->fail(~msg=`parse failed`) 9 + } 10 + } 11 + 12 + zoraBlock("parse", t => { 13 + t->testParse(P.string("a"), "a", ~expect="a") 14 + t->testParse(P.token("a"), "a \n\r", ~expect="a") 15 + t->testParse(P.string("a")->P.or(P.string("b")), "b", ~expect="b") 16 + let abs = 17 + P.takeWhile(s => s->String.startsWith("a"))->P.bind(res => P.string("b")->P.map(b => (res, b))) 18 + t->testParse(abs, "aaaab", ~expect=("aaaa", "b")) 19 + })