ocaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

greatly simplify the lexer

+ Proper lexing of hierarchical identifier names
+ Introduce lexing modes
+ Buffer the lexer so that it can emit zero or many tokens at once

+103 -114
+1 -1
lib/compiler/Code.ml
··· 20 20 | Math of math_mode * t 21 21 | Ident of Trie.path 22 22 | Hash_ident of string 23 - | Angle_ident of string 23 + | Xml_ident of string option * string 24 24 | Subtree of string option * t 25 25 | Let of Trie.path * Trie.path binding list * t 26 26 | Open of Trie.path
+2 -2
lib/compiler/Expand.ml
··· 85 85 | { value = Ident path; loc } :: rest -> 86 86 let out, rest = expand_method_calls (expand_ident loc path) rest in 87 87 out @ expand rest 88 - | { value = Angle_ident name; loc } :: rest -> 89 - let qname = expand_xml_ident loc @@ Forester_xml_names.split_xml_qname name in 88 + | { value = Xml_ident (prefix, uname); loc } :: rest -> 89 + let qname = expand_xml_ident loc (prefix, uname) in 90 90 let attrs, rest = get_xml_attrs [] rest in 91 91 let arg_opt, rest = get_arg_opt rest in 92 92 let tag = Syn.Xml_tag (qname, attrs, Option.value ~default: [] arg_opt) in
+7 -7
lib/compiler/Grammar.mly
··· 3 3 open Forester_core 4 4 %} 5 5 6 - %token <string> XML_ELT_IDENT 6 + %token <string option * string> XML_ELT_IDENT 7 7 %token <string> DECL_XMLNS 8 8 %token <string> TEXT VERBATIM 9 9 %token <string> WHITESPACE 10 10 %token <string> IDENT 11 + 11 12 %token <string> HASH_IDENT 12 13 %token IMPORT EXPORT DEF NAMESPACE LET FUN OPEN 13 14 %token OBJECT PATCH CALL 14 15 %token SUBTREE SCOPE PUT GET DEFAULT ALLOC 15 - %token LBRACE RBRACE LSQUARE RSQUARE LPAREN RPAREN HASH_LBRACE HASH_HASH_LBRACE TICK AT_SIGN HASH 16 + %token SLASH LBRACE RBRACE LSQUARE RSQUARE LPAREN RPAREN HASH_LBRACE HASH_HASH_LBRACE TICK AT_SIGN HASH 16 17 %token EOF 17 18 18 19 %token DATALOG ··· 74 75 | DEFAULT; ~ = ident; ~ = arg; <Code.Default> 75 76 | GET; ~ = ident; <Code.Get> 76 77 | OPEN; ~ = ident; <Code.Open> 77 - | ~ = XML_ELT_IDENT; <Code.Angle_ident> 78 + | (~,~) = XML_ELT_IDENT; <Code.Xml_ident> 78 79 | ~ = DECL_XMLNS; ~ = txt_arg; <Code.Decl_xmlns> 79 80 | OBJECT; self = option(squares(bvar)); methods = braces(ws_list(method_decl)); { Code.Object {self; methods } } 80 81 | PATCH; obj = braces(code_expr); self = option(squares(bvar)); methods = braces(ws_list(method_decl)); { Code.Patch {obj; self; methods} } 81 82 | CALL; ~ = braces(code_expr); ~ = txt_arg; <Code.Call> 83 + | DATALOG; LBRACE; list(WHITESPACE); ~ = dx_sequent_node; RBRACE; <> 84 + | ~ = VERBATIM; <Code.Verbatim> 82 85 | ~ = delimited(HASH_LBRACE, textual_expr, RBRACE); <Code.inline_math> 83 86 | ~ = delimited(HASH_HASH_LBRACE, textual_expr, RBRACE); <Code.display_math> 84 87 | ~ = braces(textual_expr); <Code.braces> 85 88 | ~ = squares(textual_expr); <Code.squares> 86 89 | ~ = parens(textual_expr); <Code.parens> 87 - | ~ = VERBATIM; <Code.Verbatim> 88 - | DATALOG; LBRACE; list(WHITESPACE); ~ = dx_sequent_node; RBRACE; <> 89 90 90 91 let head_node1 := 91 92 | ~ = head_node; <> ··· 99 100 | k = squares(TEXT); list(WHITESPACE); v = arg; { k, v } 100 101 101 102 let ident := 102 - | ident = IDENT; 103 - { String.split_on_char '/' ident } 103 + | ~ = separated_nonempty_list(SLASH, IDENT); <> 104 104 105 105 let ws_or_text := 106 106 | x = TEXT; { x }
+69 -95
lib/compiler/Lexer.mll
··· 1 1 { 2 2 open Forester_prelude 3 + 4 + type mode = Main | Ident_init | Ident_fragments | Verbatim of string * Buffer.t 5 + let mode_stack = Stack.of_seq @@ List.to_seq [Main] 6 + 7 + let push_mode mode = Stack.push mode mode_stack 8 + let drop_mode () = Stack.drop mode_stack 9 + let set_mode mode = drop_mode(); push_mode mode 10 + let push_verbatim_mode herald = push_mode @@ Verbatim (herald, Buffer.create 2000) 11 + 3 12 let raise_err lexbuf = 4 13 let loc = Asai.Range.of_lexbuf lexbuf in 5 14 Forester_core.Reporter.fatalf ~loc Forester_core.Reporter.Message.Parse_error "unrecognized token `%s`" @@ ··· 8 17 9 18 let digit = ['0'-'9'] 10 19 let alpha = ['a'-'z' 'A'-'Z'] 11 - let int = '-'? digit+ 12 20 13 - let hierarchical_name = (alpha) (alpha | digit | '-' | '/')* 21 + let special_name = ['%' '\\' ',' '"' '`' '_' ';' '#' '{' '}' '[' ']' ' '] 14 22 let simple_name = (alpha | digit | '-')* 15 23 16 24 let xml_base_ident = (alpha) (alpha | digit | '-' | '_')* 17 25 let xml_qname = (xml_base_ident ':' xml_base_ident) | xml_base_ident 18 - let addr = (alpha) (alpha | digit | '_' | '-')* 19 26 let wschar = [' ' '\t'] 20 - let newline = '\r' | '\n' | "\r\n" 27 + let newline = ['\r' '\n'] | "\r\n" 21 28 let newline_followed_by_ws = (newline) (wschar)* 22 29 let text = [^' ' '%' '#' '\\' '{' '}' '[' ']' '(' ')' '\r' '\n']+ 23 30 let verbatim_herald = [^' ' '\t' '\r' '\n' '|']+ 24 - let verbatim_herald_sep = '|' 25 31 26 32 rule token = parse 27 - | "\\%" { Grammar.TEXT "%" } 33 + | "\\" { push_mode Ident_init; [] } 28 34 | "%" { comment lexbuf } 29 - | "##{" { Grammar.HASH_HASH_LBRACE } 30 - | "#{" { Grammar.HASH_LBRACE } 31 - | "'" { Grammar.TICK } 32 - | '@' { Grammar.AT_SIGN } 33 - | "\\\\" { Grammar.IDENT {|\|} } 34 - | "\\," { Grammar.IDENT {|,|} } 35 - | "\\\"" { Grammar.IDENT {|"|} } 36 - | "\\'" { Grammar.IDENT {|'|} } 37 - | "\\`" { Grammar.IDENT {|`|} } 38 - | "\\_" { Grammar.IDENT {|_|} } 39 - | "\\;" { Grammar.IDENT {|;|} } 40 - | "\\#" { Grammar.IDENT {|#|} } 41 - | "\\{" { Grammar.IDENT {|{|} } 42 - | "\\}" { Grammar.IDENT {|}|} } 43 - | "\\[" { Grammar.IDENT {|[|} } 44 - | "\\]" { Grammar.IDENT {|]|} } 45 - | "\\ " { Grammar.IDENT {| |} } 46 - | "-:" { Grammar.DX_ENTAILED } 47 - | "#" { Grammar.HASH } 48 - | "\\verb" { custom_verbatim_herald lexbuf } 49 - | "\\datalog" { Grammar.DATALOG } 50 - | "\\startverb" { custom_verbatim "\\stopverb" (Buffer.create 2000) lexbuf } 51 - | "\\scope" { Grammar.SCOPE } 52 - | "\\put" { Grammar.PUT } 53 - | "\\put?" { Grammar.DEFAULT } 54 - | "\\get" { Grammar.GET } 55 - | "\\import" { Grammar.IMPORT } 56 - | "\\export" { Grammar.EXPORT } 57 - | "\\namespace" { Grammar.NAMESPACE } 58 - | "\\open" { Grammar.OPEN } 59 - | "\\def" { Grammar.DEF } 60 - | "\\alloc" { Grammar.ALLOC } 61 - | "\\let" { Grammar.LET } 62 - | "\\fun" { Grammar.FUN } 63 - | "\\subtree" { Grammar.SUBTREE } 64 - | "\\object" { Grammar.OBJECT } 65 - | "\\patch" { Grammar.PATCH } 66 - | "\\call" { Grammar.CALL } 67 - | '#' (simple_name as name) { Grammar.HASH_IDENT name } 68 - | '?' (simple_name as name) { Grammar.DX_VAR name } 69 - | 70 - "\\<" 71 - { 72 - let qname = xml_qname lexbuf in 73 - let () = rangle lexbuf in 74 - XML_ELT_IDENT qname 75 - } 76 - | "\\xmlns:" { DECL_XMLNS (xml_base_ident lexbuf) } 77 - | '\\' (hierarchical_name as name) { Grammar.IDENT name } 78 - | '{' { Grammar.LBRACE } 79 - | '}' { Grammar.RBRACE } 80 - | '[' { Grammar.LSQUARE } 81 - | ']' { Grammar.RSQUARE } 82 - | '(' { Grammar.LPAREN } 83 - | ')' { Grammar.RPAREN } 84 - | text as str { Grammar.TEXT str } 85 - | wschar+ as str { Grammar.WHITESPACE str } 86 - | newline as str { Lexing.new_line lexbuf; Grammar.WHITESPACE str } 87 - | eof { Grammar.EOF } 35 + | "##{" { [Grammar.HASH_HASH_LBRACE] } 36 + | "#{" { [Grammar.HASH_LBRACE] } 37 + | "'" { [Grammar.TICK] } 38 + | '@' { [Grammar.AT_SIGN] } 39 + | "-:" { [Grammar.DX_ENTAILED] } 40 + | "#" { [Grammar.HASH] } 41 + | '#' (simple_name as name) { [Grammar.HASH_IDENT name] } 42 + | '?' (simple_name as name) { [Grammar.DX_VAR name] } 43 + | '{' { [Grammar.LBRACE] } 44 + | '}' { [Grammar.RBRACE] } 45 + | '[' { [Grammar.LSQUARE] } 46 + | ']' { [Grammar.RSQUARE] } 47 + | '(' { [Grammar.LPAREN] } 48 + | ')' { [Grammar.RPAREN] } 49 + | text as str { [Grammar.TEXT str] } 50 + | wschar+ as str { [Grammar.WHITESPACE str] } 51 + | newline as str { Lexing.new_line lexbuf; [Grammar.WHITESPACE str] } 52 + | eof { [Grammar.EOF] } 88 53 | _ { raise_err lexbuf } 89 54 90 - and comment = parse 91 - | newline_followed_by_ws { Lexing.new_line lexbuf; token lexbuf } 92 - | eof { Grammar.EOF } 93 - | _ { comment lexbuf } 94 - 95 - and custom_verbatim_herald = parse 96 - | verbatim_herald as herald { eat_verbatim_herald_sep (custom_verbatim herald (Buffer.create 2000)) lexbuf } 97 - | newline { Lexing.new_line lexbuf; raise_err lexbuf } 55 + and ident_init = parse 56 + | "verb" (verbatim_herald as herald) '|' { drop_mode (); push_verbatim_mode herald; [] } 57 + | "startverb" { drop_mode (); push_verbatim_mode "\\stopverb"; [] } 58 + | "scope" { drop_mode (); [Grammar.SCOPE] } 59 + | "put" { drop_mode (); [Grammar.PUT] } 60 + | "put?" { drop_mode (); [Grammar.DEFAULT] } 61 + | "get" { drop_mode (); [Grammar.GET] } 62 + | "import" { drop_mode (); [Grammar.IMPORT] } 63 + | "export" { drop_mode (); [Grammar.EXPORT] } 64 + | "namespace" { drop_mode (); [Grammar.NAMESPACE] } 65 + | "open" { drop_mode (); [Grammar.OPEN] } 66 + | "def" { drop_mode (); [Grammar.DEF] } 67 + | "alloc" { drop_mode (); [Grammar.ALLOC] } 68 + | "let" { drop_mode (); [Grammar.LET] } 69 + | "fun" { drop_mode (); [Grammar.FUN] } 70 + | "subtree" { drop_mode (); [Grammar.SUBTREE] } 71 + | "object" { drop_mode (); [Grammar.OBJECT] } 72 + | "patch" { drop_mode (); [Grammar.PATCH] } 73 + | "call" { drop_mode (); [Grammar.CALL] } 74 + | "datalog" { drop_mode (); [Grammar.DATALOG] } 75 + | "<" (xml_base_ident as prefix) ':' (xml_base_ident as uname) ">" { drop_mode (); [XML_ELT_IDENT (Some prefix, uname)] } 76 + | "<" (xml_base_ident as uname) ">" { drop_mode (); [XML_ELT_IDENT (None, uname)] } 77 + | "xmlns:" (xml_base_ident as str) { drop_mode (); [DECL_XMLNS str] } 78 + | (simple_name as s) "/" { set_mode Ident_fragments; [Grammar.IDENT s; Grammar.SLASH] } 79 + | simple_name as s { drop_mode (); [Grammar.IDENT s] } 80 + | special_name as c { drop_mode (); [Grammar.IDENT (String.make 1 c)] } 98 81 | _ { raise_err lexbuf } 99 82 100 - and eat_verbatim_herald_sep kont = parse 101 - | verbatim_herald_sep { kont lexbuf } 102 - | newline { Lexing.new_line lexbuf; raise_err lexbuf } 83 + and ident_fragments = parse 84 + | (simple_name as s) "/" { [Grammar.IDENT s; Grammar.SLASH] } 85 + | simple_name as s { drop_mode (); [Grammar.IDENT s] } 103 86 | _ { raise_err lexbuf } 104 87 105 - and custom_verbatim herald buffer = parse 88 + and comment = parse 89 + | newline_followed_by_ws { Lexing.new_line lexbuf; token lexbuf } 90 + | eof { [Grammar.EOF] } 91 + | _ { comment lexbuf } 92 + 93 + and verbatim herald buffer = parse 106 94 | newline as c 107 95 { 108 96 Lexing.new_line lexbuf; 109 97 Buffer.add_string buffer c; 110 - custom_verbatim herald buffer lexbuf 98 + [] 111 99 } 112 100 | _ as c 113 101 { ··· 121 109 String_util.trim_newlines @@ 122 110 Buffer.sub buffer 0 offset 123 111 in 124 - Grammar.VERBATIM text 112 + drop_mode (); 113 + [Grammar.VERBATIM text] 125 114 else 126 - custom_verbatim herald buffer lexbuf 115 + [] 127 116 } 128 - 129 - and xml_qname = parse 130 - | xml_qname as qname { qname } 131 - | newline { Lexing.new_line lexbuf; raise_err lexbuf } 132 - | _ { raise_err lexbuf } 133 - 134 - and xml_base_ident = parse 135 - | xml_base_ident as x { x } 136 - | newline { Lexing.new_line lexbuf; raise_err lexbuf } 137 - | _ { raise_err lexbuf } 138 - 139 - and rangle = parse 140 - | ">" { () } 141 - | newline { Lexing.new_line lexbuf; raise_err lexbuf } 142 - | _ { raise_err lexbuf }
+20 -1
lib/compiler/Parse.ml
··· 4 4 5 5 module I = Grammar.MenhirInterpreter 6 6 7 - let lexer lexbuf = Lexer.token lexbuf 7 + let buffer_lexer lexer = 8 + let buf = ref [] in 9 + let rec loop lexbuf = 10 + match !buf with 11 + | v :: vs -> 12 + buf := vs; v 13 + | [] -> 14 + match lexer lexbuf with 15 + | v :: vs -> buf := vs @ !buf; v 16 + | [] -> loop lexbuf 17 + in 18 + loop 19 + 20 + let lexer = 21 + let@ lexbuf = buffer_lexer in 22 + match Stack.top @@ Lexer.mode_stack with 23 + | Main -> Lexer.token lexbuf 24 + | Ident_init -> Lexer.ident_init lexbuf 25 + | Ident_fragments -> Lexer.ident_fragments lexbuf 26 + | Verbatim (herald, buffer) -> Lexer.verbatim herald buffer lexbuf 8 27 9 28 (* debugging helpers *) 10 29 let _string_of_token token =
+1 -1
lib/forest/Forest.ml
··· 154 154 | false -> 155 155 Hashtbl.add resources iri resource 156 156 | true -> 157 - Reporter.emitf Duplicate_tree "Already planted resource at address %a" pp_iri iri 157 + () 158 158 159 159 let get_resource iri = 160 160 let iri = Iri.normalize iri in
+1 -5
lib/frontend/DSL.ml
··· 24 24 let img href = X.(Img (Remote href)) 25 25 let content c = X.Content c 26 26 27 - let xml_elt name content = 27 + let xml_elt (prefix, uname) content = 28 28 let open Forester_compiler in 29 - let prefix, uname = 30 - Lexer.xml_qname (Lexing.from_string name) 31 - |> Forester_xml_names.split_xml_qname 32 - in 33 29 let prefix = Option.value ~default: "" prefix in 34 30 let qname = X.{ prefix; uname; xmlns = None } in 35 31 X.Xml_elt
+1 -1
lib/frontend/Import_graph.ml
··· 43 43 | Dx_query (_, positives, negatives) -> 44 44 List.iter (analyse_code roots) positives; 45 45 List.iter (analyse_code roots) negatives 46 - | Text _ | Hash_ident _ | Angle_ident _ | Verbatim _ | Ident _ | Open _ | Put _ | Default _ | Get _ | Decl_xmlns _ | Call _ | Alloc _ | Dx_var _ | Dx_const_content _ | Dx_const_iri _ -> () 46 + | Text _ | Hash_ident _ | Xml_ident _ | Verbatim _ | Ident _ | Open _ | Put _ | Default _ | Get _ | Decl_xmlns _ | Call _ | Alloc _ | Dx_var _ | Dx_const_content _ | Dx_const_iri _ -> () 47 47 in 48 48 begin 49 49 let@ tree = List.iter @~ trees in
+1 -1
test/dsl.ml
··· 20 20 figure [txt "figure"]; 21 21 figcaption [txt "caption"]; 22 22 cdata "cdata"; 23 - xml_elt "html" []; 23 + xml_elt (None, "html") []; 24 24 transclude "foo-001"; 25 25 contextual_number "chapter-3"; 26 26 results_of_query (union []);