My working unpac space for OCaml projects in development
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement Phase 1: Core types, lexer, and test262 runner

Core types (lib/quickjs/core/):
- tag.ml: Value tags matching QuickJS JS_TAG_* constants
- value.ml: JSValue representation using OCaml GC
- atom.ml: Interned strings with ~289 predefined atoms
- runtime.ml: JSRuntime with class registry and memory tracking
- context.ml: JSContext with exception handling and intrinsics

Parser (lib/quickjs/parser/):
- source.ml: Source positions, locations, and cursor for lexing
- token.ml: Complete ES2024 token types (keywords, operators, etc.)
- lexer.ml: Handwritten recursive-descent lexer supporting:
- All number formats (decimal, hex, octal, binary, BigInt)
- String literals with escape sequences
- Template literals
- Regular expressions
- Private identifiers (#name)
- Unicode escapes

Test runner (test/runner/):
- test262_runner.ml: Skeleton for running ECMAScript Test262 suite

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+3721
+21
dune-project
(lang dune 3.20)
(name ocaml-quickjs)

; Let dune generate the .opam file from the package stanza below.
(generate_opam_files true)

(source (github anthropics/ocaml-quickjs))
(license MIT)
(authors "Anthropic")
(maintainers "dev@anthropic.com")

(package
 (name ocaml-quickjs)
 (synopsis "Pure OCaml implementation of QuickJS JavaScript engine")
 (description "A faithful port of QuickJS to OCaml, supporting ES2024")
 (depends
  (ocaml (>= 5.1))
  (dune (>= 3.20))
  ; BigInt support
  (zarith (>= 1.13))
  ; Pretty printing
  (fmt (>= 0.9))
  ; Unicode-aware lexer (optional: the lexer is handwritten, but sedlex may
  ; be used for unicode categories)
  (sedlex (>= 3.2))
  ; test262 metadata parsing
  (yojson (>= 2.1))
  ; Test runner CLI
  (cmdliner (>= 1.2))
  (alcotest :with-test)))
+484
lib/quickjs/core/atom.ml
(** Atoms: interned strings for fast property lookup.

    An atom is an integer index into a string table, so two property names
    can be compared with a single integer comparison. *)

type t = int

let null = 0

(** Atom table stored in the runtime.

    [strings.(i)] is the text of atom [i] for [0 <= i < atom_count];
    [index] maps a string back to its atom. *)
type table = {
  mutable strings : string array;
  mutable atom_count : int;
  index : (string, int) Hashtbl.t;
}

(** Create an empty table with room for 256 atoms; it grows on demand. *)
let create_table () : table = {
  strings = Array.make 256 "";
  atom_count = 0;
  index = Hashtbl.create 256;
}

(** Number of atoms currently stored in [tbl]. *)
let count tbl = tbl.atom_count

(** Pre-defined atoms - these match quickjs-atom.h *)
module Predefined = struct
  (* Core atoms used frequently *)
  let null = 0
  let empty_string = 1
  let length = 2
  let message = 3
  let cause = 4
  let errors = 5
  let stack = 6
  let name = 7
  let toString = 8
  let toLocaleString = 9
  let valueOf = 10
  let eval = 11
  let prototype = 12
  let constructor = 13
  let configurable = 14
  let writable = 15
  let enumerable = 16
  let value = 17
  let get = 18
  let set = 19
  let of_ = 20
  let target = 21
  let this_ = 22
  let new_target = 23
  let arguments = 24
  let caller = 25
  let callee = 26

  (* Common property names *)
  let apply = 27
  let bind = 28
  let call = 29

  (* Built-in constructor names *)
  let object_ = 30
  let array = 31
  let function_ = 32
  let error = 33
  let number = 34
  let string = 35
  let boolean = 36
  let symbol = 37
  let bigint = 38
  let undefined = 39

  (* Type error messages *)
  let type_error = 40
  let syntax_error = 41
  let reference_error = 42
  let range_error = 43
  let uri_error = 44
  let eval_error = 45
  let aggregate_error = 46
  let internal_error = 47

  (* Promise *)
  let promise = 48
  let then_ = 49
  let resolve = 50
  let reject = 51
  let catch_ = 52
  let finally_ = 53

  (* Iterator protocol *)
  let next = 54
  let done_ = 55
  let iterator = 56
  let async_iterator = 57
  let return_ = 58
  let throw_ = 59

  (* Array methods *)
  let push = 60
  let pop = 61
  let shift = 62
  let unshift = 63
  let concat = 64
  let join = 65
  let slice = 66
  let splice = 67
  let indexOf = 68
  let lastIndexOf = 69
  let includes = 70
  let forEach = 71
  let map = 72
  let filter = 73
  let reduce = 74
  let reduceRight = 75
  let find = 76
  let findIndex = 77
  let some = 78
  let every = 79
  let sort = 80
  let reverse = 81
  let fill = 82
  let copyWithin = 83
  let flat = 84
  let flatMap = 85
  let at = 86
  let toReversed = 87
  let toSorted = 88
  let toSpliced = 89
  let with_ = 90

  (* Object methods *)
  let keys = 91
  let values = 92
  let entries = 93
  let fromEntries = 94
  let assign = 95
  let create = 96
  let defineProperty = 97
  let defineProperties = 98
  let getOwnPropertyDescriptor = 99
  let getOwnPropertyDescriptors = 100
  let getOwnPropertyNames = 101
  let getOwnPropertySymbols = 102
  let getPrototypeOf = 103
  let setPrototypeOf = 104
  let isPrototypeOf = 105
  let hasOwnProperty = 106
  let hasOwn = 107
  let propertyIsEnumerable = 108
  let is = 109
  let freeze = 110
  let seal = 111
  let isFrozen = 112
  let isSealed = 113
  let isExtensible = 114
  let preventExtensions = 115

  (* String methods *)
  let charAt = 116
  let charCodeAt = 117
  let codePointAt = 118
  let split = 119
  let substring = 120
  let substr = 121
  let toLowerCase = 122
  let toUpperCase = 123
  let trim = 124
  let trimStart = 125
  let trimEnd = 126
  let padStart = 127
  let padEnd = 128
  let repeat = 129
  let replace = 130
  let replaceAll = 131
  let search = 132
  let match_ = 133
  let matchAll = 134
  let startsWith = 135
  let endsWith = 136
  let normalize = 137
  let localeCompare = 138
  let fromCharCode = 139
  let fromCodePoint = 140
  let raw = 141
  let isWellFormed = 142
  let toWellFormed = 143

  (* Number methods *)
  let toFixed = 144
  let toExponential = 145
  let toPrecision = 146
  let isFinite = 147
  let isInteger = 148
  let isNaN = 149
  let isSafeInteger = 150
  let parseFloat = 151
  let parseInt = 152

  (* Math constants and methods *)
  let math = 153
  let e = 154
  let ln10 = 155
  let ln2 = 156
  let log10e = 157
  let log2e = 158
  let pi = 159
  let sqrt1_2 = 160
  let sqrt2 = 161
  let abs = 162
  let acos = 163
  let acosh = 164
  let asin = 165
  let asinh = 166
  let atan = 167
  let atanh = 168
  let atan2 = 169
  let cbrt = 170
  let ceil = 171
  let clz32 = 172
  let cos = 173
  let cosh = 174
  let exp = 175
  let expm1 = 176
  let floor = 177
  let fround = 178
  let f16round = 179
  let hypot = 180
  let imul = 181
  let log = 182
  let log1p = 183
  let log10 = 184
  let log2 = 185
  let max = 186
  let min = 187
  let pow = 188
  let random = 189
  let round = 190
  let sign = 191
  let sin = 192
  let sinh = 193
  let sqrt = 194
  let sumPrecise = 195
  let tan = 196
  let tanh = 197
  let trunc = 198

  (* JSON *)
  let json = 199
  let parse = 200
  let stringify = 201

  (* RegExp *)
  let regexp = 202
  let exec = 203
  let test = 204
  let source = 205
  let global = 206
  let ignoreCase = 207
  let multiline = 208
  let dotAll = 209
  let unicode = 210
  let unicodeSets = 211
  let sticky = 212
  let flags = 213
  let lastIndex = 214
  let input = 215
  let groups = 216
  let indices = 217

  (* Date *)
  let date = 218
  let now = 219
  let utc = 220
  let getTime = 221
  let setTime = 222
  let getFullYear = 223
  let getMonth = 224
  let getDate = 225
  let getDay = 226
  let getHours = 227
  let getMinutes = 228
  let getSeconds = 229
  let getMilliseconds = 230
  let getTimezoneOffset = 231
  let setFullYear = 232
  let setMonth = 233
  let setDate = 234
  let setHours = 235
  let setMinutes = 236
  let setSeconds = 237
  let setMilliseconds = 238
  let toJSON = 239
  let toISOString = 240
  let toUTCString = 241
  let toDateString = 242
  let toTimeString = 243

  (* Map/Set/WeakMap/WeakSet *)
  let size = 244
  let clear = 245
  let delete = 246
  let has = 247
  let add = 248

  (* TypedArrays *)
  let buffer = 249
  let byteLength = 250
  let byteOffset = 251
  let bytes_per_element = 252
  let subarray = 253

  (* Proxy/Reflect *)
  let proxy = 254
  let reflect = 255
  let revocable = 256
  let handler = 257

  (* Symbol well-known *)
  let for_ = 258
  let keyFor = 259
  let iterator_symbol = 260 (* Symbol.iterator *)
  let asyncIterator_symbol = 261
  let hasInstance_symbol = 262
  let isConcatSpreadable_symbol = 263
  let match_symbol = 264
  let matchAll_symbol = 265
  let replace_symbol = 266
  let search_symbol = 267
  let species_symbol = 268
  let split_symbol = 269
  let toPrimitive_symbol = 270
  let toStringTag_symbol = 271
  let unscopables_symbol = 272

  (* Module-related *)
  let module_ = 273
  let import = 274
  let export = 275
  let default = 276
  let meta = 277

  (* Generators *)
  let generator = 278
  let async_generator = 279

  (* Private names *)
  let private_brand = 280

  (* Misc *)
  let toStringTag = 281
  let species = 282
  let description = 283
  let __proto__ = 284
  let anonymous = 285
  let filename = 286
  let line_number = 287
  let column_number = 288

  (* Total count of predefined atoms *)
  let count = 289
end

(** Text of every predefined atom, paired with the index it must receive and
    listed in index order.  Pairing each string with its [Predefined]
    constant lets [init_table] detect a missing or misplaced entry instead
    of silently shifting every later atom.

    NOTE(review): the strings for indices 60..288 are derived from the
    identifier names in [Predefined]; confirm the non-obvious ones
    ("module", "<brand>", "fileName", "lineNumber", "columnNumber") against
    the intended atom list. *)
let predefined_names : (t * string) list = Predefined.[
  (* Core atoms; index 0 is the null atom, index 1 the empty string *)
  null, ""; empty_string, ""; length, "length"; message, "message";
  cause, "cause"; errors, "errors"; stack, "stack"; name, "name";
  toString, "toString"; toLocaleString, "toLocaleString";
  valueOf, "valueOf"; eval, "eval"; prototype, "prototype";
  constructor, "constructor"; configurable, "configurable";
  writable, "writable"; enumerable, "enumerable"; value, "value";
  get, "get"; set, "set"; of_, "of"; target, "target"; this_, "this";
  new_target, "new.target"; arguments, "arguments"; caller, "caller";
  callee, "callee";
  (* Common property names *)
  apply, "apply"; bind, "bind"; call, "call";
  (* Built-in constructor names *)
  object_, "Object"; array, "Array"; function_, "Function"; error, "Error";
  number, "Number"; string, "String"; boolean, "Boolean"; symbol, "Symbol";
  bigint, "BigInt"; undefined, "undefined";
  (* Error constructor names *)
  type_error, "TypeError"; syntax_error, "SyntaxError";
  reference_error, "ReferenceError"; range_error, "RangeError";
  uri_error, "URIError"; eval_error, "EvalError";
  aggregate_error, "AggregateError"; internal_error, "InternalError";
  (* Promise *)
  promise, "Promise"; then_, "then"; resolve, "resolve"; reject, "reject";
  catch_, "catch"; finally_, "finally";
  (* Iterator protocol *)
  next, "next"; done_, "done"; iterator, "iterator";
  async_iterator, "asyncIterator"; return_, "return"; throw_, "throw";
  (* Array methods *)
  push, "push"; pop, "pop"; shift, "shift"; unshift, "unshift";
  concat, "concat"; join, "join"; slice, "slice"; splice, "splice";
  indexOf, "indexOf"; lastIndexOf, "lastIndexOf"; includes, "includes";
  forEach, "forEach"; map, "map"; filter, "filter"; reduce, "reduce";
  reduceRight, "reduceRight"; find, "find"; findIndex, "findIndex";
  some, "some"; every, "every"; sort, "sort"; reverse, "reverse";
  fill, "fill"; copyWithin, "copyWithin"; flat, "flat"; flatMap, "flatMap";
  at, "at"; toReversed, "toReversed"; toSorted, "toSorted";
  toSpliced, "toSpliced"; with_, "with";
  (* Object methods *)
  keys, "keys"; values, "values"; entries, "entries";
  fromEntries, "fromEntries"; assign, "assign"; create, "create";
  defineProperty, "defineProperty"; defineProperties, "defineProperties";
  getOwnPropertyDescriptor, "getOwnPropertyDescriptor";
  getOwnPropertyDescriptors, "getOwnPropertyDescriptors";
  getOwnPropertyNames, "getOwnPropertyNames";
  getOwnPropertySymbols, "getOwnPropertySymbols";
  getPrototypeOf, "getPrototypeOf"; setPrototypeOf, "setPrototypeOf";
  isPrototypeOf, "isPrototypeOf"; hasOwnProperty, "hasOwnProperty";
  hasOwn, "hasOwn"; propertyIsEnumerable, "propertyIsEnumerable";
  is, "is"; freeze, "freeze"; seal, "seal"; isFrozen, "isFrozen";
  isSealed, "isSealed"; isExtensible, "isExtensible";
  preventExtensions, "preventExtensions";
  (* String methods *)
  charAt, "charAt"; charCodeAt, "charCodeAt"; codePointAt, "codePointAt";
  split, "split"; substring, "substring"; substr, "substr";
  toLowerCase, "toLowerCase"; toUpperCase, "toUpperCase"; trim, "trim";
  trimStart, "trimStart"; trimEnd, "trimEnd"; padStart, "padStart";
  padEnd, "padEnd"; repeat, "repeat"; replace, "replace";
  replaceAll, "replaceAll"; search, "search"; match_, "match";
  matchAll, "matchAll"; startsWith, "startsWith"; endsWith, "endsWith";
  normalize, "normalize"; localeCompare, "localeCompare";
  fromCharCode, "fromCharCode"; fromCodePoint, "fromCodePoint"; raw, "raw";
  isWellFormed, "isWellFormed"; toWellFormed, "toWellFormed";
  (* Number methods *)
  toFixed, "toFixed"; toExponential, "toExponential";
  toPrecision, "toPrecision"; isFinite, "isFinite"; isInteger, "isInteger";
  isNaN, "isNaN"; isSafeInteger, "isSafeInteger"; parseFloat, "parseFloat";
  parseInt, "parseInt";
  (* Math constants and methods *)
  math, "Math"; e, "E"; ln10, "LN10"; ln2, "LN2"; log10e, "LOG10E";
  log2e, "LOG2E"; pi, "PI"; sqrt1_2, "SQRT1_2"; sqrt2, "SQRT2";
  abs, "abs"; acos, "acos"; acosh, "acosh"; asin, "asin"; asinh, "asinh";
  atan, "atan"; atanh, "atanh"; atan2, "atan2"; cbrt, "cbrt"; ceil, "ceil";
  clz32, "clz32"; cos, "cos"; cosh, "cosh"; exp, "exp"; expm1, "expm1";
  floor, "floor"; fround, "fround"; f16round, "f16round"; hypot, "hypot";
  imul, "imul"; log, "log"; log1p, "log1p"; log10, "log10"; log2, "log2";
  max, "max"; min, "min"; pow, "pow"; random, "random"; round, "round";
  sign, "sign"; sin, "sin"; sinh, "sinh"; sqrt, "sqrt";
  sumPrecise, "sumPrecise"; tan, "tan"; tanh, "tanh"; trunc, "trunc";
  (* JSON *)
  json, "JSON"; parse, "parse"; stringify, "stringify";
  (* RegExp *)
  regexp, "RegExp"; exec, "exec"; test, "test"; source, "source";
  global, "global"; ignoreCase, "ignoreCase"; multiline, "multiline";
  dotAll, "dotAll"; unicode, "unicode"; unicodeSets, "unicodeSets";
  sticky, "sticky"; flags, "flags"; lastIndex, "lastIndex"; input, "input";
  groups, "groups"; indices, "indices";
  (* Date *)
  date, "Date"; now, "now"; utc, "UTC"; getTime, "getTime";
  setTime, "setTime"; getFullYear, "getFullYear"; getMonth, "getMonth";
  getDate, "getDate"; getDay, "getDay"; getHours, "getHours";
  getMinutes, "getMinutes"; getSeconds, "getSeconds";
  getMilliseconds, "getMilliseconds";
  getTimezoneOffset, "getTimezoneOffset"; setFullYear, "setFullYear";
  setMonth, "setMonth"; setDate, "setDate"; setHours, "setHours";
  setMinutes, "setMinutes"; setSeconds, "setSeconds";
  setMilliseconds, "setMilliseconds"; toJSON, "toJSON";
  toISOString, "toISOString"; toUTCString, "toUTCString";
  toDateString, "toDateString"; toTimeString, "toTimeString";
  (* Map/Set/WeakMap/WeakSet *)
  size, "size"; clear, "clear"; delete, "delete"; has, "has"; add, "add";
  (* TypedArrays *)
  buffer, "buffer"; byteLength, "byteLength"; byteOffset, "byteOffset";
  bytes_per_element, "BYTES_PER_ELEMENT"; subarray, "subarray";
  (* Proxy/Reflect *)
  proxy, "Proxy"; reflect, "Reflect"; revocable, "revocable";
  handler, "handler";
  (* Symbol statics and well-known symbols *)
  for_, "for"; keyFor, "keyFor"; iterator_symbol, "Symbol.iterator";
  asyncIterator_symbol, "Symbol.asyncIterator";
  hasInstance_symbol, "Symbol.hasInstance";
  isConcatSpreadable_symbol, "Symbol.isConcatSpreadable";
  match_symbol, "Symbol.match"; matchAll_symbol, "Symbol.matchAll";
  replace_symbol, "Symbol.replace"; search_symbol, "Symbol.search";
  species_symbol, "Symbol.species"; split_symbol, "Symbol.split";
  toPrimitive_symbol, "Symbol.toPrimitive";
  toStringTag_symbol, "Symbol.toStringTag";
  unscopables_symbol, "Symbol.unscopables";
  (* Module-related *)
  module_, "module"; import, "import"; export, "export";
  default, "default"; meta, "meta";
  (* Generators *)
  generator, "Generator"; async_generator, "AsyncGenerator";
  (* Private names *)
  private_brand, "<brand>";
  (* Misc *)
  toStringTag, "toStringTag"; species, "species";
  description, "description"; __proto__, "__proto__";
  anonymous, "anonymous"; filename, "fileName"; line_number, "lineNumber";
  column_number, "columnNumber";
]

(* Store [s] in the next free slot of [tbl], doubling the backing array when
   it is full, and return the new atom.  The index entry for [s] is
   overwritten, so when the same text is appended twice (the null atom and
   the empty string are both "") lookups resolve to the later atom. *)
let append (tbl : table) (s : string) : t =
  let id = tbl.atom_count in
  let capacity = Array.length tbl.strings in
  if id >= capacity then begin
    let grown = Array.make (Stdlib.max 1 (capacity * 2)) "" in
    Array.blit tbl.strings 0 grown 0 capacity;
    tbl.strings <- grown
  end;
  tbl.strings.(id) <- s;
  Hashtbl.replace tbl.index s id;
  tbl.atom_count <- id + 1;
  id

(** Initialize the atom table with all predefined atoms.

    After this call [to_string tbl a] is defined for every constant [a] in
    [Predefined], and dynamically interned strings start at
    [Predefined.count].

    @raise Invalid_argument if [tbl] already contains atoms (the predefined
    indices could no longer be honoured), or if the internal name list is
    out of step with the [Predefined] constants. *)
let init_table (tbl : table) =
  if tbl.atom_count <> 0 then
    invalid_arg "Atom.init_table: table already contains atoms";
  List.iter
    (fun (expected, s) ->
      let id = append tbl s in
      if id <> expected then
        invalid_arg
          (Printf.sprintf "Atom.init_table: %S got index %d, expected %d"
             s id expected))
    predefined_names;
  if tbl.atom_count <> Predefined.count then
    invalid_arg
      (Printf.sprintf "Atom.init_table: registered %d atoms, expected %d"
         tbl.atom_count Predefined.count)

(** Intern a string, returning its atom.  An already-known string returns
    its existing atom; otherwise a new atom is allocated. *)
let intern (tbl : table) (s : string) : t =
  match Hashtbl.find_opt tbl.index s with
  | Some id -> id
  | None -> append tbl s

(** Intern a uint32 as atom (for array indices), using its decimal text. *)
let intern_uint32 (tbl : table) (n : int) : t =
  intern tbl (string_of_int n)

(** Get the string for an atom.
    @raise Invalid_argument if [atom] is not an atom of [tbl]. *)
let to_string (tbl : table) (atom : t) : string =
  if atom >= 0 && atom < tbl.atom_count then
    tbl.strings.(atom)
  else
    invalid_arg (Printf.sprintf "Atom.to_string: invalid atom %d" atom)

(* Largest valid ECMAScript array index, 2^32 - 2.  Kept as an Int64 so the
   literal also compiles where native ints are narrower than 33 bits. *)
let max_array_index = 0xFFFF_FFFEL

(** Check if atom represents an array index: the canonical decimal spelling
    (no sign, no leading zeros, no prefix) of an integer in
    [0 .. 2^32 - 2]. *)
let is_array_index (tbl : table) (atom : t) : bool =
  let s = to_string tbl atom in
  match Int64.of_string_opt s with
  | Some n ->
      Int64.compare n 0L >= 0
      && Int64.compare n max_array_index <= 0
      (* The round trip rejects "01", "+1", "0x1", "1_0" and similar
         spellings that the parser accepts. *)
      && String.equal (Int64.to_string n) s
  | None -> false

let equal (a : t) (b : t) = a = b

let compare (a : t) (b : t) = Int.compare a b

let hash (atom : t) = atom

(** Print the text of [atom] as stored in [tbl]. *)
let pp (tbl : table) fmt (atom : t) =
  Format.pp_print_string fmt (to_string tbl atom)
+91
lib/quickjs/core/atom.mli
(** Interned property names ("atoms").

    An atom is an integer index into a string table; comparing two atoms is
    a plain integer comparison, which makes property-name equality O(1). *)

type t = private int
(** An atom.  Readable as an [int], but only this module can create one. *)

val null : t
(** The null atom. *)

type table
(** Mutable table mapping atoms to their strings and back. *)

val create_table : unit -> table
(** A fresh, empty table. *)

val count : table -> int
(** Number of atoms currently held by the table. *)

val init_table : table -> unit
(** Populate the table with the predefined atoms. *)

(** Atoms with fixed, well-known indices for common property names. *)
module Predefined : sig
  (** {2 Core property names} *)

  val null : t
  val empty_string : t
  val length : t
  val message : t
  val name : t
  val toString : t
  val valueOf : t
  val prototype : t
  val constructor : t
  val configurable : t
  val writable : t
  val enumerable : t
  val value : t
  val get : t
  val set : t
  val arguments : t

  (** {2 Built-in types} *)

  val object_ : t
  val array : t
  val function_ : t
  val error : t
  val number : t
  val string : t
  val boolean : t
  val symbol : t
  val bigint : t
  val undefined : t

  (** {2 Error types} *)

  val type_error : t
  val syntax_error : t
  val reference_error : t
  val range_error : t

  (** {2 Promise} *)

  val promise : t
  val then_ : t
  val resolve : t
  val reject : t

  (** {2 Iterator protocol} *)

  val next : t
  val done_ : t
  val iterator : t

  val count : int
  (** How many predefined atom indices exist. *)
end

val intern : table -> string -> t
(** [intern tbl s] is the atom for [s]; a new atom is allocated the first
    time a string is seen. *)

val intern_uint32 : table -> int -> t
(** [intern_uint32 tbl n] interns the decimal text of [n] (used for array
    indices). *)

val to_string : table -> t -> string
(** [to_string tbl a] is the string that [a] stands for.
    @raise Invalid_argument if [a] is not an atom of [tbl]. *)

val is_array_index : table -> t -> bool
(** [is_array_index tbl a] holds only when the string of [a] is the
    canonical decimal form of a non-negative integer. *)

val equal : t -> t -> bool
(** Atom equality. *)

val compare : t -> t -> int
(** Total order on atoms (by index). *)

val hash : t -> int
(** Hash of an atom. *)

val pp : table -> Format.formatter -> t -> unit
(** [pp tbl fmt a] prints the string of [a]. *)
+208
lib/quickjs/core/context.ml
(** JavaScript context.

    A context holds the JavaScript execution state:
    - Global object
    - Exception state
    - Class prototypes
    - Eval/compilation functions

    Multiple contexts can share a runtime. *)

type t = {
  runtime : Runtime.t;
  mutable global_object : Value.t;
  mutable current_exception : Value.t option;
  (* Prototype per class id; slots without a prototype hold
     [Value.undefined]. *)
  mutable class_protos : Value.t array;
  mutable opaque : Obj.t option;

  (* Job queue for promises/microtasks *)
  mutable job_queue : (unit -> Value.t) Queue.t;

  (* Intrinsics flags: set once the matching [add_intrinsic_*] has run *)
  mutable has_base_objects : bool;
  mutable has_date : bool;
  mutable has_eval : bool;
  mutable has_regexp : bool;
  mutable has_json : bool;
  mutable has_proxy : bool;
  mutable has_map_set : bool;
  mutable has_typed_arrays : bool;
  mutable has_promise : bool;
  mutable has_weakref : bool;
}

(** Create a context on runtime [rt] with a fresh global object, no pending
    exception, an empty job queue and no intrinsics installed. *)
let create rt =
  let global = Value.mk_object ~class_id:Runtime.Class_id.object_ () in
  {
    runtime = rt;
    global_object = global;
    current_exception = None;
    class_protos = Array.make Runtime.Class_id.count Value.undefined;
    opaque = None;
    job_queue = Queue.create ();
    has_base_objects = false;
    has_date = false;
    has_eval = false;
    has_regexp = false;
    has_json = false;
    has_proxy = false;
    has_map_set = false;
    has_typed_arrays = false;
    has_promise = false;
    has_weakref = false;
  }

(** Create a context without any intrinsics.  Currently identical to
    [create], which does not install intrinsics either. *)
let create_raw rt =
  create rt

let runtime ctx = ctx.runtime

let global_object ctx = ctx.global_object

(** Attach an arbitrary user value to the context. *)
let set_opaque ctx opaque =
  ctx.opaque <- Some (Obj.repr opaque)

(** Retrieve the user value.  The result type is unchecked ([Obj.obj]): the
    caller must request the same type that was passed to [set_opaque]. *)
let get_opaque ctx =
  Option.map Obj.obj ctx.opaque

(* Exception handling *)

(** Record [exn_val] as the pending exception and return the exception
    marker value. *)
let throw ctx exn_val =
  ctx.current_exception <- Some exn_val;
  Value.exception_val

let get_exception ctx =
  ctx.current_exception

let has_exception ctx =
  Option.is_some ctx.current_exception

let clear_exception ctx =
  ctx.current_exception <- None

(* Shared body of the [throw_*_error] functions: until real Error objects
   exist, every error kind is thrown as a plain string value. *)
let throw_message ctx msg =
  (* TODO: Create proper Error object *)
  throw ctx (Value.of_string msg)

let throw_type_error ctx msg = throw_message ctx msg

let throw_syntax_error ctx msg = throw_message ctx msg

let throw_reference_error ctx msg = throw_message ctx msg

let throw_range_error ctx msg = throw_message ctx msg

let throw_internal_error ctx msg = throw_message ctx msg

(* Class prototypes *)

(** Store [proto] as the prototype for [class_id].

    The prototype array starts with [Runtime.Class_id.count] slots; it is
    grown here so that ids handed out later by [Runtime.new_class_id] can
    also be given a prototype.  Ids the runtime does not report as allocated
    (including negative ids) are ignored. *)
let set_class_proto ctx class_id proto =
  let len = Array.length ctx.class_protos in
  if class_id >= 0 && class_id < len then
    ctx.class_protos.(class_id) <- proto
  else if Runtime.is_class_registered ctx.runtime class_id then begin
    (* Here [class_id >= len]: a class id allocated after this context was
       created. *)
    let grown = Array.make (max (class_id + 1) (len * 2)) Value.undefined in
    Array.blit ctx.class_protos 0 grown 0 len;
    ctx.class_protos <- grown;
    ctx.class_protos.(class_id) <- proto
  end

(** Prototype stored for [class_id], or [Value.undefined] when the id is
    outside the prototype array. *)
let get_class_proto ctx class_id =
  if class_id >= 0 && class_id < Array.length ctx.class_protos then
    ctx.class_protos.(class_id)
  else
    Value.undefined

(* Intrinsics - these will be implemented by the builtins modules.
   Each function only flips its flag for now, so repeated calls are no-ops. *)

let add_intrinsic_base_objects ctx =
  if not ctx.has_base_objects then begin
    ctx.has_base_objects <- true;
    (* TODO: Initialize Object, Function, Array, String, Number, Boolean, Error, etc. *)
  end

let add_intrinsic_date ctx =
  if not ctx.has_date then begin
    ctx.has_date <- true;
    (* TODO: Initialize Date *)
  end

let add_intrinsic_eval ctx =
  if not ctx.has_eval then begin
    ctx.has_eval <- true;
    (* TODO: Initialize eval() *)
  end

let add_intrinsic_regexp ctx =
  if not ctx.has_regexp then begin
    ctx.has_regexp <- true;
    (* TODO: Initialize RegExp *)
  end

let add_intrinsic_json ctx =
  if not ctx.has_json then begin
    ctx.has_json <- true;
    (* TODO: Initialize JSON *)
  end

let add_intrinsic_proxy ctx =
  if not ctx.has_proxy then begin
    ctx.has_proxy <- true;
    (* TODO: Initialize Proxy, Reflect *)
  end

let add_intrinsic_map_set ctx =
  if not ctx.has_map_set then begin
    ctx.has_map_set <- true;
    (* TODO: Initialize Map, Set, WeakMap, WeakSet *)
  end

let add_intrinsic_typed_arrays ctx =
  if not ctx.has_typed_arrays then begin
    ctx.has_typed_arrays <- true;
    (* TODO: Initialize ArrayBuffer, TypedArrays, DataView *)
  end

let add_intrinsic_promise ctx =
  if not ctx.has_promise then begin
    ctx.has_promise <- true;
    (* TODO: Initialize Promise *)
  end

let add_intrinsic_weakref ctx =
  if not ctx.has_weakref then begin
    ctx.has_weakref <- true;
    (* TODO: Initialize WeakRef, FinalizationRegistry *)
  end

(* Job queue for microtasks *)

(** Append [job] to the end of the job queue. *)
let enqueue_job ctx job =
  Queue.add job ctx.job_queue

let is_job_pending ctx =
  not (Queue.is_empty ctx.job_queue)

(** Run the oldest queued job, if any.  Returns [None] when the queue is
    empty, [Some (Error v)] when the job returned the exception marker, and
    [Some (Ok v)] otherwise. *)
let execute_pending_job ctx =
  match Queue.take_opt ctx.job_queue with
  | None -> None
  | Some job ->
    let result = job () in
    if Value.is_exception result then
      Some (Error result)
    else
      Some (Ok result)

(** Run queued jobs in order until the queue is empty or a job fails.
    Returns the number of jobs that completed successfully; the failing job
    is consumed but not counted, and later jobs stay queued. *)
let execute_all_pending_jobs ctx =
  let rec loop count =
    match execute_pending_job ctx with
    | None -> count
    | Some (Ok _) -> loop (count + 1)
    | Some (Error _) -> count  (* Stop on first error *)
  in
  loop 0

(* Atom convenience *)
let intern ctx s =
  Runtime.intern ctx.runtime s

let atoms ctx =
  Runtime.atoms ctx.runtime
+75
lib/quickjs/core/context.mli
(** JavaScript context: the execution state attached to a runtime. *)

type t
(** A context. *)

(** {1 Lifecycle} *)

val create : Runtime.t -> t
(** New context on the given runtime. *)

val create_raw : Runtime.t -> t
(** New context without any intrinsics. *)

(** {1 Access} *)

val runtime : t -> Runtime.t
(** The runtime this context was created on. *)

val global_object : t -> Value.t
(** The context's global object. *)

(** {1 Opaque data} *)

val set_opaque : t -> 'a -> unit
(** Attach an arbitrary user value to the context. *)

val get_opaque : t -> 'a option
(** The attached user value, if any.  The result type is not checked: ask
    for the same type that was given to {!set_opaque}. *)

(** {1 Exception handling} *)

val throw : t -> Value.t -> Value.t
(** [throw ctx exn] makes [exn] the pending exception and returns
    JS_EXCEPTION. *)

val get_exception : t -> Value.t option
(** The pending exception, or [None]. *)

val has_exception : t -> bool
(** Whether an exception is pending. *)

val clear_exception : t -> unit
(** Forget any pending exception. *)

(** {2 Error constructors}

    Each sets the pending exception from a message and returns what
    {!throw} returns. *)

val throw_type_error : t -> string -> Value.t
val throw_syntax_error : t -> string -> Value.t
val throw_reference_error : t -> string -> Value.t
val throw_range_error : t -> string -> Value.t
val throw_internal_error : t -> string -> Value.t

(** {1 Class prototypes} *)

val set_class_proto : t -> int -> Value.t -> unit
(** [set_class_proto ctx class_id proto] records [proto] as the prototype
    for [class_id]. *)

val get_class_proto : t -> int -> Value.t
(** [get_class_proto ctx class_id] is the recorded prototype;
    [Value.undefined] is returned for an id the context has no slot for. *)

(** {1 Intrinsics}

    Initializers for groups of built-in objects.  Call only the ones you
    need to reduce memory usage. *)

val add_intrinsic_base_objects : t -> unit
val add_intrinsic_date : t -> unit
val add_intrinsic_eval : t -> unit
val add_intrinsic_regexp : t -> unit
val add_intrinsic_json : t -> unit
val add_intrinsic_proxy : t -> unit
val add_intrinsic_map_set : t -> unit
val add_intrinsic_typed_arrays : t -> unit
val add_intrinsic_promise : t -> unit
val add_intrinsic_weakref : t -> unit

(** {1 Job queue (microtasks)} *)

val enqueue_job : t -> (unit -> Value.t) -> unit
(** Add a job at the end of the queue. *)

val is_job_pending : t -> bool
(** Whether at least one job is queued. *)

val execute_pending_job : t -> (Value.t, Value.t) result option
(** Run the oldest queued job.  [None] if the queue was empty; otherwise
    [Ok] with the job's result, or [Error] when the job returned the
    exception marker. *)

val execute_all_pending_jobs : t -> int
(** Run queued jobs until the queue is empty or one fails; returns how many
    succeeded. *)

(** {1 Atoms} *)

val intern : t -> string -> Atom.t
(** Intern a string in the runtime's atom table. *)

val atoms : t -> Atom.table
(** The runtime's atom table. *)
+6
lib/quickjs/core/dune
(library
 (name quickjs_core)
 (public_name ocaml-quickjs.core)
 (libraries zarith fmt)
 ; Warnings silenced during development:
 ;   32 unused value, 37 unused constructor,
 ;   39 unused rec flag, 69 unused record field.
 (flags
  (:standard -w -32-37-39-69))
 (preprocess no_preprocessing))
+233
lib/quickjs/core/runtime.ml
(** JavaScript runtime.

    The runtime holds global state shared across all contexts:
    - Atom table (interned strings)
    - Class definitions
    - Memory limits and GC settings
    - Module loader configuration *)

type memory_usage = {
  malloc_size : int;
  malloc_count : int;
  atom_count : int;
  atom_size : int;
  str_count : int;
  str_size : int;
  obj_count : int;
  obj_size : int;
  prop_count : int;
  shape_count : int;
  js_func_count : int;
  c_func_count : int;
  array_count : int;
}

type config = {
  memory_limit : int option;
  max_stack_size : int;
  gc_threshold : int;
}

let default_config = {
  memory_limit = None;
  max_stack_size = 1024 * 1024;  (* 1MB default *)
  gc_threshold = 256 * 1024;     (* 256KB *)
}

type class_def = {
  class_name : string;
  finalizer : (Value.js_object -> unit) option;
  gc_mark : (Value.js_object -> unit) option;
  call : (Value.t -> Value.t array -> Value.t) option;
}

type t = {
  mutable config : config;
  atoms : Atom.table;
  (* Class definitions indexed by class id; unused slots hold
     [empty_class_def]. *)
  mutable classes : class_def array;
  (* Next class id to hand out; every id below it counts as allocated. *)
  mutable class_count : int;
  mutable opaque : Obj.t option;

  (* Statistics *)
  mutable obj_count : int;
  mutable str_count : int;

  (* Interrupt handler *)
  mutable interrupt_handler : (unit -> bool) option;

  (* Module loader *)
  mutable module_normalize : (string -> string -> string) option;
  mutable module_loader : (string -> Value.t option) option;
}

(* Built-in class IDs *)
module Class_id = struct
  let object_ = 0
  let array = 1
  let function_ = 2
  let error = 3
  let number = 4
  let string = 5
  let boolean = 6
  let symbol = 7
  let arguments = 8
  let mapped_arguments = 9
  let date = 10
  let regexp = 11
  let json = 12
  let math = 13
  let reflect = 14
  let proxy = 15
  let promise = 16
  let promise_resolve_function = 17
  let promise_reject_function = 18
  let async_function = 19
  let async_function_resolve = 20
  let async_function_reject = 21
  let async_generator_function = 22
  let async_generator = 23
  let generator_function = 24
  let generator = 25
  let array_buffer = 26
  let shared_array_buffer = 27
  let dataview = 28
  let int8array = 29
  let uint8array = 30
  let uint8clampedarray = 31
  let int16array = 32
  let uint16array = 33
  let int32array = 34
  let uint32array = 35
  let bigint64array = 36
  let biguint64array = 37
  let float16array = 38
  let float32array = 39
  let float64array = 40
  let map = 41
  let set = 42
  let weakmap = 43
  let weakset = 44
  let weakref = 45
  let finalization_registry = 46
  let iterator = 47
  let map_iterator = 48
  let set_iterator = 49
  let array_iterator = 50
  let string_iterator = 51
  let regexp_string_iterator = 52
  let bigint = 53
  let atomics = 54

  let count = 55
end

(* Placeholder stored in class slots that have no definition yet. *)
let empty_class_def =
  { class_name = ""; finalizer = None; gc_mark = None; call = None }

(* Make sure [rt.classes] has at least [needed] slots, at least doubling the
   array when it has to grow. *)
let ensure_class_capacity rt needed =
  let len = Array.length rt.classes in
  if needed > len then begin
    let grown = Array.make (max needed (len * 2)) empty_class_def in
    Array.blit rt.classes 0 grown 0 len;
    rt.classes <- grown
  end

(** Create a runtime with the default configuration, an atom table holding
    the predefined atoms, and the built-in class ids reserved. *)
let create () =
  let atoms = Atom.create_table () in
  Atom.init_table atoms;
  {
    config = default_config;
    atoms;
    classes = Array.make 64 empty_class_def;
    class_count = Class_id.count;
    opaque = None;
    obj_count = 0;
    str_count = 0;
    interrupt_handler = None;
    module_normalize = None;
    module_loader = None;
  }

let create_with_config config =
  let rt = create () in
  rt.config <- config;
  rt

let set_memory_limit rt limit =
  rt.config <- { rt.config with memory_limit = Some limit }

let set_max_stack_size rt size =
  rt.config <- { rt.config with max_stack_size = size }

let set_gc_threshold rt threshold =
  rt.config <- { rt.config with gc_threshold = threshold }

let run_gc _rt =
  (* Let OCaml's GC handle this *)
  Gc.full_major ()

(** Snapshot of memory statistics.  Only [malloc_size], [atom_count],
    [str_count] and [obj_count] are populated; the other fields are 0.

    [malloc_size] is the OCaml major-heap size in bytes.  It is read with
    [Gc.quick_stat] so that asking for statistics does not itself force a
    heap traversal or collection; the figure may therefore lag slightly
    behind the most recent allocations. *)
let memory_usage rt =
  let gc_stat = Gc.quick_stat () in
  {
    malloc_size = gc_stat.Gc.heap_words * (Sys.word_size / 8);
    malloc_count = 0;
    atom_count = Atom.count rt.atoms;
    atom_size = 0;  (* Would need to calculate *)
    str_count = rt.str_count;
    str_size = 0;
    obj_count = rt.obj_count;
    obj_size = 0;
    prop_count = 0;
    shape_count = 0;
    js_func_count = 0;
    c_func_count = 0;
    array_count = 0;
  }

let pp_memory_usage fmt usage =
  Format.fprintf fmt "@[<v>";
  Format.fprintf fmt "Memory allocated: %d bytes@," usage.malloc_size;
  Format.fprintf fmt "Atom count: %d@," usage.atom_count;
  Format.fprintf fmt "String count: %d@," usage.str_count;
  Format.fprintf fmt "Object count: %d@," usage.obj_count;
  Format.fprintf fmt "@]"

(** Attach an arbitrary user value to the runtime. *)
let set_opaque rt opaque =
  rt.opaque <- Some (Obj.repr opaque)

(** Retrieve the user value.  The result type is unchecked ([Obj.obj]): the
    caller must request the same type that was passed to [set_opaque]. *)
let get_opaque rt =
  Option.map Obj.obj rt.opaque

let set_interrupt_handler rt handler =
  rt.interrupt_handler <- handler

(** Result of the interrupt handler, or [false] when none is installed. *)
let check_interrupt rt =
  match rt.interrupt_handler with
  | None -> false
  | Some handler -> handler ()

(** Allocate and return the next unused class id, reserving a slot for its
    definition. *)
let new_class_id rt =
  let id = rt.class_count in
  rt.class_count <- id + 1;
  ensure_class_capacity rt (id + 1);
  id

(** Store [class_def] under [class_id], growing the class table if needed.

    Registering an id at or beyond the allocation counter advances the
    counter past it, so [is_class_registered] reports the id and
    [new_class_id] never hands the same id out again.

    @raise Invalid_argument if [class_id] is negative. *)
let register_class rt class_id class_def =
  if class_id < 0 then
    invalid_arg
      (Printf.sprintf "Runtime.register_class: negative class id %d" class_id);
  ensure_class_capacity rt (class_id + 1);
  rt.classes.(class_id) <- class_def;
  if class_id >= rt.class_count then
    rt.class_count <- class_id + 1

(** Definition stored in slot [class_id], or [None] when the id is outside
    the class table.  A slot that was never registered yields the empty
    placeholder definition. *)
let get_class rt class_id =
  if class_id >= 0 && class_id < Array.length rt.classes then
    Some rt.classes.(class_id)
  else
    None

(** Whether [class_id] is a non-negative id below the allocation counter. *)
let is_class_registered rt class_id =
  class_id >= 0 && class_id < rt.class_count

let set_module_loader rt ~normalize ~loader =
  rt.module_normalize <- normalize;
  rt.module_loader <- loader

let atoms rt = rt.atoms

let intern rt s = Atom.intern rt.atoms s
+109
lib/quickjs/core/runtime.mli
(** JavaScript runtime: global state shared by every context created on
    it. *)

type memory_usage = {
  malloc_size : int;
  malloc_count : int;
  atom_count : int;
  atom_size : int;
  str_count : int;
  str_size : int;
  obj_count : int;
  obj_size : int;
  prop_count : int;
  shape_count : int;
  js_func_count : int;
  c_func_count : int;
  array_count : int;
}
(** Memory usage statistics, as returned by {!memory_usage}. *)

type config = {
  memory_limit : int option;
  max_stack_size : int;
  gc_threshold : int;
}
(** Runtime configuration. *)

val default_config : config
(** Configuration used by {!create}. *)

type class_def = {
  class_name : string;
  finalizer : (Value.js_object -> unit) option;
  gc_mark : (Value.js_object -> unit) option;
  call : (Value.t -> Value.t array -> Value.t) option;
}
(** Class definition for custom object types. *)

type t
(** A runtime. *)

(** Ids of the built-in classes. *)
module Class_id : sig
  val object_ : int
  val array : int
  val function_ : int
  val error : int
  val number : int
  val string : int
  val boolean : int
  val symbol : int
  val arguments : int
  val date : int
  val regexp : int
  val promise : int
  val generator : int
  val array_buffer : int
  val map : int
  val set : int
  val weakmap : int
  val weakset : int

  val count : int
  (** Number of built-in class ids. *)
end

(** {1 Lifecycle} *)

val create : unit -> t
(** New runtime using {!default_config}. *)

val create_with_config : config -> t
(** New runtime using the given configuration. *)

(** {1 Configuration} *)

val set_memory_limit : t -> int -> unit
val set_max_stack_size : t -> int -> unit
val set_gc_threshold : t -> int -> unit

val run_gc : t -> unit
(** Run a full major collection of the OCaml GC. *)

(** {1 Memory usage} *)

val memory_usage : t -> memory_usage
(** Current statistics; [malloc_size] is derived from the OCaml heap size. *)

val pp_memory_usage : Format.formatter -> memory_usage -> unit
(** Print the allocated size and the atom, string and object counts. *)

(** {1 Opaque data} *)

val set_opaque : t -> 'a -> unit
(** Attach an arbitrary user value to the runtime. *)

val get_opaque : t -> 'a option
(** The attached user value, if any.  The result type is not checked: ask
    for the same type that was given to {!set_opaque}. *)

(** {1 Interrupt handling} *)

val set_interrupt_handler : t -> (unit -> bool) option -> unit
(** Install ([Some h]) or remove ([None]) the interrupt handler. *)

val check_interrupt : t -> bool
(** What the installed handler returns, or [false] without a handler. *)

(** {1 Class management} *)

val new_class_id : t -> int
(** Allocate a fresh class id. *)

val register_class : t -> int -> class_def -> unit
(** [register_class rt id def] stores [def] under [id]. *)

val get_class : t -> int -> class_def option
(** Definition stored under the id; [None] when the id lies outside the
    class table. *)

val is_class_registered : t -> int -> bool
(** Whether the id is one the runtime has allocated. *)

(** {1 Module loading} *)

val set_module_loader : t ->
  normalize:(string -> string -> string) option ->
  loader:(string -> Value.t option) option ->
  unit
(** Replace both module hooks; [None] clears a hook. *)

(** {1 Atoms} *)

val atoms : t -> Atom.table
(** The runtime's atom table. *)

val intern : t -> string -> Atom.t
(** Intern a string in the runtime's atom table. *)
+83
lib/quickjs/core/tag.ml
(** JavaScript value tags.

    These correspond to JS_TAG_* in quickjs.h.
    Tags with negative values are reference-counted heap objects.
    Tags with non-negative values are immediate/stack values. *)

type t =
  (* Heap objects (reference counted in C version, OCaml GC handles this) *)
  | Big_int            (** Heap-allocated BigInt *)
  | Symbol             (** Symbol *)
  | String             (** String *)
  | Module             (** Module (internal) *)
  | Function_bytecode  (** Bytecode function (internal) *)
  | Object             (** Object *)

  (* Immediate values *)
  | Int                (** Small integer (int32 range) *)
  | Bool               (** Boolean *)
  | Null               (** null *)
  | Undefined          (** undefined *)
  | Uninitialized      (** Uninitialized binding (internal) *)
  | Catch_offset       (** Exception handler offset (internal) *)
  | Exception          (** Exception marker *)
  | Float64            (** 64-bit float *)

(* Every tag, listed once; drives the reverse lookup in [of_int]. *)
let all_tags =
  [ Big_int; Symbol; String; Module; Function_bytecode; Object;
    Int; Bool; Null; Undefined; Uninitialized; Catch_offset; Exception;
    Float64 ]

(* Numeric code of a tag. *)
let to_int tag =
  match tag with
  | Big_int -> -9
  | Symbol -> -8
  | String -> -7
  | Module -> -3
  | Function_bytecode -> -2
  | Object -> -1
  | Int -> 0
  | Bool -> 1
  | Null -> 2
  | Undefined -> 3
  | Uninitialized -> 4
  | Catch_offset -> 5
  | Exception -> 6
  | Float64 -> 8

(* Inverse of [to_int]; raises [Invalid_argument] for codes that name no tag. *)
let of_int code =
  match List.find_opt (fun tag -> to_int tag = code) all_tags with
  | Some tag -> tag
  | None -> invalid_arg (Printf.sprintf "Tag.of_int: invalid tag %d" code)

(* Heap tags are exactly the ones with a negative numeric code. *)
let is_heap_object tag = to_int tag < 0

(* Lowercase name of a tag. *)
let to_string tag =
  match tag with
  | Big_int -> "bigint"
  | Symbol -> "symbol"
  | String -> "string"
  | Module -> "module"
  | Function_bytecode -> "function"
  | Object | Null -> "object" (* typeof null === "object" per spec *)
  | Int | Float64 -> "number"
  | Bool -> "boolean"
  | Undefined | Uninitialized -> "undefined"
  | Catch_offset -> "internal"
  | Exception -> "exception"

let pp fmt tag = Format.fprintf fmt "%s" (to_string tag)

(* Order tags by their numeric code. *)
let compare (lhs : t) (rhs : t) = Int.compare (to_int lhs) (to_int rhs)

(* Codes are distinct per tag, so comparing codes decides equality. *)
let equal (lhs : t) (rhs : t) = to_int lhs = to_int rhs
+32
lib/quickjs/core/tag.mli
(** JavaScript value tags.

    These correspond to JS_TAG_* in quickjs.h. *)

(** One constructor per QuickJS tag. The first six are the heap-object tags
    (see {!is_heap_object}); the rest are immediate values. *)
type t =
  | Big_int
  | Symbol
  | String
  | Module
  | Function_bytecode
  | Object
  | Int
  | Bool
  | Null
  | Undefined
  | Uninitialized
  | Catch_offset
  | Exception
  | Float64

(** [to_int t] is the numeric code of the tag (negative for heap tags). *)
val to_int : t -> int

(** [of_int n] is the tag whose numeric code is [n].
    @raise Invalid_argument if [n] is not the code of any tag. *)
val of_int : int -> t

(** [is_heap_object t] returns true if values with this tag are heap-allocated *)
val is_heap_object : t -> bool

(** [to_string t] returns a lowercase name for the tag. For tags of
    user-visible values this is the JavaScript typeof string (so [Null]
    yields ["object"] and both [Int] and [Float64] yield ["number"]).
    Internal tags do not have a typeof result and yield descriptive names
    instead: ["module"], ["internal"], ["exception"]. *)
val to_string : t -> string

(** Prints the same text as {!to_string}. *)
val pp : Format.formatter -> t -> unit

val equal : t -> t -> bool

(** Total order on tags, following their numeric codes. *)
val compare : t -> t -> int
+287
lib/quickjs/core/value.ml
(** JavaScript value representation.

    Unlike the C version which uses NaN-boxing or tagged pointers,
    we use a straightforward OCaml variant type and let the OCaml GC
    handle memory management. *)

type js_object = {
  obj_id : int;
      (* Unique, immutable identity. Objects are compared by identity, and
         the GC may move them, so hashing and ordering need a stable key
         that does not depend on the mutable fields below. *)
  mutable class_id : int;
  mutable extensible : bool;
  mutable properties : (int, property) Hashtbl.t; (* atom -> property *)
  mutable prototype : t option;
  mutable opaque : Obj.t option; (* For custom class data *)
}

and property = {
  mutable prop_flags : int;
  mutable prop_value : property_value;
}

and property_value =
  | Data of t
  | Accessor of { get : t option; set : t option }

(** The core JavaScript value type *)
and t =
  | Undefined
  | Null
  | Bool of bool
  | Int of int32
  | Float of float
  | String of js_string
  | Symbol of symbol
  | BigInt of Z.t
  | Object of js_object
  | Exception  (** Marker indicating an exception was thrown *)

and js_string = {
  mutable str_value : string_repr;
  mutable str_hash : int option;
}

and string_repr =
  | Flat of string                   (** UTF-8 encoded string *)
  | Rope of js_string * js_string    (** For efficient concatenation *)

and symbol = {
  id : int;
  description : string option;
}

(* Property flags - matching quickjs.h *)
let prop_configurable = 1 lsl 0
let prop_writable = 1 lsl 1
let prop_enumerable = 1 lsl 2
let prop_c_w_e = prop_configurable lor prop_writable lor prop_enumerable

(* Special singleton values *)
let undefined = Undefined
let null = Null
let exception_val = Exception

let of_bool b = Bool b
let true_ = Bool true
let false_ = Bool false

let of_int32 n = Int n

(* Native ints outside the int32 range are stored as floats. *)
let of_int n =
  if n >= Int32.(to_int min_int) && n <= Int32.(to_int max_int) then
    Int (Int32.of_int n)
  else
    Float (Float.of_int n)

let of_float f =
  (* Try to represent as int32 if possible (like QuickJS does) *)
  if Float.is_integer f && f >= Int32.(to_float min_int) && f <= Int32.(to_float max_int) then
    let i = Int32.of_float f in
    (* Check for -0.0 which must stay as float *)
    if Int32.to_float i = f && not (f = 0.0 && Float.sign_bit f) then
      Int i
    else
      Float f
  else
    Float f

let of_string s =
  String { str_value = Flat s; str_hash = None }

let of_bigint z = BigInt z

(* Object ID counter; see the [obj_id] field. *)
let next_object_id = ref 0

let mk_object ?(class_id = 0) () =
  let obj_id = !next_object_id in
  incr next_object_id;
  Object {
    obj_id;
    class_id;
    extensible = true;
    properties = Hashtbl.create 8;
    prototype = None;
    opaque = None;
  }

(* Symbol ID counter *)
let next_symbol_id = ref 0

let mk_symbol ?description () =
  let id = !next_symbol_id in
  incr next_symbol_id;
  Symbol { id; description }

(* Tag extraction *)
let tag = function
  | Undefined -> Tag.Undefined
  | Null -> Tag.Null
  | Bool _ -> Tag.Bool
  | Int _ -> Tag.Int
  | Float _ -> Tag.Float64
  | String _ -> Tag.String
  | Symbol _ -> Tag.Symbol
  | BigInt _ -> Tag.Big_int
  | Object _ -> Tag.Object
  | Exception -> Tag.Exception

(* Type predicates *)
let is_undefined = function Undefined -> true | _ -> false
let is_null = function Null -> true | _ -> false
let is_nullish = function Undefined | Null -> true | _ -> false
let is_bool = function Bool _ -> true | _ -> false
let is_number = function Int _ | Float _ -> true | _ -> false
let is_string = function String _ -> true | _ -> false
let is_symbol = function Symbol _ -> true | _ -> false
let is_bigint = function BigInt _ -> true | _ -> false
let is_object = function Object _ -> true | _ -> false
let is_exception = function Exception -> true | _ -> false

let is_primitive = function
  | Undefined | Null | Bool _ | Int _ | Float _ | String _ | Symbol _ | BigInt _ -> true
  | Object _ | Exception -> false

(* String utilities *)

(* Flatten a rope into a single OCaml string and cache the result in the
   node that was asked for. The leaves are appended to one buffer using an
   explicit work list, so a deep (e.g. left-leaning) rope costs time linear
   in the result and no call-stack depth. *)
let flatten_string js_str =
  match js_str.str_value with
  | Flat s -> s
  | Rope _ ->
    let buf = Buffer.create 64 in
    let rec emit = function
      | [] -> ()
      | node :: rest ->
        (match node.str_value with
         | Flat s ->
           Buffer.add_string buf s;
           emit rest
         | Rope (left, right) -> emit (left :: right :: rest))
    in
    emit [ js_str ];
    let s = Buffer.contents buf in
    js_str.str_value <- Flat s;
    s

let string_value = function
  | String s -> Some (flatten_string s)
  | _ -> None

(* Length without flattening. Same explicit work list as [flatten_string]. *)
let string_length js_str =
  let rec total acc = function
    | [] -> acc
    | node :: rest ->
      (match node.str_value with
       | Flat s ->
         (* Note: this is byte length, not UTF-16 code unit count *)
         total (acc + String.length s) rest
       | Rope (left, right) -> total acc (left :: right :: rest))
  in
  total 0 [ js_str ]

(* Number utilities *)
let to_number_opt = function
  | Int n -> Some (Int32.to_float n)
  | Float f -> Some f
  | _ -> None

(* [Some i] when the value is a number that is exactly an int32. The range
   is checked before converting because [Int32.of_float] is unspecified for
   floats outside the int32 range. *)
let int32_value = function
  | Int n -> Some n
  | Float f
    when Float.is_integer f
         && f >= Int32.(to_float min_int)
         && f <= Int32.(to_float max_int) ->
    Some (Int32.of_float f)
  | _ -> None

let float_value = function
  | Int n -> Some (Int32.to_float n)
  | Float f -> Some f
  | _ -> None

(* BigInt utilities *)
let bigint_value = function
  | BigInt z -> Some z
  | _ -> None

(* Object utilities *)
let object_value = function
  | Object obj -> Some obj
  | _ -> None

let get_prototype = function
  | Object obj -> obj.prototype
  | _ -> None

let set_prototype v proto =
  match v with
  | Object obj -> obj.prototype <- proto
  | _ -> ()

(* Strict equality (===) *)
let strict_equal a b =
  match a, b with
  | Undefined, Undefined -> true
  | Null, Null -> true
  | Bool x, Bool y -> x = y
  | Int x, Int y -> x = y
  | Float x, Float y ->
    (* NaN !== NaN *)
    if Float.is_nan x || Float.is_nan y then false
    else x = y
  | Int x, Float y -> Int32.to_float x = y
  | Float x, Int y -> x = Int32.to_float y
  | String x, String y -> flatten_string x = flatten_string y
  | Symbol x, Symbol y -> x.id = y.id
  | BigInt x, BigInt y -> Z.equal x y
  | Object x, Object y -> x == y  (* Object identity *)
  | Exception, Exception -> true
  | _ -> false

(* SameValue (used by Object.is) *)
let same_value a b =
  match a, b with
  | Float x, Float y ->
    (* SameValue treats NaN as equal to NaN, and distinguishes +0/-0 *)
    if Float.is_nan x && Float.is_nan y then true
    else if x = 0.0 && y = 0.0 then Float.sign_bit x = Float.sign_bit y
    else x = y
  | Int 0l, Float y when y = 0.0 -> not (Float.sign_bit y)
  | Float x, Int 0l when x = 0.0 -> not (Float.sign_bit x)
  | _ -> strict_equal a b

(* SameValueZero (used by Map/Set) *)
let same_value_zero a b =
  match a, b with
  | Float x, Float y ->
    (* SameValueZero treats NaN as equal, but +0 == -0 *)
    if Float.is_nan x && Float.is_nan y then true
    else x = y
  | _ -> strict_equal a b

(* Shortest of the 15/16/17 significant-digit renderings that reads back to
   exactly [f]. A fixed "%g" keeps only 6 significant digits, which prints
   e.g. 1234567.5 as 1.23457e+06. Only meant for finite, non-NaN input; the
   exponent syntax is still C style (1e+21), not JavaScript's (1e21). *)
let float_repr f =
  let render precision = Printf.sprintf "%.*g" precision f in
  let round_trips s = Float.equal (float_of_string s) f in
  let s15 = render 15 in
  if round_trips s15 then s15
  else
    let s16 = render 16 in
    if round_trips s16 then s16 else render 17

(* Pretty printing *)
let rec pp fmt = function
  | Undefined -> Format.fprintf fmt "undefined"
  | Null -> Format.fprintf fmt "null"
  | Bool true -> Format.fprintf fmt "true"
  | Bool false -> Format.fprintf fmt "false"
  | Int n -> Format.fprintf fmt "%ld" n
  | Float f ->
    if Float.is_nan f then Format.fprintf fmt "NaN"
    else if f = infinity || f = neg_infinity then
      if f > 0.0 then Format.fprintf fmt "Infinity"
      else Format.fprintf fmt "-Infinity"
    else if f = 0.0 && Float.sign_bit f then Format.fprintf fmt "-0"
    else Format.pp_print_string fmt (float_repr f)
  | String s -> Format.fprintf fmt "\"%s\"" (String.escaped (flatten_string s))
  | Symbol { description = Some d; _ } -> Format.fprintf fmt "Symbol(%s)" d
  | Symbol { description = None; _ } -> Format.fprintf fmt "Symbol()"
  | BigInt z -> Format.fprintf fmt "%sn" (Z.to_string z)
  | Object obj ->
    Format.fprintf fmt "[object %s]"
      (match obj.class_id with
       | 0 -> "Object"
       | 1 -> "Array"
       | 2 -> "Function"
       | _ -> "Object")
  | Exception -> Format.fprintf fmt "[exception]"

let show v =
  Format.asprintf "%a" pp v

(* NOTE(review): derived from the tag alone, so every object, including
   function objects, reports "object" - confirm this is intended. *)
let typeof v = Tag.to_string (tag v)

(* Hashing for use in collections.

   Must agree with [equal]: values that are [equal] hash alike.
   - Numbers are hashed through their float value, because [Int 1l] and
     [Float 1.0] are equal. Both zeros are mapped to +0.0 first.
   - Objects are hashed by [obj_id]; hashing the record itself would give a
     result that changes whenever a mutable field or the property table
     changes, while equality is physical identity. *)
let hash = function
  | Undefined -> Hashtbl.hash `Undefined
  | Null -> Hashtbl.hash `Null
  | Bool b -> Hashtbl.hash b
  | Int n -> Hashtbl.hash (Int32.to_float n)
  | Float f -> Hashtbl.hash (if f = 0.0 then 0.0 else f)
  | String s -> Hashtbl.hash (flatten_string s)
  | Symbol s -> s.id
  | BigInt z -> Z.hash z
  | Object obj -> Hashtbl.hash obj.obj_id
  | Exception -> Hashtbl.hash `Exception

let equal = strict_equal

(* Rank used to order values of different kinds. [Int] and [Float] share a
   rank because they are compared numerically with each other; giving them
   different ranks would make the order intransitive (e.g. Int 1 < Bool <
   Float 1.0 while Int 1 = Float 1.0). *)
let compare_rank = function
  | Float _ -> Tag.to_int Tag.Int
  | v -> Tag.to_int (tag v)

(* Total order for use in sorted collections (not the JavaScript relational
   operators). Values of the same kind are ordered by content - or by
   identity for symbols and objects - so [compare a b = 0] whenever
   [equal a b]; values of different kinds are ordered by [compare_rank].
   As usual for a total order on floats, NaN compares equal to itself here
   even though [equal] says otherwise. *)
let compare a b =
  match a, b with
  | Int x, Int y -> Int32.compare x y
  | Float x, Float y -> Float.compare x y
  | Int x, Float y -> Float.compare (Int32.to_float x) y
  | Float x, Int y -> Float.compare x (Int32.to_float y)
  | Bool x, Bool y -> Bool.compare x y
  | String x, String y -> String.compare (flatten_string x) (flatten_string y)
  | Symbol x, Symbol y -> Int.compare x.id y.id
  | BigInt x, BigInt y -> Z.compare x y
  | Object x, Object y -> Int.compare x.obj_id y.obj_id
  | _ -> Int.compare (compare_rank a) (compare_rank b)
+110
lib/quickjs/core/value.mli
(** JavaScript value representation.

    This module provides the core value type for the JavaScript engine.
    Values are represented using OCaml's type system and GC. *)

(** {1 Types} *)

(** JavaScript string with lazy flattening for rope optimization *)
type js_string

(** JavaScript symbol *)
type symbol = {
  id : int;
  description : string option;
}

(** Property descriptor *)
type property

(** JavaScript object *)
type js_object

(** The core JavaScript value type *)
type t =
  | Undefined
  | Null
  | Bool of bool
  | Int of int32
  | Float of float
  | String of js_string
  | Symbol of symbol
  | BigInt of Z.t
  | Object of js_object
  | Exception

(** {1 Property flags} *)

(** Bit flags for property descriptors; [prop_c_w_e] is the three flags
    or-ed together. *)

val prop_configurable : int
val prop_writable : int
val prop_enumerable : int
val prop_c_w_e : int

(** {1 Constructors} *)

val undefined : t
val null : t
val exception_val : t
val true_ : t
val false_ : t

val of_bool : bool -> t

(** [of_int n] is [Int] when [n] fits in an int32 and [Float] otherwise. *)
val of_int : int -> t

val of_int32 : int32 -> t

(** [of_float f] is [Int] when [f] is an integer in the int32 range other
    than negative zero, and [Float f] otherwise. *)
val of_float : float -> t

(** Wrap an OCaml string as a flat JavaScript string. *)
val of_string : string -> t

val of_bigint : Z.t -> t

(** Fresh extensible object with no prototype and no properties.
    [class_id] defaults to [0]. *)
val mk_object : ?class_id:int -> unit -> t

(** Fresh symbol with a new [id]. *)
val mk_symbol : ?description:string -> unit -> t

(** {1 Tag inspection} *)

val tag : t -> Tag.t
val is_undefined : t -> bool
val is_null : t -> bool

(** True for [Undefined] and [Null]. *)
val is_nullish : t -> bool

val is_bool : t -> bool

(** True for both [Int] and [Float]. *)
val is_number : t -> bool

val is_string : t -> bool
val is_symbol : t -> bool
val is_bigint : t -> bool
val is_object : t -> bool
val is_exception : t -> bool

(** True for every value except [Object] and the [Exception] marker. *)
val is_primitive : t -> bool

(** {1 Value extraction} *)

(** Each extractor returns [None] when the value is of another kind. *)

(** The flattened contents of a [String] value. *)
val string_value : t -> string option

(** The value as an int32: any [Int], or a [Float] that is exactly an
    int32. *)
val int32_value : t -> int32 option

(** The numeric value of an [Int] or [Float]. *)
val float_value : t -> float option

val bigint_value : t -> Z.t option
val object_value : t -> js_object option

(** Same result as {!float_value}: no coercion of non-numbers is done. *)
val to_number_opt : t -> float option

(** {1 String operations} *)

(** Contents of the string as one OCaml string; a rope is concatenated and
    the result is cached in the string. *)
val flatten_string : js_string -> string

(** Length in bytes of the stored string (not a UTF-16 code unit count). *)
val string_length : js_string -> int

(** {1 Object operations} *)

(** Prototype of an object; [None] for non-objects. *)
val get_prototype : t -> t option

(** Set the prototype of an object; does nothing for non-objects. *)
val set_prototype : t -> t option -> unit

(** {1 Comparison} *)

(** Strict equality ([===]): [Int] and [Float] compare numerically, NaN is
    not equal to itself, strings compare by contents, symbols by [id],
    objects by identity. *)
val strict_equal : t -> t -> bool

(** SameValue (as used by [Object.is]): like {!strict_equal} except that
    NaN equals NaN and +0 is distinguished from -0. *)
val same_value : t -> t -> bool

(** SameValueZero (as used by Map/Set): like {!strict_equal} except that
    NaN equals NaN. *)
val same_value_zero : t -> t -> bool

(** Alias of {!strict_equal}. *)
val equal : t -> t -> bool

(** Ordering for use in sorted collections: numbers by numeric value and
    strings by flattened contents. This is not the JavaScript relational
    comparison. *)
val compare : t -> t -> int

(** Hash for use in hashed collections. *)
val hash : t -> int

(** {1 Type coercion helpers} *)

(** Name derived from the value's tag via [Tag.to_string]. *)
val typeof : t -> string

(** {1 Pretty printing} *)

val pp : Format.formatter -> t -> unit

(** [show v] is the text printed by {!pp}. *)
val show : t -> string
+6
lib/quickjs/dune
; Main quickjs library - re-exports core modules
; Umbrella library: published as the package's top-level library and
; depending on the core and parser sub-libraries.
(library
 (name quickjs)
 (public_name ocaml-quickjs)
 (libraries quickjs_core quickjs_parser)
 ; No ppx or other preprocessor is run on this library.
 (preprocess no_preprocessing))
+6
lib/quickjs/parser/dune
; Parser sub-library (source positions, tokens, lexer); depends on the core
; library only.
(library
 (name quickjs_parser)
 (public_name ocaml-quickjs.parser)
 (libraries quickjs_core)
 ; Disabled warnings: 32 = unused value, 37 = unused constructor,
 ; 39 = unused rec flag, 69 = unused record field.
 (flags (:standard -w -32-37-39-69)) ; Suppress various warnings during development
 (preprocess no_preprocessing))
+758
lib/quickjs/parser/lexer.ml
(** Handwritten JavaScript lexer.

    This is a recursive-descent lexer similar to QuickJS's implementation.
    It handles all ES2024 syntax including template literals, Unicode
    identifiers, and BigInt. *)

type error =
  | Unexpected_char of char
  | Unexpected_eof
  | Invalid_number of string
  | Invalid_escape_sequence of string
  | Invalid_unicode_escape of string
  | Unterminated_string
  | Unterminated_comment
  | Unterminated_regexp
  | Unterminated_template
  | Invalid_regexp_flag of char
  | Legacy_octal_in_strict_mode

exception Lexer_error of error * Source.loc

type t = {
  cursor : Source.cursor;
  mutable strict_mode : bool;
  mutable allow_regexp : bool;  (* Context-dependent: after certain tokens *)
  mutable newline_before : bool;
  errors : (error * Source.loc) list ref;
}

(** Create a lexer over [content]. It starts in sloppy mode, with regexp
    literals allowed and no pending newline. *)
let create ~filename ~content =
  let file = Source.create_file ~filename ~content in
  {
    cursor = Source.create_cursor file;
    strict_mode = false;
    allow_regexp = true;
    newline_before = false;
    errors = ref [];
  }

let set_strict_mode lexer strict =
  lexer.strict_mode <- strict

(** Raise [Lexer_error] for [err] at the cursor's current location. *)
let error lexer err =
  let loc = Source.cursor_loc lexer.cursor in
  raise (Lexer_error (err, loc))

(* Character classification *)

(* NOTE(review): '\xa0' is NO-BREAK SPACE only in a Latin-1 byte stream. If
   the source is UTF-8, NBSP is the two bytes C2 A0 and a lone A0 byte is a
   continuation byte - confirm the intended source encoding. *)
let is_whitespace = function
  | ' ' | '\t' | '\x0b' | '\x0c' | '\xa0' -> true
  | _ -> false

let is_line_terminator = function
  | '\n' | '\r' -> true
  | _ -> false

let is_digit = function
  | '0'..'9' -> true
  | _ -> false

let is_hex_digit = function
  | '0'..'9' | 'a'..'f' | 'A'..'F' -> true
  | _ -> false

let is_octal_digit = function
  | '0'..'7' -> true
  | _ -> false

let is_binary_digit = function
  | '0' | '1' -> true
  | _ -> false

let is_identifier_start = function
  | 'a'..'z' | 'A'..'Z' | '_' | '$' -> true
  | _ -> false  (* TODO: Unicode ID_Start *)

let is_identifier_continue = function
  | 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' | '$' -> true
  | _ -> false  (* TODO: Unicode ID_Continue *)

(* Numeric value of a hex digit, or -1 if [c] is not a hex digit. *)
let hex_value = function
  | '0'..'9' as c -> Char.code c - Char.code '0'
  | 'a'..'f' as c -> Char.code c - Char.code 'a' + 10
  | 'A'..'F' as c -> Char.code c - Char.code 'A' + 10
  | _ -> -1

(* Skip whitespace and comments.
   Sets [newline_before] whenever a line terminator is crossed, including
   one inside a block comment. *)
let rec skip_whitespace_and_comments lexer =
  let cursor = lexer.cursor in
  match Source.cursor_peek cursor with
  | None -> ()
  | Some c when is_whitespace c ->
    Source.cursor_advance cursor;
    skip_whitespace_and_comments lexer
  | Some '\r' ->
    Source.cursor_advance cursor;
    (* Handle \r\n as single line terminator *)
    (match Source.cursor_peek cursor with
     | Some '\n' -> Source.cursor_advance cursor
     | _ -> ());
    lexer.newline_before <- true;
    skip_whitespace_and_comments lexer
  | Some '\n' ->
    Source.cursor_advance cursor;
    lexer.newline_before <- true;
    skip_whitespace_and_comments lexer
  | Some '/' ->
    (* A lone '/' is left for the caller: it is an operator or a regexp. *)
    (match Source.cursor_peek_n cursor 2 with
     | Some "//" ->
       Source.cursor_advance_n cursor 2;
       skip_line_comment lexer
     | Some "/*" ->
       Source.cursor_advance_n cursor 2;
       skip_block_comment lexer
     | _ -> ())
  | _ -> ()

and skip_line_comment lexer =
  let cursor = lexer.cursor in
  let rec loop () =
    match Source.cursor_peek cursor with
    | None -> ()
    | Some c when is_line_terminator c -> ()  (* Don't consume the newline *)
    | Some _ ->
      Source.cursor_advance cursor;
      loop ()
  in
  loop ();
  skip_whitespace_and_comments lexer

and skip_block_comment lexer =
  let cursor = lexer.cursor in
  let rec loop () =
    match Source.cursor_peek cursor with
    | None -> error lexer Unterminated_comment
    | Some '*' ->
      Source.cursor_advance cursor;
      (match Source.cursor_peek cursor with
       | Some '/' ->
         Source.cursor_advance cursor;
         skip_whitespace_and_comments lexer
       | _ -> loop ())
    | Some c when is_line_terminator c ->
      lexer.newline_before <- true;
      Source.cursor_advance cursor;
      loop ()
    | Some _ ->
      Source.cursor_advance cursor;
      loop ()
  in
  loop ()

(* Remove numeric separators ('_') from the text of a literal. *)
let strip_numeric_separators s =
  String.concat "" (String.split_on_char '_' s)

(* Build a decimal Number token from the text between the cursor mark and
   the current position. Malformed text (for example an exponent with no
   digits, as in 1e) is reported as [Invalid_number] through [Lexer_error]
   instead of escaping as the [Failure] raised by [float_of_string]. *)
let decimal_number lexer =
  let text = strip_numeric_separators (Source.cursor_slice lexer.cursor) in
  match float_of_string_opt text with
  | Some value -> Token.Number (value, Token.Decimal)
  | None -> error lexer (Invalid_number text)

(* Scan number literal.
   Marks the cursor at the first character; every helper below slices from
   that mark, so the slice always covers the whole literal so far. *)
let rec scan_number lexer =
  let cursor = lexer.cursor in
  Source.cursor_mark cursor;

  let first = Source.cursor_peek cursor in
  match first with
  | Some '0' ->
    Source.cursor_advance cursor;
    (match Source.cursor_peek cursor with
     | Some ('x' | 'X') ->
       Source.cursor_advance cursor;
       scan_hex_number lexer
     | Some ('o' | 'O') ->
       Source.cursor_advance cursor;
       scan_octal_number lexer
     | Some ('b' | 'B') ->
       Source.cursor_advance cursor;
       scan_binary_number lexer
     | Some c when is_octal_digit c ->
       scan_legacy_octal_number lexer
     | Some ('8' | '9') ->
       (* Annex B non-octal decimal literal such as 08 or 09: a decimal
          number in sloppy mode, rejected in strict mode like legacy octal.
          Without this case the literal was split into 0 followed by 8. *)
       if lexer.strict_mode then
         error lexer Legacy_octal_in_strict_mode;
       scan_decimal_integer lexer
     | Some '.' ->
       (* Consume the '.' first: [scan_decimal_fraction] expects to start
          on the fraction digits. Otherwise 0.5 lexed as 0 followed by .5 *)
       Source.cursor_advance cursor;
       scan_decimal_fraction lexer
     | Some ('e' | 'E') ->
       scan_decimal_exponent lexer
     | Some 'n' ->
       Source.cursor_advance cursor;
       Token.BigInt "0"
     | _ ->
       Token.Number (0.0, Token.Decimal))
  | Some '.' ->
    Source.cursor_advance cursor;
    scan_decimal_fraction lexer
  | _ ->
    scan_decimal_integer lexer

(* Integer part of a decimal literal; continues into the fraction, the
   exponent or the BigInt suffix when one follows. *)
and scan_decimal_integer lexer =
  let cursor = lexer.cursor in
  Source.cursor_skip_while cursor (fun c -> is_digit c || c = '_');
  match Source.cursor_peek cursor with
  | Some '.' ->
    Source.cursor_advance cursor;
    scan_decimal_fraction lexer
  | Some ('e' | 'E') ->
    scan_decimal_exponent lexer
  | Some 'n' ->
    (* Slice before consuming the suffix so the text excludes the 'n'. *)
    let s = strip_numeric_separators (Source.cursor_slice cursor) in
    Source.cursor_advance cursor;
    Token.BigInt s
  | _ ->
    decimal_number lexer

(* Fraction digits; the '.' has already been consumed by the caller. *)
and scan_decimal_fraction lexer =
  let cursor = lexer.cursor in
  Source.cursor_skip_while cursor (fun c -> is_digit c || c = '_');
  match Source.cursor_peek cursor with
  | Some ('e' | 'E') ->
    scan_decimal_exponent lexer
  | _ ->
    decimal_number lexer

(* Exponent part; the cursor is on the 'e' / 'E'. *)
and scan_decimal_exponent lexer =
  let cursor = lexer.cursor in
  Source.cursor_advance cursor;  (* Skip 'e' or 'E' *)
  (match Source.cursor_peek cursor with
   | Some ('+' | '-') -> Source.cursor_advance cursor
   | _ -> ());
  Source.cursor_skip_while cursor (fun c -> is_digit c || c = '_');
  decimal_number lexer

(* Hex literal; the 0x prefix has already been consumed.
   - A prefix with no digits is rejected as [Invalid_number].
   - BigInt text has its separators removed, like the decimal BigInt path;
     it keeps the 0x prefix.
   - The Number value is accumulated in floating point. Going through
     [int_of_string] wrapped around or raised [Failure] for literals that
     do not fit a native int. Values above 2^53 are rounded as they are
     accumulated. *)
and scan_hex_number lexer =
  let cursor = lexer.cursor in
  Source.cursor_skip_while cursor (fun c -> is_hex_digit c || c = '_');
  let literal = strip_numeric_separators (Source.cursor_slice cursor) in
  let hex_part = String.sub literal 2 (String.length literal - 2) in
  if hex_part = "" then
    error lexer (Invalid_number literal);
  match Source.cursor_peek cursor with
  | Some 'n' ->
    Source.cursor_advance cursor;
    Token.BigInt literal
  | _ ->
    let value =
      String.fold_left
        (fun acc c -> (acc *. 16.0) +. float_of_int (hex_value c))
        0.0 hex_part
    in
    Token.Number (value, Token.Hex)

243 + and scan_octal_number lexer = 244 + let cursor = lexer.cursor in 245 + Source.cursor_skip_while cursor (fun c -> is_octal_digit c || c = '_'); 246 + match Source.cursor_peek cursor with 247 + | Some 'n' -> 248 + let s = Source.cursor_slice cursor in 249 + Source.cursor_advance cursor; 250 + Token.BigInt s 251 + | _ -> 252 + let s = Source.cursor_slice cursor in 253 + let s = String.concat "" (String.split_on_char '_' s) in 254 + let oct_part = String.sub s 2 (String.length s - 2) in 255 + Token.Number (float_of_int (int_of_string ("0o" ^ oct_part)), Token.Octal) 256 + 257 + and scan_binary_number lexer = 258 + let cursor = lexer.cursor in 259 + Source.cursor_skip_while cursor (fun c -> is_binary_digit c || c = '_'); 260 + match Source.cursor_peek cursor with 261 + | Some 'n' -> 262 + let s = Source.cursor_slice cursor in 263 + Source.cursor_advance cursor; 264 + Token.BigInt s 265 + | _ -> 266 + let s = Source.cursor_slice cursor in 267 + let s = String.concat "" (String.split_on_char '_' s) in 268 + let bin_part = String.sub s 2 (String.length s - 2) in 269 + Token.Number (float_of_int (int_of_string ("0b" ^ bin_part)), Token.Binary) 270 + 271 + and scan_legacy_octal_number lexer = 272 + let cursor = lexer.cursor in 273 + if lexer.strict_mode then 274 + error lexer Legacy_octal_in_strict_mode; 275 + Source.cursor_skip_while cursor is_octal_digit; 276 + let s = Source.cursor_slice cursor in 277 + Token.Number (float_of_int (int_of_string ("0o" ^ String.sub s 1 (String.length s - 1))), Token.Legacy_octal) 278 + 279 + (* Scan string literal *) 280 + and scan_string lexer quote = 281 + let cursor = lexer.cursor in 282 + Source.cursor_advance cursor; (* Skip opening quote *) 283 + let buf = Buffer.create 64 in 284 + let rec loop () = 285 + match Source.cursor_peek cursor with 286 + | None -> error lexer Unterminated_string 287 + | Some c when c = quote -> 288 + Source.cursor_advance cursor; 289 + Buffer.contents buf 290 + | Some '\\' -> 291 + 
Source.cursor_advance cursor; 292 + Buffer.add_char buf (scan_escape_sequence lexer); 293 + loop () 294 + | Some c when is_line_terminator c -> 295 + error lexer Unterminated_string 296 + | Some c -> 297 + Source.cursor_advance cursor; 298 + Buffer.add_char buf c; 299 + loop () 300 + in 301 + let s = loop () in 302 + let kind = if quote = '\'' then Token.Single_quoted else Token.Double_quoted in 303 + Token.String (s, kind) 304 + 305 + and scan_escape_sequence lexer = 306 + let cursor = lexer.cursor in 307 + match Source.cursor_peek cursor with 308 + | None -> error lexer Unexpected_eof 309 + | Some 'n' -> Source.cursor_advance cursor; '\n' 310 + | Some 'r' -> Source.cursor_advance cursor; '\r' 311 + | Some 't' -> Source.cursor_advance cursor; '\t' 312 + | Some 'b' -> Source.cursor_advance cursor; '\b' 313 + | Some 'f' -> Source.cursor_advance cursor; '\x0c' 314 + | Some 'v' -> Source.cursor_advance cursor; '\x0b' 315 + | Some '0' -> 316 + Source.cursor_advance cursor; 317 + (* \0 is NUL only if not followed by another digit *) 318 + (match Source.cursor_peek cursor with 319 + | Some c when is_digit c -> error lexer (Invalid_escape_sequence "\\0...") 320 + | _ -> '\x00') 321 + | Some 'x' -> 322 + Source.cursor_advance cursor; 323 + scan_hex_escape lexer 2 324 + | Some 'u' -> 325 + Source.cursor_advance cursor; 326 + scan_unicode_escape lexer 327 + | Some '\r' -> 328 + Source.cursor_advance cursor; 329 + (* Line continuation: \<CR> or \<CR><LF> *) 330 + lexer.newline_before <- true; 331 + (match Source.cursor_peek cursor with 332 + | Some '\n' -> Source.cursor_advance cursor 333 + | _ -> ()); 334 + (* Return space as placeholder - will be filtered *) 335 + ' ' 336 + | Some '\n' -> 337 + Source.cursor_advance cursor; 338 + lexer.newline_before <- true; 339 + ' ' 340 + | Some c -> 341 + Source.cursor_advance cursor; 342 + c 343 + 344 + and scan_hex_escape lexer n = 345 + let cursor = lexer.cursor in 346 + let value = ref 0 in 347 + for _ = 1 to n do 348 + match 
Source.cursor_peek cursor with 349 + | Some c when is_hex_digit c -> 350 + value := !value * 16 + hex_value c; 351 + Source.cursor_advance cursor 352 + | _ -> error lexer (Invalid_escape_sequence "\\x..") 353 + done; 354 + Char.chr (!value land 0xFF) 355 + 356 + and scan_unicode_escape lexer = 357 + let cursor = lexer.cursor in 358 + match Source.cursor_peek cursor with 359 + | Some '{' -> 360 + Source.cursor_advance cursor; 361 + let value = ref 0 in 362 + let rec loop () = 363 + match Source.cursor_peek cursor with 364 + | Some '}' -> 365 + Source.cursor_advance cursor; 366 + if !value > 0x10FFFF then 367 + error lexer (Invalid_unicode_escape "code point out of range"); 368 + (* TODO: Handle code points > 0xFFFF properly *) 369 + Char.chr (!value land 0xFF) 370 + | Some c when is_hex_digit c -> 371 + value := !value * 16 + hex_value c; 372 + Source.cursor_advance cursor; 373 + loop () 374 + | _ -> error lexer (Invalid_unicode_escape "expected hex digit or '}'") 375 + in 376 + loop () 377 + | _ -> 378 + (* \uXXXX *) 379 + scan_hex_escape lexer 4 380 + 381 + (* Scan identifier *) 382 + let scan_identifier lexer = 383 + let cursor = lexer.cursor in 384 + Source.cursor_mark cursor; 385 + Source.cursor_skip_while cursor is_identifier_continue; 386 + let s = Source.cursor_slice cursor in 387 + (* Check for keyword *) 388 + match Token.keyword_of_string s with 389 + | Some kw -> Token.Keyword kw 390 + | None -> Token.Identifier s 391 + 392 + (* Scan template literal *) 393 + let scan_template lexer ~is_head = 394 + let cursor = lexer.cursor in 395 + let buf = Buffer.create 64 in 396 + let rec loop () = 397 + match Source.cursor_peek cursor with 398 + | None -> error lexer Unterminated_template 399 + | Some '`' -> 400 + Source.cursor_advance cursor; 401 + let s = Buffer.contents buf in 402 + if is_head then Token.Template (Token.Template_no_sub s) 403 + else Token.Template (Token.Template_tail s) 404 + | Some '$' -> 405 + (match Source.cursor_peek_n cursor 2 with 406 + | 
Some "${" -> 407 + Source.cursor_advance_n cursor 2; 408 + let s = Buffer.contents buf in 409 + if is_head then Token.Template (Token.Template_head s) 410 + else Token.Template (Token.Template_middle s) 411 + | _ -> 412 + Source.cursor_advance cursor; 413 + Buffer.add_char buf '$'; 414 + loop ()) 415 + | Some '\\' -> 416 + Source.cursor_advance cursor; 417 + (* Template literals allow line continuations *) 418 + (match Source.cursor_peek cursor with 419 + | Some '\r' -> 420 + Source.cursor_advance cursor; 421 + (match Source.cursor_peek cursor with 422 + | Some '\n' -> Source.cursor_advance cursor 423 + | _ -> ()); 424 + lexer.newline_before <- true; 425 + loop () 426 + | Some '\n' -> 427 + Source.cursor_advance cursor; 428 + lexer.newline_before <- true; 429 + loop () 430 + | _ -> 431 + Buffer.add_char buf (scan_escape_sequence lexer); 432 + loop ()) 433 + | Some c when is_line_terminator c -> 434 + Buffer.add_char buf c; 435 + Source.cursor_advance cursor; 436 + lexer.newline_before <- true; 437 + loop () 438 + | Some c -> 439 + Buffer.add_char buf c; 440 + Source.cursor_advance cursor; 441 + loop () 442 + in 443 + loop () 444 + 445 + (* Scan regexp literal (called when parser indicates regexp context) *) 446 + let scan_regexp lexer = 447 + let cursor = lexer.cursor in 448 + Source.cursor_advance cursor; (* Skip initial '/' *) 449 + let pattern = Buffer.create 64 in 450 + let in_class = ref false in 451 + let rec scan_pattern () = 452 + match Source.cursor_peek cursor with 453 + | None -> error lexer Unterminated_regexp 454 + | Some '/' when not !in_class -> 455 + Source.cursor_advance cursor; 456 + Buffer.contents pattern 457 + | Some '\\' -> 458 + Buffer.add_char pattern '\\'; 459 + Source.cursor_advance cursor; 460 + (match Source.cursor_peek cursor with 461 + | None -> error lexer Unterminated_regexp 462 + | Some c -> 463 + Buffer.add_char pattern c; 464 + Source.cursor_advance cursor); 465 + scan_pattern () 466 + | Some '[' -> 467 + in_class := true; 468 + 
Buffer.add_char pattern '['; 469 + Source.cursor_advance cursor; 470 + scan_pattern () 471 + | Some ']' -> 472 + in_class := false; 473 + Buffer.add_char pattern ']'; 474 + Source.cursor_advance cursor; 475 + scan_pattern () 476 + | Some c when is_line_terminator c -> 477 + error lexer Unterminated_regexp 478 + | Some c -> 479 + Buffer.add_char pattern c; 480 + Source.cursor_advance cursor; 481 + scan_pattern () 482 + in 483 + let p = scan_pattern () in 484 + (* Scan flags *) 485 + let flags = Buffer.create 8 in 486 + let rec scan_flags () = 487 + match Source.cursor_peek cursor with 488 + | Some c when is_identifier_continue c -> 489 + (* Valid flags: d, g, i, m, s, u, v, y *) 490 + (match c with 491 + | 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' -> 492 + Buffer.add_char flags c; 493 + Source.cursor_advance cursor; 494 + scan_flags () 495 + | _ when is_identifier_continue c -> 496 + error lexer (Invalid_regexp_flag c) 497 + | _ -> ()) 498 + | _ -> () 499 + in 500 + scan_flags (); 501 + Token.Regexp (p, Buffer.contents flags) 502 + 503 + (* Scan private identifier (#name) *) 504 + let scan_private_identifier lexer = 505 + let cursor = lexer.cursor in 506 + Source.cursor_advance cursor; (* Skip '#' *) 507 + Source.cursor_mark cursor; 508 + Source.cursor_skip_while cursor is_identifier_continue; 509 + let s = Source.cursor_slice cursor in 510 + Token.Private_identifier s 511 + 512 + (* Main lexer function *) 513 + let next_token lexer : Token.token = 514 + let cursor = lexer.cursor in 515 + lexer.newline_before <- false; 516 + skip_whitespace_and_comments lexer; 517 + 518 + Source.cursor_mark cursor; 519 + let start_pos = Source.cursor_pos cursor in 520 + 521 + let tok = 522 + match Source.cursor_peek cursor with 523 + | None -> Token.Eof 524 + | Some c -> 525 + match c with 526 + (* Identifiers and keywords *) 527 + | _ when is_identifier_start c -> 528 + scan_identifier lexer 529 + 530 + (* Numbers *) 531 + | '0'..'9' -> 532 + scan_number lexer 533 + | '.' 
-> 534 + (match Source.cursor_peek_n cursor 2 with 535 + | Some s when String.length s >= 2 && is_digit s.[1] -> 536 + scan_number lexer 537 + | _ -> 538 + Source.cursor_advance cursor; 539 + (match Source.cursor_peek_n cursor 2 with 540 + | Some ".." -> 541 + Source.cursor_advance_n cursor 2; 542 + Token.Ellipsis 543 + | _ -> Token.Dot)) 544 + 545 + (* Strings *) 546 + | '\'' | '"' -> 547 + scan_string lexer c 548 + 549 + (* Template literal *) 550 + | '`' -> 551 + Source.cursor_advance cursor; 552 + scan_template lexer ~is_head:true 553 + 554 + (* Private identifier *) 555 + | '#' -> 556 + scan_private_identifier lexer 557 + 558 + (* Operators and punctuation *) 559 + | '{' -> Source.cursor_advance cursor; Token.LBrace 560 + | '}' -> Source.cursor_advance cursor; Token.RBrace 561 + | '(' -> Source.cursor_advance cursor; Token.LParen 562 + | ')' -> Source.cursor_advance cursor; Token.RParen 563 + | '[' -> Source.cursor_advance cursor; Token.LBracket 564 + | ']' -> Source.cursor_advance cursor; Token.RBracket 565 + | ';' -> Source.cursor_advance cursor; Token.Semicolon 566 + | ',' -> Source.cursor_advance cursor; Token.Comma 567 + | ':' -> Source.cursor_advance cursor; Token.Colon 568 + | '~' -> Source.cursor_advance cursor; Token.Tilde 569 + 570 + | '+' -> 571 + Source.cursor_advance cursor; 572 + (match Source.cursor_peek cursor with 573 + | Some '+' -> Source.cursor_advance cursor; Token.Plus_plus 574 + | Some '=' -> Source.cursor_advance cursor; Token.Plus_eq 575 + | _ -> Token.Plus) 576 + 577 + | '-' -> 578 + Source.cursor_advance cursor; 579 + (match Source.cursor_peek cursor with 580 + | Some '-' -> Source.cursor_advance cursor; Token.Minus_minus 581 + | Some '=' -> Source.cursor_advance cursor; Token.Minus_eq 582 + | _ -> Token.Minus) 583 + 584 + | '*' -> 585 + Source.cursor_advance cursor; 586 + (match Source.cursor_peek cursor with 587 + | Some '*' -> 588 + Source.cursor_advance cursor; 589 + (match Source.cursor_peek cursor with 590 + | Some '=' -> 
Source.cursor_advance cursor; Token.Star_star_eq 591 + | _ -> Token.Star_star) 592 + | Some '=' -> Source.cursor_advance cursor; Token.Star_eq 593 + | _ -> Token.Star) 594 + 595 + | '/' -> 596 + if lexer.allow_regexp then 597 + scan_regexp lexer 598 + else begin 599 + Source.cursor_advance cursor; 600 + (match Source.cursor_peek cursor with 601 + | Some '=' -> Source.cursor_advance cursor; Token.Slash_eq 602 + | _ -> Token.Slash) 603 + end 604 + 605 + | '%' -> 606 + Source.cursor_advance cursor; 607 + (match Source.cursor_peek cursor with 608 + | Some '=' -> Source.cursor_advance cursor; Token.Percent_eq 609 + | _ -> Token.Percent) 610 + 611 + | '<' -> 612 + Source.cursor_advance cursor; 613 + (match Source.cursor_peek cursor with 614 + | Some '<' -> 615 + Source.cursor_advance cursor; 616 + (match Source.cursor_peek cursor with 617 + | Some '=' -> Source.cursor_advance cursor; Token.Lt_lt_eq 618 + | _ -> Token.Lt_lt) 619 + | Some '=' -> Source.cursor_advance cursor; Token.Lt_eq 620 + | _ -> Token.Lt) 621 + 622 + | '>' -> 623 + Source.cursor_advance cursor; 624 + (match Source.cursor_peek cursor with 625 + | Some '>' -> 626 + Source.cursor_advance cursor; 627 + (match Source.cursor_peek cursor with 628 + | Some '>' -> 629 + Source.cursor_advance cursor; 630 + (match Source.cursor_peek cursor with 631 + | Some '=' -> Source.cursor_advance cursor; Token.Gt_gt_gt_eq 632 + | _ -> Token.Gt_gt_gt) 633 + | Some '=' -> Source.cursor_advance cursor; Token.Gt_gt_eq 634 + | _ -> Token.Gt_gt) 635 + | Some '=' -> Source.cursor_advance cursor; Token.Gt_eq 636 + | _ -> Token.Gt) 637 + 638 + | '=' -> 639 + Source.cursor_advance cursor; 640 + (match Source.cursor_peek cursor with 641 + | Some '=' -> 642 + Source.cursor_advance cursor; 643 + (match Source.cursor_peek cursor with 644 + | Some '=' -> Source.cursor_advance cursor; Token.Eq_eq_eq 645 + | _ -> Token.Eq_eq) 646 + | Some '>' -> Source.cursor_advance cursor; Token.Arrow 647 + | _ -> Token.Eq) 648 + 649 + | '!' 
-> 650 + Source.cursor_advance cursor; 651 + (match Source.cursor_peek cursor with 652 + | Some '=' -> 653 + Source.cursor_advance cursor; 654 + (match Source.cursor_peek cursor with 655 + | Some '=' -> Source.cursor_advance cursor; Token.Not_eq_eq 656 + | _ -> Token.Not_eq) 657 + | _ -> Token.Bang) 658 + 659 + | '&' -> 660 + Source.cursor_advance cursor; 661 + (match Source.cursor_peek cursor with 662 + | Some '&' -> 663 + Source.cursor_advance cursor; 664 + (match Source.cursor_peek cursor with 665 + | Some '=' -> Source.cursor_advance cursor; Token.Ampersand_ampersand_eq 666 + | _ -> Token.Ampersand_ampersand) 667 + | Some '=' -> Source.cursor_advance cursor; Token.Ampersand_eq 668 + | _ -> Token.Ampersand) 669 + 670 + | '|' -> 671 + Source.cursor_advance cursor; 672 + (match Source.cursor_peek cursor with 673 + | Some '|' -> 674 + Source.cursor_advance cursor; 675 + (match Source.cursor_peek cursor with 676 + | Some '=' -> Source.cursor_advance cursor; Token.Pipe_pipe_eq 677 + | _ -> Token.Pipe_pipe) 678 + | Some '=' -> Source.cursor_advance cursor; Token.Pipe_eq 679 + | _ -> Token.Pipe) 680 + 681 + | '^' -> 682 + Source.cursor_advance cursor; 683 + (match Source.cursor_peek cursor with 684 + | Some '=' -> Source.cursor_advance cursor; Token.Caret_eq 685 + | _ -> Token.Caret) 686 + 687 + | '?' -> 688 + Source.cursor_advance cursor; 689 + (match Source.cursor_peek cursor with 690 + | Some '?' -> 691 + Source.cursor_advance cursor; 692 + (match Source.cursor_peek cursor with 693 + | Some '=' -> Source.cursor_advance cursor; Token.Question_question_eq 694 + | _ -> Token.Question_question) 695 + | Some '.' -> 696 + (* ?. but not ?.digits (which would be ? 
followed by number) *) 697 + (match Source.cursor_peek_n cursor 2 with 698 + | Some s when String.length s >= 2 && is_digit s.[1] -> 699 + Token.Question 700 + | _ -> 701 + Source.cursor_advance cursor; 702 + Token.Question_dot) 703 + | _ -> Token.Question) 704 + 705 + | _ -> 706 + Source.cursor_advance cursor; 707 + error lexer (Unexpected_char c) 708 + in 709 + 710 + let end_pos = Source.cursor_pos cursor in 711 + let loc = Source.mk_loc ~start:start_pos ~end_:end_pos () in 712 + 713 + (* Update allow_regexp based on token *) 714 + lexer.allow_regexp <- (match tok with 715 + | Token.Identifier _ | Token.Number _ | Token.BigInt _ | Token.String _ 716 + | Token.Regexp _ | Token.Template (Token.Template_no_sub _) 717 + | Token.Template (Token.Template_tail _) 718 + | Token.Keyword Token.Kw_this | Token.Keyword Token.Kw_true 719 + | Token.Keyword Token.Kw_false | Token.Keyword Token.Kw_null 720 + | Token.RParen | Token.RBracket | Token.RBrace 721 + | Token.Plus_plus | Token.Minus_minus -> false 722 + | _ -> true); 723 + 724 + { Token.tok; loc; preceded_by_newline = lexer.newline_before } 725 + 726 + (* Continue scanning template after expression *) 727 + let scan_template_tail lexer = 728 + scan_template lexer ~is_head:false 729 + 730 + (* Peek at next token without consuming *) 731 + let peek lexer = 732 + let saved_cursor = Source.cursor_save lexer.cursor in 733 + let saved_newline = lexer.newline_before in 734 + let saved_allow_regexp = lexer.allow_regexp in 735 + 736 + let token = next_token lexer in 737 + 738 + Source.cursor_restore lexer.cursor saved_cursor; 739 + lexer.newline_before <- saved_newline; 740 + lexer.allow_regexp <- saved_allow_regexp; 741 + 742 + token 743 + 744 + (* Error formatting *) 745 + let pp_error fmt = function 746 + | Unexpected_char c -> Format.fprintf fmt "Unexpected character: '%c'" c 747 + | Unexpected_eof -> Format.fprintf fmt "Unexpected end of file" 748 + | Invalid_number s -> Format.fprintf fmt "Invalid number: %s" s 749 + | 
Invalid_escape_sequence s -> Format.fprintf fmt "Invalid escape sequence: %s" s 750 + | Invalid_unicode_escape s -> Format.fprintf fmt "Invalid unicode escape: %s" s 751 + | Unterminated_string -> Format.fprintf fmt "Unterminated string literal" 752 + | Unterminated_comment -> Format.fprintf fmt "Unterminated comment" 753 + | Unterminated_regexp -> Format.fprintf fmt "Unterminated regular expression" 754 + | Unterminated_template -> Format.fprintf fmt "Unterminated template literal" 755 + | Invalid_regexp_flag c -> Format.fprintf fmt "Invalid regexp flag: '%c'" c 756 + | Legacy_octal_in_strict_mode -> Format.fprintf fmt "Octal literals are not allowed in strict mode" 757 + 758 + let show_error err = Format.asprintf "%a" pp_error err
+37
lib/quickjs/parser/lexer.mli
(** Handwritten JavaScript lexer. *)

(** Reasons the lexer can reject its input. The payload, where present, is
    the value interpolated into the message printed by [pp_error]. *)
type error =
  | Unexpected_char of char           (** The offending character *)
  | Unexpected_eof
  | Invalid_number of string          (** Text printed after "Invalid number:" *)
  | Invalid_escape_sequence of string (** Description of the bad escape *)
  | Invalid_unicode_escape of string  (** Description of the bad escape *)
  | Unterminated_string
  | Unterminated_comment
  | Unterminated_regexp
  | Unterminated_template
  | Invalid_regexp_flag of char       (** The rejected flag character *)
  | Legacy_octal_in_strict_mode

(** Raised on a lexical error, together with a source location. *)
exception Lexer_error of error * Source.loc

(** Lexer state. *)
type t

(** Create a new lexer over [content]; [filename] is used for locations. *)
val create : filename:string -> content:string -> t

(** Set strict mode (affects handling of certain constructs) *)
val set_strict_mode : t -> bool -> unit

(** Get next token, with its location and whether a line terminator
    preceded it. *)
val next_token : t -> Token.token

(** Peek at next token without consuming: the cursor position and the
    lexer's newline / regexp-context flags are restored afterwards. *)
val peek : t -> Token.token

(** Continue scanning template literal after expression.
    Unlike [next_token] this returns a bare [Token.t] (a template middle or
    tail part) with no location attached. *)
val scan_template_tail : t -> Token.t

(** Error formatting *)
val pp_error : Format.formatter -> error -> unit

(** [show_error e] is the message [pp_error] prints, as a string. *)
val show_error : error -> string
+200
lib/quickjs/parser/source.ml
(** Source code positions and locations.

    This module provides types for tracking positions in source code
    for error messages and source maps. *)

(** A position in source code *)
type pos = {
  offset : int;  (** Byte offset from start of source *)
  line : int;    (** Line number (1-indexed) *)
  column : int;  (** Column number (0-indexed, in bytes) *)
}

(** A range in source code *)
type loc = {
  start : pos;
  end_ : pos;
  source : string option;  (** Filename or URL *)
}

(** Position of the first byte of any source. *)
let dummy_pos = { offset = 0; line = 1; column = 0 }

(** Empty range at [dummy_pos] with no source name. *)
let dummy_loc = { start = dummy_pos; end_ = dummy_pos; source = None }

let mk_pos ~offset ~line ~column = { offset; line; column }

let mk_loc ~start ~end_ ?source () = { start; end_; source }

(** [pos_of_offset source offset] computes line and column by scanning
    [source] from its beginning, counting newline bytes. Scanning stops at
    the end of [source]; the returned [offset] field is the argument as
    given, even when it lies past the end. *)
let pos_of_offset source offset =
  let line = ref 1 in
  let column = ref 0 in
  let i = ref 0 in
  while !i < offset && !i < String.length source do
    if source.[!i] = '\n' then begin
      incr line;
      column := 0
    end else
      incr column;
    incr i
  done;
  { offset; line = !line; column = !column }

let pp_pos fmt pos =
  Format.fprintf fmt "%d:%d" pos.line pos.column

let pp_loc fmt loc =
  match loc.source with
  | Some s -> Format.fprintf fmt "%s:%a-%a" s pp_pos loc.start pp_pos loc.end_
  | None -> Format.fprintf fmt "%a-%a" pp_pos loc.start pp_pos loc.end_

let show_pos pos = Format.asprintf "%a" pp_pos pos

let show_loc loc = Format.asprintf "%a" pp_loc loc

(** Source file *)
type file = {
  filename : string;
  content : string;
  mutable line_starts : int array;  (** Byte offsets of line starts *)
}

(** Build a [file], recording the byte offset at which every line begins
    (offset 0, plus the byte after each newline). *)
let create_file ~filename ~content =
  let line_starts = ref [0] in
  String.iteri (fun i c ->
    if c = '\n' then
      line_starts := (i + 1) :: !line_starts
  ) content;
  {
    filename;
    content;
    line_starts = Array.of_list (List.rev !line_starts);
  }

(** [file_pos file offset] locates [offset] using the precomputed line
    starts: it finds the last line whose start is [<= offset]. *)
let file_pos file offset =
  (* Binary search for line. [mid] is rounded up so that [search mid hi]
     always makes progress when [hi = lo + 1]. *)
  let rec search lo hi =
    if lo >= hi then lo
    else
      let mid = (lo + hi + 1) / 2 in
      if file.line_starts.(mid) <= offset then
        search mid hi
      else
        search lo (mid - 1)
  in
  let line_idx = search 0 (Array.length file.line_starts - 1) in
  let line_start = file.line_starts.(line_idx) in
  {
    offset;
    line = line_idx + 1;
    column = offset - line_start;
  }

let file_loc file start_offset end_offset =
  {
    start = file_pos file start_offset;
    end_ = file_pos file end_offset;
    source = Some file.filename;
  }

(** Cursor for tracking position while lexing.
    The [mark_*] fields remember where the current lexeme started. *)
type cursor = {
  file : file;
  mutable offset : int;
  mutable line : int;
  mutable column : int;
  mutable mark_offset : int;
  mutable mark_line : int;
  mutable mark_column : int;
}

let create_cursor file = {
  file;
  offset = 0;
  line = 1;
  column = 0;
  mark_offset = 0;
  mark_line = 1;
  mark_column = 0;
}

(** Current position of the cursor. *)
let cursor_pos (cursor : cursor) : pos =
  { offset = cursor.offset; line = cursor.line; column = cursor.column }

(** Record the current position as the mark. *)
let cursor_mark (cursor : cursor) =
  cursor.mark_offset <- cursor.offset;
  cursor.mark_line <- cursor.line;
  cursor.mark_column <- cursor.column

(** Range from the mark to the current position. *)
let cursor_loc (cursor : cursor) : loc =
  let start : pos = {
    offset = cursor.mark_offset;
    line = cursor.mark_line;
    column = cursor.mark_column;
  } in
  { start; end_ = cursor_pos cursor; source = Some cursor.file.filename }

let cursor_eof (cursor : cursor) =
  cursor.offset >= String.length cursor.file.content

(** Byte under the cursor, or [None] at end of input. *)
let cursor_peek (cursor : cursor) =
  if cursor_eof cursor then None
  else Some cursor.file.content.[cursor.offset]

(** The next [n] bytes without consuming them, or [None] when fewer than
    [n] bytes remain. A negative [n] also yields [None] rather than letting
    [String.sub] raise. *)
let cursor_peek_n (cursor : cursor) n =
  if n < 0 || cursor.offset + n > String.length cursor.file.content then None
  else Some (String.sub cursor.file.content cursor.offset n)

(** Move one byte forward, updating line/column. Does nothing at end of
    input. *)
let cursor_advance (cursor : cursor) =
  if not (cursor_eof cursor) then begin
    if cursor.file.content.[cursor.offset] = '\n' then begin
      cursor.line <- cursor.line + 1;
      cursor.column <- 0
    end else
      cursor.column <- cursor.column + 1;
    cursor.offset <- cursor.offset + 1
  end

let cursor_advance_n cursor n =
  for _ = 1 to n do
    cursor_advance cursor
  done

(** Text between the mark and the current position. *)
let cursor_slice (cursor : cursor) =
  String.sub cursor.file.content cursor.mark_offset (cursor.offset - cursor.mark_offset)

(** Advance while the byte under the cursor satisfies [pred]. *)
let cursor_skip_while (cursor : cursor) pred =
  while not (cursor_eof cursor) &&
        pred cursor.file.content.[cursor.offset] do
    cursor_advance cursor
  done

(** Consume [expected] if it is the next byte; report whether it was. *)
let cursor_consume_char cursor expected =
  match cursor_peek cursor with
  | Some c when c = expected ->
    cursor_advance cursor;
    true
  | _ -> false

(** Consume [expected] if the input continues with it; report whether it
    did. *)
let cursor_consume_string cursor expected =
  match cursor_peek_n cursor (String.length expected) with
  | Some s when s = expected ->
    cursor_advance_n cursor (String.length expected);
    true
  | _ -> false

(** Cursor state for save/restore.
    The mark is part of the snapshot: a speculative scan between
    [cursor_save] and [cursor_restore] may call [cursor_mark], and if the
    mark were left pointing past the restored offset, [cursor_slice] would
    ask [String.sub] for a negative length and [cursor_loc] would report a
    start after its end. *)
type cursor_state = {
  s_offset : int;
  s_line : int;
  s_column : int;
  s_mark_offset : int;
  s_mark_line : int;
  s_mark_column : int;
}

let cursor_save (cursor : cursor) = {
  s_offset = cursor.offset;
  s_line = cursor.line;
  s_column = cursor.column;
  s_mark_offset = cursor.mark_offset;
  s_mark_line = cursor.mark_line;
  s_mark_column = cursor.mark_column;
}

let cursor_restore (cursor : cursor) state =
  cursor.offset <- state.s_offset;
  cursor.line <- state.s_line;
  cursor.column <- state.s_column;
  cursor.mark_offset <- state.s_mark_offset;
  cursor.mark_line <- state.s_mark_line;
  cursor.mark_column <- state.s_mark_column
+57
lib/quickjs/parser/source.mli
(** Source code positions and locations. *)

(** A position in source code *)
type pos = {
  offset : int;  (** Byte offset from start of source *)
  line : int;    (** Line number (1-indexed) *)
  column : int;  (** Column number (0-indexed, in bytes) *)
}

(** A range in source code *)
type loc = {
  start : pos;
  end_ : pos;
  source : string option;  (** Filename or URL *)
}

(** Offset 0, line 1, column 0. *)
val dummy_pos : pos

(** Empty range at [dummy_pos] with no source name. *)
val dummy_loc : loc

val mk_pos : offset:int -> line:int -> column:int -> pos
val mk_loc : start:pos -> end_:pos -> ?source:string -> unit -> loc

(** [pos_of_offset source offset] computes the line and column of [offset]
    by scanning [source] from its beginning. *)
val pos_of_offset : string -> int -> pos

(** Prints [line:column]. *)
val pp_pos : Format.formatter -> pos -> unit

(** Prints [start-end], prefixed by the source name and a colon when one is
    present. *)
val pp_loc : Format.formatter -> loc -> unit

val show_pos : pos -> string
val show_loc : loc -> string

(** Source file *)
type file

(** Build a file and index the byte offsets at which its lines begin. *)
val create_file : filename:string -> content:string -> file

(** [file_pos file offset] is the position of byte [offset], found by
    binary search over the file's line starts. *)
val file_pos : file -> int -> pos

(** [file_loc file start_offset end_offset] is the range between the two
    byte offsets, tagged with the file's name. *)
val file_loc : file -> int -> int -> loc

(** Cursor for tracking position while lexing *)
type cursor

(** A cursor at the start of the file (line 1, column 0). *)
val create_cursor : file -> cursor

(** Current position. *)
val cursor_pos : cursor -> pos

(** Remember the current position as the mark. *)
val cursor_mark : cursor -> unit

(** Range from the mark to the current position. *)
val cursor_loc : cursor -> loc

(** Whether the cursor is at or past the end of the content. *)
val cursor_eof : cursor -> bool

(** Byte under the cursor, or [None] at end of input. *)
val cursor_peek : cursor -> char option

(** The next [n] bytes without consuming them, or [None] when fewer than
    [n] remain. *)
val cursor_peek_n : cursor -> int -> string option

(** Advance one byte, updating line and column; no-op at end of input. *)
val cursor_advance : cursor -> unit

(** Advance [n] times. *)
val cursor_advance_n : cursor -> int -> unit

(** Text between the mark and the current position. *)
val cursor_slice : cursor -> string

(** Advance while the byte under the cursor satisfies the predicate. *)
val cursor_skip_while : cursor -> (char -> bool) -> unit

(** Consume the given byte if it is next; returns whether it was consumed. *)
val cursor_consume_char : cursor -> char -> bool

(** Consume the given string if the input continues with it; returns
    whether it was consumed. *)
val cursor_consume_string : cursor -> string -> bool

(** Cursor state for save/restore *)
type cursor_state

(** Snapshot the cursor for a later [cursor_restore]. *)
val cursor_save : cursor -> cursor_state

(** Rewind the cursor to a snapshot taken with [cursor_save]. *)
val cursor_restore : cursor -> cursor_state -> unit
+413
lib/quickjs/parser/token.ml
(** JavaScript token types.

    This module defines all token types for the JavaScript lexer.
    Token types match the ECMAScript specification. *)

(** Keywords *)
type keyword =
  (* Reserved words *)
  | Kw_await
  | Kw_break
  | Kw_case
  | Kw_catch
  | Kw_class
  | Kw_const
  | Kw_continue
  | Kw_debugger
  | Kw_default
  | Kw_delete
  | Kw_do
  | Kw_else
  | Kw_enum
  | Kw_export
  | Kw_extends
  | Kw_false
  | Kw_finally
  | Kw_for
  | Kw_function
  | Kw_if
  | Kw_import
  | Kw_in
  | Kw_instanceof
  | Kw_let
  | Kw_new
  | Kw_null
  | Kw_return
  | Kw_static
  | Kw_super
  | Kw_switch
  | Kw_this
  | Kw_throw
  | Kw_true
  | Kw_try
  | Kw_typeof
  | Kw_var
  | Kw_void
  | Kw_while
  | Kw_with
  | Kw_yield
  (* Strict mode reserved words *)
  | Kw_implements
  | Kw_interface
  | Kw_package
  | Kw_private
  | Kw_protected
  | Kw_public
  (* Contextual keywords (used as identifiers in some contexts) *)
  | Kw_as
  | Kw_async
  | Kw_from
  | Kw_get
  | Kw_meta
  | Kw_of
  | Kw_set
  | Kw_target
  | Kw_accessor

(** Source spelling of a keyword. *)
let keyword_to_string = function
  | Kw_await -> "await"
  | Kw_break -> "break"
  | Kw_case -> "case"
  | Kw_catch -> "catch"
  | Kw_class -> "class"
  | Kw_const -> "const"
  | Kw_continue -> "continue"
  | Kw_debugger -> "debugger"
  | Kw_default -> "default"
  | Kw_delete -> "delete"
  | Kw_do -> "do"
  | Kw_else -> "else"
  | Kw_enum -> "enum"
  | Kw_export -> "export"
  | Kw_extends -> "extends"
  | Kw_false -> "false"
  | Kw_finally -> "finally"
  | Kw_for -> "for"
  | Kw_function -> "function"
  | Kw_if -> "if"
  | Kw_import -> "import"
  | Kw_in -> "in"
  | Kw_instanceof -> "instanceof"
  | Kw_let -> "let"
  | Kw_new -> "new"
  | Kw_null -> "null"
  | Kw_return -> "return"
  | Kw_static -> "static"
  | Kw_super -> "super"
  | Kw_switch -> "switch"
  | Kw_this -> "this"
  | Kw_throw -> "throw"
  | Kw_true -> "true"
  | Kw_try -> "try"
  | Kw_typeof -> "typeof"
  | Kw_var -> "var"
  | Kw_void -> "void"
  | Kw_while -> "while"
  | Kw_with -> "with"
  | Kw_yield -> "yield"
  | Kw_implements -> "implements"
  | Kw_interface -> "interface"
  | Kw_package -> "package"
  | Kw_private -> "private"
  | Kw_protected -> "protected"
  | Kw_public -> "public"
  | Kw_as -> "as"
  | Kw_async -> "async"
  | Kw_from -> "from"
  | Kw_get -> "get"
  | Kw_meta -> "meta"
  | Kw_of -> "of"
  | Kw_set -> "set"
  | Kw_target -> "target"
  | Kw_accessor -> "accessor"

(** Look up a reserved or strict-mode-reserved word. Contextual keywords
    are deliberately not recognised here; see
    [contextual_keyword_of_string]. *)
let keyword_of_string = function
  | "await" -> Some Kw_await
  | "break" -> Some Kw_break
  | "case" -> Some Kw_case
  | "catch" -> Some Kw_catch
  | "class" -> Some Kw_class
  | "const" -> Some Kw_const
  | "continue" -> Some Kw_continue
  | "debugger" -> Some Kw_debugger
  | "default" -> Some Kw_default
  | "delete" -> Some Kw_delete
  | "do" -> Some Kw_do
  | "else" -> Some Kw_else
  | "enum" -> Some Kw_enum
  | "export" -> Some Kw_export
  | "extends" -> Some Kw_extends
  | "false" -> Some Kw_false
  | "finally" -> Some Kw_finally
  | "for" -> Some Kw_for
  | "function" -> Some Kw_function
  | "if" -> Some Kw_if
  | "import" -> Some Kw_import
  | "in" -> Some Kw_in
  | "instanceof" -> Some Kw_instanceof
  | "let" -> Some Kw_let
  | "new" -> Some Kw_new
  | "null" -> Some Kw_null
  | "return" -> Some Kw_return
  | "static" -> Some Kw_static
  | "super" -> Some Kw_super
  | "switch" -> Some Kw_switch
  | "this" -> Some Kw_this
  | "throw" -> Some Kw_throw
  | "true" -> Some Kw_true
  | "try" -> Some Kw_try
  | "typeof" -> Some Kw_typeof
  | "var" -> Some Kw_var
  | "void" -> Some Kw_void
  | "while" -> Some Kw_while
  | "with" -> Some Kw_with
  | "yield" -> Some Kw_yield
  | "implements" -> Some Kw_implements
  | "interface" -> Some Kw_interface
  | "package" -> Some Kw_package
  | "private" -> Some Kw_private
  | "protected" -> Some Kw_protected
  | "public" -> Some Kw_public
  | _ -> None

(** Look up a contextual keyword (as, async, from, get, meta, of, set,
    target, accessor). *)
let contextual_keyword_of_string = function
  | "as" -> Some Kw_as
  | "async" -> Some Kw_async
  | "from" -> Some Kw_from
  | "get" -> Some Kw_get
  | "meta" -> Some Kw_meta
  | "of" -> Some Kw_of
  | "set" -> Some Kw_set
  | "target" -> Some Kw_target
  | "accessor" -> Some Kw_accessor
  | _ -> None

(** Number literal types *)
type number_kind =
  | Decimal
  | Hex
  | Octal
  | Binary
  | Legacy_octal  (* 0-prefixed octal, forbidden in strict mode *)

(** String literal types *)
type string_kind =
  | Single_quoted
  | Double_quoted

(** Template literal parts *)
type template_part =
  | Template_head of string    (* `...${ *)
  | Template_middle of string  (* }...${ *)
  | Template_tail of string    (* }...` *)
  | Template_no_sub of string  (* `...` *)

(** Token type *)
type t =
  (* End of file *)
  | Eof

  (* Literals *)
  | Number of float * number_kind
  | BigInt of string  (* Stored as string, parsed later *)
  | String of string * string_kind
  | Template of template_part
  | Regexp of string * string  (* pattern, flags *)

  (* Identifiers and keywords *)
  | Identifier of string
  | Private_identifier of string  (* #name *)
  | Keyword of keyword

  (* Punctuators *)
  | LBrace                 (* { *)
  | RBrace                 (* } *)
  | LParen                 (* ( *)
  | RParen                 (* ) *)
  | LBracket               (* [ *)
  | RBracket               (* ] *)
  | Dot                    (* . *)
  | Ellipsis               (* ... *)
  | Semicolon              (* ; *)
  | Comma                  (* , *)
  | Colon                  (* : *)
  | Question               (* ? *)
  | Question_dot           (* ?. *)
  | Question_question      (* ?? *)
  | Question_question_eq   (* ??= *)
  | Arrow                  (* => *)

  (* Operators *)
  | Plus                   (* + *)
  | Minus                  (* - *)
  | Star                   (* * *)
  | Star_star              (* ** *)
  | Slash                  (* / *)
  | Percent                (* % *)
  | Plus_plus              (* ++ *)
  | Minus_minus            (* -- *)
  | Lt                     (* < *)
  | Gt                     (* > *)
  | Lt_eq                  (* <= *)
  | Gt_eq                  (* >= *)
  | Eq_eq                  (* == *)
  | Not_eq                 (* != *)
  | Eq_eq_eq               (* === *)
  | Not_eq_eq              (* !== *)
  | Ampersand              (* & *)
  | Pipe                   (* | *)
  | Caret                  (* ^ *)
  | Tilde                  (* ~ *)
  | Lt_lt                  (* << *)
  | Gt_gt                  (* >> *)
  | Gt_gt_gt               (* >>> *)
  | Ampersand_ampersand    (* && *)
  | Pipe_pipe              (* || *)
  | Bang                   (* ! *)

  (* Assignment operators *)
  | Eq                     (* = *)
  | Plus_eq                (* += *)
  | Minus_eq               (* -= *)
  | Star_eq                (* *= *)
  | Star_star_eq           (* **= *)
  | Slash_eq               (* /= *)
  | Percent_eq             (* %= *)
  | Lt_lt_eq               (* <<= *)
  | Gt_gt_eq               (* >>= *)
  | Gt_gt_gt_eq            (* >>>= *)
  | Ampersand_eq           (* &= *)
  | Pipe_eq                (* |= *)
  | Caret_eq               (* ^= *)
  | Ampersand_ampersand_eq (* &&= *)
  | Pipe_pipe_eq           (* ||= *)

(** Token with location *)
type token = {
  tok : t;
  loc : Source.loc;
  (* Line terminator before this token *)
  preceded_by_newline : bool;
}

let pp_number_kind fmt = function
  | Decimal -> Format.pp_print_string fmt "decimal"
  | Hex -> Format.pp_print_string fmt "hex"
  | Octal -> Format.pp_print_string fmt "octal"
  | Binary -> Format.pp_print_string fmt "binary"
  | Legacy_octal -> Format.pp_print_string fmt "legacy_octal"

(** Debug printer for a bare token. Literal payloads are shown; the number
    and string kinds are omitted. *)
let pp fmt = function
  | Eof -> Format.pp_print_string fmt "EOF"
  | Number (n, _) -> Format.fprintf fmt "Number(%g)" n
  | BigInt s -> Format.fprintf fmt "BigInt(%s)" s
  | String (s, _) -> Format.fprintf fmt "String(%S)" s
  | Template (Template_head s) -> Format.fprintf fmt "Template_head(%S)" s
  | Template (Template_middle s) -> Format.fprintf fmt "Template_middle(%S)" s
  | Template (Template_tail s) -> Format.fprintf fmt "Template_tail(%S)" s
  | Template (Template_no_sub s) -> Format.fprintf fmt "Template_no_sub(%S)" s
  | Regexp (p, f) -> Format.fprintf fmt "Regexp(/%s/%s)" p f
  | Identifier s -> Format.fprintf fmt "Identifier(%s)" s
  | Private_identifier s -> Format.fprintf fmt "Private_identifier(#%s)" s
  | Keyword kw -> Format.fprintf fmt "Keyword(%s)" (keyword_to_string kw)
  | LBrace -> Format.pp_print_string fmt "{"
  | RBrace -> Format.pp_print_string fmt "}"
  | LParen -> Format.pp_print_string fmt "("
  | RParen -> Format.pp_print_string fmt ")"
  | LBracket -> Format.pp_print_string fmt "["
  | RBracket -> Format.pp_print_string fmt "]"
  | Dot -> Format.pp_print_string fmt "."
  | Ellipsis -> Format.pp_print_string fmt "..."
  | Semicolon -> Format.pp_print_string fmt ";"
  | Comma -> Format.pp_print_string fmt ","
  | Colon -> Format.pp_print_string fmt ":"
  | Question -> Format.pp_print_string fmt "?"
  | Question_dot -> Format.pp_print_string fmt "?."
  | Question_question -> Format.pp_print_string fmt "??"
  | Question_question_eq -> Format.pp_print_string fmt "??="
  | Arrow -> Format.pp_print_string fmt "=>"
  | Plus -> Format.pp_print_string fmt "+"
  | Minus -> Format.pp_print_string fmt "-"
  | Star -> Format.pp_print_string fmt "*"
  | Star_star -> Format.pp_print_string fmt "**"
  | Slash -> Format.pp_print_string fmt "/"
  | Percent -> Format.pp_print_string fmt "%"
  | Plus_plus -> Format.pp_print_string fmt "++"
  | Minus_minus -> Format.pp_print_string fmt "--"
  | Lt -> Format.pp_print_string fmt "<"
  | Gt -> Format.pp_print_string fmt ">"
  | Lt_eq -> Format.pp_print_string fmt "<="
  | Gt_eq -> Format.pp_print_string fmt ">="
  | Eq_eq -> Format.pp_print_string fmt "=="
  | Not_eq -> Format.pp_print_string fmt "!="
  | Eq_eq_eq -> Format.pp_print_string fmt "==="
  | Not_eq_eq -> Format.pp_print_string fmt "!=="
  | Ampersand -> Format.pp_print_string fmt "&"
  | Pipe -> Format.pp_print_string fmt "|"
  | Caret -> Format.pp_print_string fmt "^"
  | Tilde -> Format.pp_print_string fmt "~"
  | Lt_lt -> Format.pp_print_string fmt "<<"
  | Gt_gt -> Format.pp_print_string fmt ">>"
  | Gt_gt_gt -> Format.pp_print_string fmt ">>>"
  | Ampersand_ampersand -> Format.pp_print_string fmt "&&"
  | Pipe_pipe -> Format.pp_print_string fmt "||"
  | Bang -> Format.pp_print_string fmt "!"
  | Eq -> Format.pp_print_string fmt "="
  | Plus_eq -> Format.pp_print_string fmt "+="
  | Minus_eq -> Format.pp_print_string fmt "-="
  | Star_eq -> Format.pp_print_string fmt "*="
  | Star_star_eq -> Format.pp_print_string fmt "**="
  | Slash_eq -> Format.pp_print_string fmt "/="
  | Percent_eq -> Format.pp_print_string fmt "%="
  | Lt_lt_eq -> Format.pp_print_string fmt "<<="
  | Gt_gt_eq -> Format.pp_print_string fmt ">>="
  | Gt_gt_gt_eq -> Format.pp_print_string fmt ">>>="
  | Ampersand_eq -> Format.pp_print_string fmt "&="
  | Pipe_eq -> Format.pp_print_string fmt "|="
  | Caret_eq -> Format.pp_print_string fmt "^="
  | Ampersand_ampersand_eq -> Format.pp_print_string fmt "&&="
  | Pipe_pipe_eq -> Format.pp_print_string fmt "||="

(** Debug printer for a located token: the token, its location, and a
    [newline] tag when a line terminator preceded it. *)
let pp_token fmt tok =
  Format.fprintf fmt "%a at %a%s"
    pp tok.tok
    Source.pp_loc tok.loc
    (if tok.preceded_by_newline then " [newline]" else "")

let show tok = Format.asprintf "%a" pp tok

let is_assignment_op = function
  | Eq | Plus_eq | Minus_eq | Star_eq | Star_star_eq | Slash_eq | Percent_eq
  | Lt_lt_eq | Gt_gt_eq | Gt_gt_gt_eq | Ampersand_eq | Pipe_eq | Caret_eq
  | Ampersand_ampersand_eq | Pipe_pipe_eq | Question_question_eq -> true
  | _ -> false

let is_binary_op = function
  | Plus | Minus | Star | Star_star | Slash | Percent
  | Lt | Gt | Lt_eq | Gt_eq | Eq_eq | Not_eq | Eq_eq_eq | Not_eq_eq
  | Ampersand | Pipe | Caret | Lt_lt | Gt_gt | Gt_gt_gt
  | Ampersand_ampersand | Pipe_pipe | Question_question
  | Keyword Kw_in | Keyword Kw_instanceof -> true
  | _ -> false

let is_unary_op = function
  | Plus | Minus | Bang | Tilde | Plus_plus | Minus_minus
  | Keyword Kw_typeof | Keyword Kw_void | Keyword Kw_delete -> true
  | _ -> false

let is_update_op = function
  | Plus_plus | Minus_minus -> true
  | _ -> false

(** Check if keyword is reserved in strict mode.
    NOTE(review): ECMAScript also restricts let, static and yield in strict
    mode code; they are not listed here - confirm whether the parser handles
    them separately. *)
let is_strict_reserved = function
  | Kw_implements | Kw_interface | Kw_package
  | Kw_private | Kw_protected | Kw_public -> true
  | _ -> false

(** Check if keyword can be used as identifier in given context.

    - Contextual keywords are always usable as identifiers.
    - Strict-mode reserved words are usable only when [strict] is false.
      (Previously they were rejected in both modes, which made the [strict]
      argument have no effect.)
    - Every other keyword is never usable. *)
let is_identifier_name kw ~strict =
  match kw with
  | Kw_as | Kw_async | Kw_from | Kw_get | Kw_meta | Kw_of | Kw_set
  | Kw_target | Kw_accessor -> true
  | kw when is_strict_reserved kw -> not strict
  | _ -> false
+170
lib/quickjs/parser/token.mli
··· 1 + (** JavaScript token types. *) 2 + 3 + (** Keywords *) 4 + type keyword = 5 + | Kw_await 6 + | Kw_break 7 + | Kw_case 8 + | Kw_catch 9 + | Kw_class 10 + | Kw_const 11 + | Kw_continue 12 + | Kw_debugger 13 + | Kw_default 14 + | Kw_delete 15 + | Kw_do 16 + | Kw_else 17 + | Kw_enum 18 + | Kw_export 19 + | Kw_extends 20 + | Kw_false 21 + | Kw_finally 22 + | Kw_for 23 + | Kw_function 24 + | Kw_if 25 + | Kw_import 26 + | Kw_in 27 + | Kw_instanceof 28 + | Kw_let 29 + | Kw_new 30 + | Kw_null 31 + | Kw_return 32 + | Kw_static 33 + | Kw_super 34 + | Kw_switch 35 + | Kw_this 36 + | Kw_throw 37 + | Kw_true 38 + | Kw_try 39 + | Kw_typeof 40 + | Kw_var 41 + | Kw_void 42 + | Kw_while 43 + | Kw_with 44 + | Kw_yield 45 + | Kw_implements 46 + | Kw_interface 47 + | Kw_package 48 + | Kw_private 49 + | Kw_protected 50 + | Kw_public 51 + | Kw_as 52 + | Kw_async 53 + | Kw_from 54 + | Kw_get 55 + | Kw_meta 56 + | Kw_of 57 + | Kw_set 58 + | Kw_target 59 + | Kw_accessor 60 + 61 + val keyword_to_string : keyword -> string 62 + val keyword_of_string : string -> keyword option 63 + val contextual_keyword_of_string : string -> keyword option 64 + 65 + (** Number literal types *) 66 + type number_kind = 67 + | Decimal 68 + | Hex 69 + | Octal 70 + | Binary 71 + | Legacy_octal 72 + 73 + (** String literal types *) 74 + type string_kind = 75 + | Single_quoted 76 + | Double_quoted 77 + 78 + (** Template literal parts *) 79 + type template_part = 80 + | Template_head of string 81 + | Template_middle of string 82 + | Template_tail of string 83 + | Template_no_sub of string 84 + 85 + (** Token type *) 86 + type t = 87 + | Eof 88 + | Number of float * number_kind 89 + | BigInt of string 90 + | String of string * string_kind 91 + | Template of template_part 92 + | Regexp of string * string 93 + | Identifier of string 94 + | Private_identifier of string 95 + | Keyword of keyword 96 + | LBrace 97 + | RBrace 98 + | LParen 99 + | RParen 100 + | LBracket 101 + | RBracket 102 + | Dot 103 + | Ellipsis 
104 + | Semicolon 105 + | Comma 106 + | Colon 107 + | Question 108 + | Question_dot 109 + | Question_question 110 + | Question_question_eq 111 + | Arrow 112 + | Plus 113 + | Minus 114 + | Star 115 + | Star_star 116 + | Slash 117 + | Percent 118 + | Plus_plus 119 + | Minus_minus 120 + | Lt 121 + | Gt 122 + | Lt_eq 123 + | Gt_eq 124 + | Eq_eq 125 + | Not_eq 126 + | Eq_eq_eq 127 + | Not_eq_eq 128 + | Ampersand 129 + | Pipe 130 + | Caret 131 + | Tilde 132 + | Lt_lt 133 + | Gt_gt 134 + | Gt_gt_gt 135 + | Ampersand_ampersand 136 + | Pipe_pipe 137 + | Bang 138 + | Eq 139 + | Plus_eq 140 + | Minus_eq 141 + | Star_eq 142 + | Star_star_eq 143 + | Slash_eq 144 + | Percent_eq 145 + | Lt_lt_eq 146 + | Gt_gt_eq 147 + | Gt_gt_gt_eq 148 + | Ampersand_eq 149 + | Pipe_eq 150 + | Caret_eq 151 + | Ampersand_ampersand_eq 152 + | Pipe_pipe_eq 153 + 154 + (** Token with location *) 155 + type token = { 156 + tok : t; 157 + loc : Source.loc; 158 + preceded_by_newline : bool; 159 + } 160 + 161 + val pp : Format.formatter -> t -> unit 162 + val pp_token : Format.formatter -> token -> unit 163 + val show : t -> string 164 + 165 + val is_assignment_op : t -> bool 166 + val is_binary_op : t -> bool 167 + val is_unary_op : t -> bool 168 + val is_update_op : t -> bool 169 + val is_strict_reserved : keyword -> bool 170 + val is_identifier_name : keyword -> strict:bool -> bool
+29
lib/quickjs/quickjs.ml
··· 1 + (** ocaml-quickjs: Pure OCaml JavaScript Engine 2 + 3 + A faithful port of QuickJS to OCaml, supporting ES2024. *) 4 + 5 + (** Core runtime types *) 6 + module Tag = Quickjs_core.Tag 7 + module Value = Quickjs_core.Value 8 + module Atom = Quickjs_core.Atom 9 + module Runtime = Quickjs_core.Runtime 10 + module Context = Quickjs_core.Context 11 + 12 + (** Parser *) 13 + module Source = Quickjs_parser.Source 14 + module Token = Quickjs_parser.Token 15 + module Lexer = Quickjs_parser.Lexer 16 + 17 + (** {1 Quick evaluation} 18 + 19 + These are convenience functions for common operations. *) 20 + 21 + let create_runtime () = Runtime.create () 22 + 23 + let create_context rt = Context.create rt 24 + 25 + (** Version string *) 26 + let version = "0.1.0-dev" 27 + 28 + (** Version matching QuickJS C *) 29 + let quickjs_version = "2024-09-13"
+36
ocaml-quickjs.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "Pure OCaml implementation of QuickJS JavaScript engine" 4 + description: "A faithful port of QuickJS to OCaml, supporting ES2024" 5 + maintainer: ["dev@anthropic.com"] 6 + authors: ["Anthropic"] 7 + license: "MIT" 8 + homepage: "https://github.com/anthropics/ocaml-quickjs" 9 + bug-reports: "https://github.com/anthropics/ocaml-quickjs/issues" 10 + depends: [ 11 + "ocaml" {>= "5.1"} 12 + "dune" {>= "3.20" & >= "3.20"} 13 + "zarith" {>= "1.13"} 14 + "fmt" {>= "0.9"} 15 + "sedlex" {>= "3.2"} 16 + "yojson" {>= "2.1"} 17 + "cmdliner" {>= "1.2"} 18 + "alcotest" {with-test} 19 + "odoc" {with-doc} 20 + ] 21 + build: [ 22 + ["dune" "subst"] {dev} 23 + [ 24 + "dune" 25 + "build" 26 + "-p" 27 + name 28 + "-j" 29 + jobs 30 + "@install" 31 + "@runtest" {with-test} 32 + "@doc" {with-doc} 33 + ] 34 + ] 35 + dev-repo: "git+https://github.com/anthropics/ocaml-quickjs.git" 36 + x-maintenance-intent: ["(latest)"]
+1
test/dune
··· 1 + ; Tests directory
+6
test/runner/dune
··· 1 + (executable 2 + (name test262_runner) 3 + (public_name qjs-test262) 4 + (libraries quickjs str unix) 5 + (flags (:standard -w -32-37-69)) ; Suppress warnings during development 6 + (preprocess no_preprocessing))
+263
test/runner/test262_runner.ml
(** Test262 test runner for ocaml-quickjs.

    This runner executes ECMAScript Test262 conformance tests against
    our JavaScript implementation. *)

type result =
  | Pass
  | Fail of string
  | Skip of string
  | Timeout
  | Error of string

type test_metadata = {
  description : string;
  es5id : string option;
  es6id : string option;
  esid : string option;
  info : string option;
  negative : negative option;
  includes : string list;
  flags : string list;
  features : string list;
  locale : string list;
}

and negative = {
  phase : string;  (* "parse", "resolution", "runtime" *)
  type_ : string;  (* Error type expected *)
}

type config = {
  test_dir : string;
  harness_dir : string;
  timeout_ms : int;
  features : string list;
  skip_patterns : string list;
  verbose : bool;
}

let default_config = {
  test_dir = "test262/test";
  harness_dir = "test262/harness";
  timeout_ms = 10000;
  features = [];
  skip_patterns = [];
  verbose = false;
}

(* Metadata returned when a file has no (or an unterminated) frontmatter. *)
let empty_metadata : test_metadata = {
  description = "";
  es5id = None;
  es6id = None;
  esid = None;
  info = None;
  negative = None;
  includes = [];
  flags = [];
  features = [];
  locale = [];
}

(* Read a whole file; the channel is closed even if reading raises.
   Raises Sys_error when the file cannot be opened or read. *)
let read_file path =
  In_channel.with_open_bin path In_channel.input_all

(* Return the text between the frontmatter delimiters, if present.
   The opening delimiter is searched for as a whole: Test262 files start
   with a // copyright comment, so anchoring on the first slash of the
   file would never find the block. *)
let find_frontmatter content =
  match Str.search_forward (Str.regexp_string "/*---") content 0 with
  | exception Not_found -> None
  | start ->
    let body_start = start + 5 in
    (match Str.search_forward (Str.regexp_string "---*/") content body_start with
     | exception Not_found -> None
     | body_end -> Some (String.sub content body_start (body_end - body_start)))

(* Number of leading blanks (spaces or tabs) on a line. *)
let indent_of line =
  let n = String.length line in
  let rec go i =
    if i < n && (line.[i] = ' ' || line.[i] = '\t') then go (i + 1) else i
  in
  go 0

(* Strip one pair of matching single or double quotes, if any. *)
let unquote s =
  let n = String.length s in
  if n >= 2 && (s.[0] = '"' || s.[0] = '\'') && s.[n - 1] = s.[0]
  then String.sub s 1 (n - 2)
  else s

(* True for YAML block-scalar headers such as > or | (with optional chomping). *)
let is_block_indicator value =
  match value with
  | ">" | "|" | ">-" | "|-" | ">+" | "|+" -> true
  | _ -> false

(* Parse a flow sequence [a, b, c]; anything else yields the empty list. *)
let inline_list value =
  let n = String.length value in
  if n >= 2 && value.[0] = '[' && value.[n - 1] = ']' then
    String.sub value 1 (n - 2)
    |> String.split_on_char ','
    |> List.map (fun item -> unquote (String.trim item))
    |> List.filter (fun item -> item <> "")
  else []

(* Parse a block-sequence entry (a line starting with a dash) into zero or
   one items. *)
let dash_item text =
  let n = String.length text in
  if n > 0 && text.[0] = '-' then
    match unquote (String.trim (String.sub text 1 (n - 1))) with
    | "" -> []
    | item -> [ item ]
  else []

(* Split a trimmed line at its first colon into a trimmed key and value. *)
let split_key_value text =
  match String.index_opt text ':' with
  | None -> None
  | Some colon ->
    let key = String.trim (String.sub text 0 colon) in
    let value =
      String.trim (String.sub text (colon + 1) (String.length text - colon - 1))
    in
    Some (key, value)

(* Record the value found on the same line as a top-level key. *)
let start_key (md : test_metadata) key value : test_metadata =
  match key with
  | "description" ->
    (* A block-scalar header means the text follows on indented lines. *)
    { md with description = (if is_block_indicator value then "" else value) }
  | "es5id" -> { md with es5id = Some value }
  | "es6id" -> { md with es6id = Some value }
  | "esid" -> { md with esid = Some value }
  | "info" ->
    let info =
      if value = "" || is_block_indicator value then None else Some value
    in
    { md with info }
  | "includes" -> { md with includes = inline_list value }
  | "flags" -> { md with flags = inline_list value }
  | "features" -> { md with features = inline_list value }
  | "locale" -> { md with locale = inline_list value }
  | _ -> md

(* Fold an indented (or dash-prefixed) line into the key it belongs to. *)
let continue_key (md : test_metadata) key text : test_metadata =
  match key with
  | "description" ->
    let joined =
      if md.description = "" then text else md.description ^ " " ^ text
    in
    { md with description = joined }
  | "info" ->
    let joined =
      match md.info with
      | None -> text
      | Some previous -> previous ^ "\n" ^ text
    in
    { md with info = Some joined }
  | "includes" -> { md with includes = md.includes @ dash_item text }
  | "flags" -> { md with flags = md.flags @ dash_item text }
  | "features" -> { md with features = md.features @ dash_item text }
  | "locale" -> { md with locale = md.locale @ dash_item text }
  | "negative" ->
    let current : negative =
      match md.negative with
      | Some n -> n
      | None -> { phase = ""; type_ = "" }
    in
    (match split_key_value text with
     | Some ("phase", v) -> { md with negative = Some { current with phase = v } }
     | Some ("type", v) -> { md with negative = Some { current with type_ = v } }
     | _ -> md)
  | _ -> md

(* Parse YAML frontmatter from test file.

   This is a deliberately small YAML subset, sufficient for Test262:
   top-level scalar keys, block scalars for description/info, flow or
   block sequences for includes/flags/features/locale, and the nested
   phase/type mapping under negative. Files without frontmatter yield
   [empty_metadata]. *)
let parse_metadata content =
  match find_frontmatter content with
  | None -> empty_metadata
  | Some yaml ->
    let lines = String.split_on_char '\n' yaml in
    (* Top-level keys sit at the indentation of the first non-blank line;
       anything deeper belongs to the most recent key. *)
    let base =
      match List.find_opt (fun l -> String.trim l <> "") lines with
      | Some l -> indent_of l
      | None -> 0
    in
    let step ((md : test_metadata), key) line =
      let text = String.trim line in
      if text = "" then (md, key)
      else if indent_of line > base || text.[0] = '-' then
        (continue_key md key text, key)
      else
        match split_key_value text with
        | None -> (md, key)
        | Some (k, v) -> (start_key md k v, k)
    in
    let (md, _) = List.fold_left step (empty_metadata, "") lines in
    md

(* Check if test should be skipped.
   Returns [Some reason] when the test must not be run, [None] otherwise. *)
let should_skip (config : config) (filename : string) (metadata : test_metadata) : string option =
  (* Skip if uses unsupported features *)
  let unsupported_features = [
    (* We can incrementally add support and remove from this list *)
    "Atomics"; "SharedArrayBuffer";
    (* Platform-specific *)
    "caller"; "Intl";
  ] in
  let uses_unsupported =
    List.exists (fun f -> List.mem f unsupported_features) metadata.features
  in
  if uses_unsupported then
    Some ("Unsupported feature: " ^ String.concat ", " metadata.features)
  else
    (* Check skip patterns (anchored at the start of the file name) *)
    let matches_skip =
      List.exists
        (fun pattern -> Str.string_match (Str.regexp pattern) filename 0)
        config.skip_patterns
    in
    if matches_skip then Some "Matched skip pattern" else None

(* Load harness file; [None] when it cannot be read. *)
let load_harness config name =
  let path = Filename.concat config.harness_dir name in
  match read_file path with
  | s -> Some s
  | exception Sys_error _ -> None

(* Lex the whole source and compare the outcome with the expectation
   recorded in the metadata. Only lexical errors can be detected for now. *)
let lex_source ~filename ~content (metadata : test_metadata) =
  let expects_parse_error =
    match metadata.negative with
    | Some { phase = "parse"; _ } -> true
    | _ -> false
  in
  try
    let lexer = Quickjs.Lexer.create ~filename ~content in
    let rec drain () =
      let tok = Quickjs.Lexer.next_token lexer in
      match tok.Quickjs.Token.tok with
      | Quickjs.Token.Eof -> ()
      | _ -> drain ()
    in
    drain ();
    (* If negative test expecting parse error, this is a failure *)
    if expects_parse_error then Fail "Expected parse error but succeeded"
    else Pass
  with
  | Quickjs.Lexer.Lexer_error (err, _loc) ->
    (* If negative test expecting this error, it's a pass *)
    if expects_parse_error then Pass
    else Fail (Quickjs.Lexer.show_error err)

(* Run a single test *)
let run_test config filename =
  try
    let content = read_file filename in
    let metadata = parse_metadata content in
    match should_skip config filename metadata with
    | Some reason -> Skip reason
    | None ->
      (* For now, just try to lex the file as a basic test *)
      lex_source ~filename ~content metadata
  with
  | Sys_error msg -> Error msg
  | exn -> Error (Printexc.to_string exn)

(* Collect all test files under [dir], in a deterministic (sorted) order.
   Files whose name contains _FIXTURE are support files, not tests, and
   are left out. Unreadable or vanished entries are ignored. *)
let collect_tests dir =
  let is_fixture path =
    match
      Str.search_forward (Str.regexp_string "_FIXTURE") (Filename.basename path) 0
    with
    | _ -> true
    | exception Not_found -> false
  in
  let rec walk acc path =
    match Sys.is_directory path with
    | exception Sys_error _ -> acc
    | true ->
      (match Sys.readdir path with
       | exception Sys_error _ -> acc
       | entries ->
         Array.sort compare entries;
         Array.fold_left
           (fun acc entry -> walk acc (Filename.concat path entry))
           acc entries)
    | false ->
      if Filename.check_suffix path ".js" && not (is_fixture path)
      then path :: acc
      else acc
  in
  List.rev (walk [] dir)

(* Run all tests and collect results.
   Returns the process exit code: 0 when nothing failed or errored. *)
let run_all config =
  if not (Sys.file_exists config.test_dir) then begin
    Printf.eprintf "Test directory not found: %s\n" config.test_dir;
    1
  end else begin
    let tests = collect_tests config.test_dir in
    let total = List.length tests in
    let pass = ref 0 in
    let fail = ref 0 in
    let skip = ref 0 in
    let error = ref 0 in

    List.iteri (fun i filename ->
      if config.verbose then
        (* Flush so the file name is visible even if the test hangs. *)
        Printf.printf "[%d/%d] %s...%!" (i + 1) total filename;
      let report label msg =
        if config.verbose then Printf.printf " %s: %s\n" label msg
        else Printf.printf "%s: %s - %s\n" label filename msg
      in
      match run_test config filename with
      | Pass ->
        incr pass;
        if config.verbose then print_endline " PASS"
      | Fail msg ->
        incr fail;
        report "FAIL" msg
      | Skip msg ->
        incr skip;
        if config.verbose then Printf.printf " SKIP: %s\n" msg
      | Timeout ->
        incr fail;
        if config.verbose then print_endline " TIMEOUT"
        else Printf.printf "TIMEOUT: %s\n" filename
      | Error msg ->
        incr error;
        report "ERROR" msg
    ) tests;

    (* Guard against an empty test set, which would otherwise print nan. *)
    let percent n =
      if total = 0 then 0.0
      else 100.0 *. float_of_int n /. float_of_int total
    in
    Printf.printf "\n=== Test262 Results ===\n";
    Printf.printf "Total: %d\n" total;
    Printf.printf "Pass: %d (%.1f%%)\n" !pass (percent !pass);
    Printf.printf "Fail: %d (%.1f%%)\n" !fail (percent !fail);
    Printf.printf "Skip: %d (%.1f%%)\n" !skip (percent !skip);
    Printf.printf "Error: %d (%.1f%%)\n" !error (percent !error);

    if !fail = 0 && !error = 0 then 0 else 1
  end

(* Command line interface *)
let () =
  let test_dir = ref default_config.test_dir in
  let harness_dir = ref default_config.harness_dir in
  let verbose = ref false in
  let single_test = ref None in

  let usage = "qjs-test262 [options]" in
  let speclist = [
    ("--test-dir", Arg.Set_string test_dir, "Test262 test directory");
    ("--harness-dir", Arg.Set_string harness_dir, "Test262 harness directory");
    ("--verbose", Arg.Set verbose, "Verbose output");
    ("--test", Arg.String (fun s -> single_test := Some s), "Run single test file");
  ] in

  Arg.parse speclist (fun _ -> ()) usage;

  let config = {
    default_config with
    test_dir = !test_dir;
    harness_dir = !harness_dir;
    verbose = !verbose;
  } in

  let exit_code = match !single_test with
    | Some filename ->
      (match run_test config filename with
       | Pass -> print_endline "PASS"; 0
       | Fail msg -> Printf.printf "FAIL: %s\n" msg; 1
       | Skip msg -> Printf.printf "SKIP: %s\n" msg; 0
       | Timeout -> print_endline "TIMEOUT"; 1
       | Error msg -> Printf.printf "ERROR: %s\n" msg; 1)
    | None ->
      run_all config
  in
  exit exit_code