···11+1.14.0 (16-Sep-2025)
22+--------------------
33+44+* Thread-safety with OCaml 5 (#574).
55+66+* Introduce [Re.Pcre.get_named_substring_opt]. A non raising version of
77+ [Re.Pcre.get_named_substring] (#525)
88+99+* Introduce parsing functions in `Re.{Perl,Pcre,Emacs,Glob}` that return a
1010+ result instead of raising. (#542)
1111+1212+* Introduce experimental streaming API `Re.Stream`. (#456)
1313+1414+* Make [Re.Str] functions tail recursive (#539)
1515+1616+* Fix [Re.Pcre.split]. Regression introduced in 1.12 and a previous bug with
1717+ [Re.Pcre.split] (#538).
1818+1919+* Avoid parsing unnecessary patterns supported only by `Re.Emacs` in `Re.Str`
2020+ (#563)
2121+2222+1.13.1 (30-Sep-2024)
2323+--------------------
2424+2525+* Fix re on jsoo (#150)
2626+2727+1.13.0 (30-Sep-2024)
2828+--------------------
2929+3030+* Add non raising versions of all [Re.Group] functions (#414, fixes #150)
3131+3232+* Add support for hex and octal of the form: `\o{...}` and `\x{...}` (#403)
3333+3434+* Add support for octal characters using `\0dd` and `\ddd` (#402)
3535+3636+* Add support for `\Q...\E` quoted expressions in Pcre and Perl syntax (#401)
3737+3838+* Re.execp and related functions raise [Invalid_argument "$function"] when [pos]
3939+ or [len] arguments are out of bounds. In 1.12.0, a regression was introduced
4040+ that raised [Invalid_argument _] from [String.get].
4141+4242+1.12.0 (29-Aug-2024)
4343+--------------------
4444+4545+* Add `Re.split_delim` (#233)
4646+* Fix handling of empty matches in splitting and substitution functions (#233)
4747+* Add support for character classes in `Re.Posix` (#263)
4848+4949+1.11.0 (19-Aug-2023)
5050+--------------------
5151+5252+* Add `Re.group_count` to get the number of groups in a compiled regex (#218)
5353+* Add `Re.exec_partial_detailed` to allow resuming searches from partial inputs
5454+ (#219)
5555+* Re-export `Re.Perl`'s `Parse_error` and `Not_supported` exceptions
5656+ in Pcre (#222)
5757+* Add support for `DOTALL` flag in `Re.Pcre.regexp` (#225)
5858+* Add support for named groups (#223)
5959+* Add support for some control characters in `Re.Perl` (#227)
6060+6161+1.10.4 (27-Apr-2022)
6262+--------------------
6363+6464+* Improve handling of word boundaries (#179)
6565+6666+1.10.3 (13-Sep-2021)
6767+--------------------
6868+6969+* Glob: change optional argument `?backslash_escapes` to `?match_backslashes`.
7070+ The interpretation of backslashes in the glob pattern remains unchanged with
7171+ the new option, but forward slashes match backslashes when activated (#199)
7272+7373+1.10.2 (09-Sep-2021)
7474+--------------------
7575+7676+* Fix missing aliases introduced in 1.10.1
7777+7878+1.10.1 (08-Sep-2021)
7979+--------------------
8080+8181+* Glob: add optional argument `?backslash_escapes` to control interpretation of
8282+ backslashes (useful under Windows) (#197, #198)
8383+8484+* Restore accidentally deleted `*_seq` deprecated aliases.
8585+8686+1.10.0 (25-Aug-2021)
8787+--------------------
8888+8989+* Add the `[:alpha:]` character class in `Re.Perl` (#169)
9090+* Double asterisk (`**`) in `Re.Glob` (#172)
9191+ Like `*` but also match `/` characters when `pathname` is set.
9292+* Double asterisk should match 0 or more directories unless in trailing
9393+ position. (#192, fixes #185)
9494+9595+1.9.0 (05-Apr-2019)
9696+-------------------
9797+9898+* Fix regression in `Re.exec_partial` (#164)
9999+* Move gen related functions to `Re.Gen` and deprecate the old names (#167)
100100+* Introduce `Re.View` that exposes the internal representation (#163)
101101+102102+1.8.0 (04-Aug-2018)
103103+-------------------
104104+105105+* Fix index-out-of-bounds exception in Re.Perl.re (#160)
106106+* Add seq based iterators (#170)
107107+108108+1.7.3 (05-Mar-2018)
109109+-------------------
110110+111111+* Remove dependency on bytes package (#155)
112112+113113+1.7.2 (01-Mar-2018)
114114+-------------------
115115+116116+* Deprecate all Re_* modules. Re_x is now available as Re.X
117117+* Deprecate all re.x sub libraries. Those are all available as Re.X
118118+* Make all functions in Re.Str tail recursive.
119119+120120+1.7.1 (19-Oct-2016)
121121+-------------------
122122+123123+* Fix Re_str.global_replace (#132)
124124+125125+1.7.0 (18-Sep-2016)
126126+-------------------
127127+128128+* Fix stack overflow in Re_str.full_split
129129+* Use correct exceptions in Re_str group functions
130130+* Add experimental Re.witness
131131+* Add experimental Re.Group.nb_groups
132132+133133+1.6.1 (20-Jun-2016)
134134+-------------------
135135+136136+* Fix Re.pp (#101)
137137+* Add Re.Group.pp (#102)
138138+139139+1.6.0 (30-May-2016)
140140+-------------------
141141+142142+* Add Re.pp and Re.pp_re (#55)
143143+* Fix ocamldoc syntax (#87)
144144+145145+1.5.0 (04-Jan-2016)
146146+-------------------
147147+148148+* Add Re.exec_opt. Like exec but doesn't raise
149149+* Add Group module. Old group accessors are deprecated.
150150+* Add Mark module
151151+* Improve docs of Re.repn
152152+* Improve docs of Re_pcre
153153+* Fix doc of Re_pcre.match
154154+* Consolidate variants of Re.glob that takes options to modify its behavior
155155+ (?period, ?expand_braces). Old variants are deprecated.
156156+* New option ?pathname added for Re_glob.glob. Controls how the `/` character
157157+ is matched
158158+159159+1.4.1 (06-Jun-2015)
160160+-------------------
162162+* Fix 4.00.1 compatibility with tests.
163163+164164+1.4.0 (12-May-2015)
165165+-------------------
166166+167167+* Add Re.{mark,marked,mark_set}. Regexps can now be "marked" to query post
168168+ execution if they matched.
169169+170170+1.3.2 (14-Apr-2015)
171171+-------------------
172172+173173+* Fix replacing 0 length matches (#55)
174174+175175+1.3.1 (13-Mar-2015)
176176+-------------------
177177+178178+* Rename {Cset, Automata} to {Re_cset, Re_automata}
179179+180180+1.3.0 (02-Feb-2015)
181181+-------------------
182182+183183+* Add Re.split{,_gen,_token,_full,_full_gen}
184184+* Add Re.replace{,_string}
185185+* Add Re.all{,_gen}
186186+* Add posix classes of the form [:xxx:]
187187+* Add complement support for posix classes
188188+* Add Multiline and anchored flag to Re_pcre
189189+* Add Re_pcre.full_split
190190+191191+1.2.2 (05-May-2014)
192192+-------------------
193193+194194+* Add a Re.whole_string convenience function to only match whole strings
195195+* Add a ?anchored parameter to functions in Re_glob to specify whole
196196+ string matching
197197+* Document Re_glob module
198198+* Fix compilation of submatches occurring inside a Kleene star
199199+* Fix word boundary matching
200200+* Fix definition of Re.xdigit
201201+* Fix Re.exec_partial function
202202+* Fix compilation of patterns of the shape r1r2|r1r3
203203+* Fixed compilation of re.cmxs (Vincent Bernardoff)
204204+* Improved matching of anchored regular expressions: stop as soon as
205205+ we know there cannot possibly be any match.
206206+* Updated to OASIS 0.4.x (Vincent Bernardoff)
207207+* Add the linking exception to the license
208208+209209+1.2.1 (07-Apr-2013)
210210+-------------------
211211+212212+* Correct OASIS metadata (Christophe Troestler).
213213+* Fix typo in Invalid_arg error message (Jeremy Yallop).
214214+215215+1.2.0 (15-Jan-2012)
216216+-------------------
217217+218218+* Rename Pcre module to `Re_pcre` to make it more suitable for
219219+ upstream packaging (it currently conflicts with the `Pcre` package).
220220+ (Mehdi Dogguy).
221221+222222+1.1.0 (05-Sep-2012)
223223+-------------------
224224+225225+* Add a basic Pcre wrapper around Re_perl for porting applications using that
226226+ API (Thomas Gazagnaire).
227227+228228+1.0.0 (01-Aug-2012)
229229+-------------------
230230+231231+* Initial public release.
+523
vendor/opam/re/LICENSE.md
···11+This Software is distributed under the terms of the GNU Lesser
22+General Public License version 2.1 (included below), or (at your
33+option) any later version.
44+55+As a special exception to the GNU Library General Public License, you
66+may link, statically or dynamically, a "work that uses the Library"
77+with a publicly distributed version of the Library to produce an
88+executable file containing portions of the Library, and distribute
99+that executable file under terms of your choice, without any of the
1010+additional requirements listed in clause 6 of the GNU Library General
1111+Public License. By "a publicly distributed version of the Library",
1212+we mean either the unmodified Library, or a modified version of the
1313+Library that is distributed under the conditions defined in clause 3
1414+of the GNU Library General Public License. This exception does not
1515+however invalidate any other reasons why the executable file might be
1616+covered by the GNU Library General Public License.
1717+1818+----------------------------------------------------------------------
1919+2020+ GNU LESSER GENERAL PUBLIC LICENSE
2121+ Version 2.1, February 1999
2222+2323+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
2424+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2525+ Everyone is permitted to copy and distribute verbatim copies
2626+ of this license document, but changing it is not allowed.
2727+2828+[This is the first released version of the Lesser GPL. It also counts
2929+ as the successor of the GNU Library Public License, version 2, hence
3030+ the version number 2.1.]
3131+3232+ Preamble
3333+3434+ The licenses for most software are designed to take away your
3535+freedom to share and change it. By contrast, the GNU General Public
3636+Licenses are intended to guarantee your freedom to share and change
3737+free software--to make sure the software is free for all its users.
3838+3939+ This license, the Lesser General Public License, applies to some
4040+specially designated software packages--typically libraries--of the
4141+Free Software Foundation and other authors who decide to use it. You
4242+can use it too, but we suggest you first think carefully about whether
4343+this license or the ordinary General Public License is the better
4444+strategy to use in any particular case, based on the explanations below.
4545+4646+ When we speak of free software, we are referring to freedom of use,
4747+not price. Our General Public Licenses are designed to make sure that
4848+you have the freedom to distribute copies of free software (and charge
4949+for this service if you wish); that you receive source code or can get
5050+it if you want it; that you can change the software and use pieces of
5151+it in new free programs; and that you are informed that you can do
5252+these things.
5353+5454+ To protect your rights, we need to make restrictions that forbid
5555+distributors to deny you these rights or to ask you to surrender these
5656+rights. These restrictions translate to certain responsibilities for
5757+you if you distribute copies of the library or if you modify it.
5858+5959+ For example, if you distribute copies of the library, whether gratis
6060+or for a fee, you must give the recipients all the rights that we gave
6161+you. You must make sure that they, too, receive or can get the source
6262+code. If you link other code with the library, you must provide
6363+complete object files to the recipients, so that they can relink them
6464+with the library after making changes to the library and recompiling
6565+it. And you must show them these terms so they know their rights.
6666+6767+ We protect your rights with a two-step method: (1) we copyright the
6868+library, and (2) we offer you this license, which gives you legal
6969+permission to copy, distribute and/or modify the library.
7070+7171+ To protect each distributor, we want to make it very clear that
7272+there is no warranty for the free library. Also, if the library is
7373+modified by someone else and passed on, the recipients should know
7474+that what they have is not the original version, so that the original
7575+author's reputation will not be affected by problems that might be
7676+introduced by others.
7777+7878+ Finally, software patents pose a constant threat to the existence of
7979+any free program. We wish to make sure that a company cannot
8080+effectively restrict the users of a free program by obtaining a
8181+restrictive license from a patent holder. Therefore, we insist that
8282+any patent license obtained for a version of the library must be
8383+consistent with the full freedom of use specified in this license.
8484+8585+ Most GNU software, including some libraries, is covered by the
8686+ordinary GNU General Public License. This license, the GNU Lesser
8787+General Public License, applies to certain designated libraries, and
8888+is quite different from the ordinary General Public License. We use
8989+this license for certain libraries in order to permit linking those
9090+libraries into non-free programs.
9191+9292+ When a program is linked with a library, whether statically or using
9393+a shared library, the combination of the two is legally speaking a
9494+combined work, a derivative of the original library. The ordinary
9595+General Public License therefore permits such linking only if the
9696+entire combination fits its criteria of freedom. The Lesser General
9797+Public License permits more lax criteria for linking other code with
9898+the library.
9999+100100+ We call this license the "Lesser" General Public License because it
101101+does Less to protect the user's freedom than the ordinary General
102102+Public License. It also provides other free software developers Less
103103+of an advantage over competing non-free programs. These disadvantages
104104+are the reason we use the ordinary General Public License for many
105105+libraries. However, the Lesser license provides advantages in certain
106106+special circumstances.
107107+108108+ For example, on rare occasions, there may be a special need to
109109+encourage the widest possible use of a certain library, so that it becomes
110110+a de-facto standard. To achieve this, non-free programs must be
111111+allowed to use the library. A more frequent case is that a free
112112+library does the same job as widely used non-free libraries. In this
113113+case, there is little to gain by limiting the free library to free
114114+software only, so we use the Lesser General Public License.
115115+116116+ In other cases, permission to use a particular library in non-free
117117+programs enables a greater number of people to use a large body of
118118+free software. For example, permission to use the GNU C Library in
119119+non-free programs enables many more people to use the whole GNU
120120+operating system, as well as its variant, the GNU/Linux operating
121121+system.
122122+123123+ Although the Lesser General Public License is Less protective of the
124124+users' freedom, it does ensure that the user of a program that is
125125+linked with the Library has the freedom and the wherewithal to run
126126+that program using a modified version of the Library.
127127+128128+ The precise terms and conditions for copying, distribution and
129129+modification follow. Pay close attention to the difference between a
130130+"work based on the library" and a "work that uses the library". The
131131+former contains code derived from the library, whereas the latter must
132132+be combined with the library in order to run.
133133+134134+ GNU LESSER GENERAL PUBLIC LICENSE
135135+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
136136+137137+ 0. This License Agreement applies to any software library or other
138138+program which contains a notice placed by the copyright holder or
139139+other authorized party saying it may be distributed under the terms of
140140+this Lesser General Public License (also called "this License").
141141+Each licensee is addressed as "you".
142142+143143+ A "library" means a collection of software functions and/or data
144144+prepared so as to be conveniently linked with application programs
145145+(which use some of those functions and data) to form executables.
146146+147147+ The "Library", below, refers to any such software library or work
148148+which has been distributed under these terms. A "work based on the
149149+Library" means either the Library or any derivative work under
150150+copyright law: that is to say, a work containing the Library or a
151151+portion of it, either verbatim or with modifications and/or translated
152152+straightforwardly into another language. (Hereinafter, translation is
153153+included without limitation in the term "modification".)
154154+155155+ "Source code" for a work means the preferred form of the work for
156156+making modifications to it. For a library, complete source code means
157157+all the source code for all modules it contains, plus any associated
158158+interface definition files, plus the scripts used to control compilation
159159+and installation of the library.
160160+161161+ Activities other than copying, distribution and modification are not
162162+covered by this License; they are outside its scope. The act of
163163+running a program using the Library is not restricted, and output from
164164+such a program is covered only if its contents constitute a work based
165165+on the Library (independent of the use of the Library in a tool for
166166+writing it). Whether that is true depends on what the Library does
167167+and what the program that uses the Library does.
168168+169169+ 1. You may copy and distribute verbatim copies of the Library's
170170+complete source code as you receive it, in any medium, provided that
171171+you conspicuously and appropriately publish on each copy an
172172+appropriate copyright notice and disclaimer of warranty; keep intact
173173+all the notices that refer to this License and to the absence of any
174174+warranty; and distribute a copy of this License along with the
175175+Library.
176176+177177+ You may charge a fee for the physical act of transferring a copy,
178178+and you may at your option offer warranty protection in exchange for a
179179+fee.
180180+181181+ 2. You may modify your copy or copies of the Library or any portion
182182+of it, thus forming a work based on the Library, and copy and
183183+distribute such modifications or work under the terms of Section 1
184184+above, provided that you also meet all of these conditions:
185185+186186+ a) The modified work must itself be a software library.
187187+188188+ b) You must cause the files modified to carry prominent notices
189189+ stating that you changed the files and the date of any change.
190190+191191+ c) You must cause the whole of the work to be licensed at no
192192+ charge to all third parties under the terms of this License.
193193+194194+ d) If a facility in the modified Library refers to a function or a
195195+ table of data to be supplied by an application program that uses
196196+ the facility, other than as an argument passed when the facility
197197+ is invoked, then you must make a good faith effort to ensure that,
198198+ in the event an application does not supply such function or
199199+ table, the facility still operates, and performs whatever part of
200200+ its purpose remains meaningful.
201201+202202+ (For example, a function in a library to compute square roots has
203203+ a purpose that is entirely well-defined independent of the
204204+ application. Therefore, Subsection 2d requires that any
205205+ application-supplied function or table used by this function must
206206+ be optional: if the application does not supply it, the square
207207+ root function must still compute square roots.)
208208+209209+These requirements apply to the modified work as a whole. If
210210+identifiable sections of that work are not derived from the Library,
211211+and can be reasonably considered independent and separate works in
212212+themselves, then this License, and its terms, do not apply to those
213213+sections when you distribute them as separate works. But when you
214214+distribute the same sections as part of a whole which is a work based
215215+on the Library, the distribution of the whole must be on the terms of
216216+this License, whose permissions for other licensees extend to the
217217+entire whole, and thus to each and every part regardless of who wrote
218218+it.
219219+220220+Thus, it is not the intent of this section to claim rights or contest
221221+your rights to work written entirely by you; rather, the intent is to
222222+exercise the right to control the distribution of derivative or
223223+collective works based on the Library.
224224+225225+In addition, mere aggregation of another work not based on the Library
226226+with the Library (or with a work based on the Library) on a volume of
227227+a storage or distribution medium does not bring the other work under
228228+the scope of this License.
229229+230230+ 3. You may opt to apply the terms of the ordinary GNU General Public
231231+License instead of this License to a given copy of the Library. To do
232232+this, you must alter all the notices that refer to this License, so
233233+that they refer to the ordinary GNU General Public License, version 2,
234234+instead of to this License. (If a newer version than version 2 of the
235235+ordinary GNU General Public License has appeared, then you can specify
236236+that version instead if you wish.) Do not make any other change in
237237+these notices.
238238+239239+ Once this change is made in a given copy, it is irreversible for
240240+that copy, so the ordinary GNU General Public License applies to all
241241+subsequent copies and derivative works made from that copy.
242242+243243+ This option is useful when you wish to copy part of the code of
244244+the Library into a program that is not a library.
245245+246246+ 4. You may copy and distribute the Library (or a portion or
247247+derivative of it, under Section 2) in object code or executable form
248248+under the terms of Sections 1 and 2 above provided that you accompany
249249+it with the complete corresponding machine-readable source code, which
250250+must be distributed under the terms of Sections 1 and 2 above on a
251251+medium customarily used for software interchange.
252252+253253+ If distribution of object code is made by offering access to copy
254254+from a designated place, then offering equivalent access to copy the
255255+source code from the same place satisfies the requirement to
256256+distribute the source code, even though third parties are not
257257+compelled to copy the source along with the object code.
258258+259259+ 5. A program that contains no derivative of any portion of the
260260+Library, but is designed to work with the Library by being compiled or
261261+linked with it, is called a "work that uses the Library". Such a
262262+work, in isolation, is not a derivative work of the Library, and
263263+therefore falls outside the scope of this License.
264264+265265+ However, linking a "work that uses the Library" with the Library
266266+creates an executable that is a derivative of the Library (because it
267267+contains portions of the Library), rather than a "work that uses the
268268+library". The executable is therefore covered by this License.
269269+Section 6 states terms for distribution of such executables.
270270+271271+ When a "work that uses the Library" uses material from a header file
272272+that is part of the Library, the object code for the work may be a
273273+derivative work of the Library even though the source code is not.
274274+Whether this is true is especially significant if the work can be
275275+linked without the Library, or if the work is itself a library. The
276276+threshold for this to be true is not precisely defined by law.
277277+278278+ If such an object file uses only numerical parameters, data
279279+structure layouts and accessors, and small macros and small inline
280280+functions (ten lines or less in length), then the use of the object
281281+file is unrestricted, regardless of whether it is legally a derivative
282282+work. (Executables containing this object code plus portions of the
283283+Library will still fall under Section 6.)
284284+285285+ Otherwise, if the work is a derivative of the Library, you may
286286+distribute the object code for the work under the terms of Section 6.
287287+Any executables containing that work also fall under Section 6,
288288+whether or not they are linked directly with the Library itself.
289289+290290+ 6. As an exception to the Sections above, you may also combine or
291291+link a "work that uses the Library" with the Library to produce a
292292+work containing portions of the Library, and distribute that work
293293+under terms of your choice, provided that the terms permit
294294+modification of the work for the customer's own use and reverse
295295+engineering for debugging such modifications.
296296+297297+ You must give prominent notice with each copy of the work that the
298298+Library is used in it and that the Library and its use are covered by
299299+this License. You must supply a copy of this License. If the work
300300+during execution displays copyright notices, you must include the
301301+copyright notice for the Library among them, as well as a reference
302302+directing the user to the copy of this License. Also, you must do one
303303+of these things:
304304+305305+ a) Accompany the work with the complete corresponding
306306+ machine-readable source code for the Library including whatever
307307+ changes were used in the work (which must be distributed under
308308+ Sections 1 and 2 above); and, if the work is an executable linked
309309+ with the Library, with the complete machine-readable "work that
310310+ uses the Library", as object code and/or source code, so that the
311311+ user can modify the Library and then relink to produce a modified
312312+ executable containing the modified Library. (It is understood
313313+ that the user who changes the contents of definitions files in the
314314+ Library will not necessarily be able to recompile the application
315315+ to use the modified definitions.)
316316+317317+ b) Use a suitable shared library mechanism for linking with the
318318+ Library. A suitable mechanism is one that (1) uses at run time a
319319+ copy of the library already present on the user's computer system,
320320+ rather than copying library functions into the executable, and (2)
321321+ will operate properly with a modified version of the library, if
322322+ the user installs one, as long as the modified version is
323323+ interface-compatible with the version that the work was made with.
324324+325325+ c) Accompany the work with a written offer, valid for at
326326+ least three years, to give the same user the materials
327327+ specified in Subsection 6a, above, for a charge no more
328328+ than the cost of performing this distribution.
329329+330330+ d) If distribution of the work is made by offering access to copy
331331+ from a designated place, offer equivalent access to copy the above
332332+ specified materials from the same place.
333333+334334+ e) Verify that the user has already received a copy of these
335335+ materials or that you have already sent this user a copy.
336336+337337+ For an executable, the required form of the "work that uses the
338338+Library" must include any data and utility programs needed for
339339+reproducing the executable from it. However, as a special exception,
340340+the materials to be distributed need not include anything that is
341341+normally distributed (in either source or binary form) with the major
342342+components (compiler, kernel, and so on) of the operating system on
343343+which the executable runs, unless that component itself accompanies
344344+the executable.
345345+346346+ It may happen that this requirement contradicts the license
347347+restrictions of other proprietary libraries that do not normally
348348+accompany the operating system. Such a contradiction means you cannot
349349+use both them and the Library together in an executable that you
350350+distribute.
351351+352352+ 7. You may place library facilities that are a work based on the
353353+Library side-by-side in a single library together with other library
354354+facilities not covered by this License, and distribute such a combined
355355+library, provided that the separate distribution of the work based on
356356+the Library and of the other library facilities is otherwise
357357+permitted, and provided that you do these two things:
358358+359359+ a) Accompany the combined library with a copy of the same work
360360+ based on the Library, uncombined with any other library
361361+ facilities. This must be distributed under the terms of the
362362+ Sections above.
363363+364364+ b) Give prominent notice with the combined library of the fact
365365+ that part of it is a work based on the Library, and explaining
366366+ where to find the accompanying uncombined form of the same work.
367367+368368+ 8. You may not copy, modify, sublicense, link with, or distribute
369369+the Library except as expressly provided under this License. Any
370370+attempt otherwise to copy, modify, sublicense, link with, or
371371+distribute the Library is void, and will automatically terminate your
372372+rights under this License. However, parties who have received copies,
373373+or rights, from you under this License will not have their licenses
374374+terminated so long as such parties remain in full compliance.
375375+376376+ 9. You are not required to accept this License, since you have not
377377+signed it. However, nothing else grants you permission to modify or
378378+distribute the Library or its derivative works. These actions are
379379+prohibited by law if you do not accept this License. Therefore, by
380380+modifying or distributing the Library (or any work based on the
381381+Library), you indicate your acceptance of this License to do so, and
382382+all its terms and conditions for copying, distributing or modifying
383383+the Library or works based on it.
384384+385385+ 10. Each time you redistribute the Library (or any work based on the
386386+Library), the recipient automatically receives a license from the
387387+original licensor to copy, distribute, link with or modify the Library
388388+subject to these terms and conditions. You may not impose any further
389389+restrictions on the recipients' exercise of the rights granted herein.
390390+You are not responsible for enforcing compliance by third parties with
391391+this License.
392392+393393+ 11. If, as a consequence of a court judgment or allegation of patent
394394+infringement or for any other reason (not limited to patent issues),
395395+conditions are imposed on you (whether by court order, agreement or
396396+otherwise) that contradict the conditions of this License, they do not
397397+excuse you from the conditions of this License. If you cannot
398398+distribute so as to satisfy simultaneously your obligations under this
399399+License and any other pertinent obligations, then as a consequence you
400400+may not distribute the Library at all. For example, if a patent
401401+license would not permit royalty-free redistribution of the Library by
402402+all those who receive copies directly or indirectly through you, then
403403+the only way you could satisfy both it and this License would be to
404404+refrain entirely from distribution of the Library.
405405+406406+If any portion of this section is held invalid or unenforceable under any
407407+particular circumstance, the balance of the section is intended to apply,
408408+and the section as a whole is intended to apply in other circumstances.
409409+410410+It is not the purpose of this section to induce you to infringe any
411411+patents or other property right claims or to contest validity of any
412412+such claims; this section has the sole purpose of protecting the
413413+integrity of the free software distribution system which is
414414+implemented by public license practices. Many people have made
415415+generous contributions to the wide range of software distributed
416416+through that system in reliance on consistent application of that
417417+system; it is up to the author/donor to decide if he or she is willing
418418+to distribute software through any other system and a licensee cannot
419419+impose that choice.
420420+421421+This section is intended to make thoroughly clear what is believed to
422422+be a consequence of the rest of this License.
423423+424424+ 12. If the distribution and/or use of the Library is restricted in
425425+certain countries either by patents or by copyrighted interfaces, the
426426+original copyright holder who places the Library under this License may add
427427+an explicit geographical distribution limitation excluding those countries,
428428+so that distribution is permitted only in or among countries not thus
429429+excluded. In such case, this License incorporates the limitation as if
430430+written in the body of this License.
431431+432432+ 13. The Free Software Foundation may publish revised and/or new
433433+versions of the Lesser General Public License from time to time.
434434+Such new versions will be similar in spirit to the present version,
435435+but may differ in detail to address new problems or concerns.
436436+437437+Each version is given a distinguishing version number. If the Library
438438+specifies a version number of this License which applies to it and
439439+"any later version", you have the option of following the terms and
440440+conditions either of that version or of any later version published by
441441+the Free Software Foundation. If the Library does not specify a
442442+license version number, you may choose any version ever published by
443443+the Free Software Foundation.
444444+445445+ 14. If you wish to incorporate parts of the Library into other free
446446+programs whose distribution conditions are incompatible with these,
447447+write to the author to ask for permission. For software which is
448448+copyrighted by the Free Software Foundation, write to the Free
449449+Software Foundation; we sometimes make exceptions for this. Our
450450+decision will be guided by the two goals of preserving the free status
451451+of all derivatives of our free software and of promoting the sharing
452452+and reuse of software generally.
453453+454454+ NO WARRANTY
455455+456456+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
457457+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
458458+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
459459+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
460460+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
461461+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
462462+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
463463+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
464464+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
465465+466466+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
467467+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
468468+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
469469+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
470470+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
471471+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
472472+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
473473+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
474474+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
475475+DAMAGES.
476476+477477+ END OF TERMS AND CONDITIONS
478478+479479+ How to Apply These Terms to Your New Libraries
480480+481481+ If you develop a new library, and you want it to be of the greatest
482482+possible use to the public, we recommend making it free software that
483483+everyone can redistribute and change. You can do so by permitting
484484+redistribution under these terms (or, alternatively, under the terms of the
485485+ordinary General Public License).
486486+487487+ To apply these terms, attach the following notices to the library. It is
488488+safest to attach them to the start of each source file to most effectively
489489+convey the exclusion of warranty; and each file should have at least the
490490+"copyright" line and a pointer to where the full notice is found.
491491+492492+ <one line to give the library's name and a brief idea of what it does.>
493493+ Copyright (C) <year> <name of author>
494494+495495+ This library is free software; you can redistribute it and/or
496496+ modify it under the terms of the GNU Lesser General Public
497497+ License as published by the Free Software Foundation; either
498498+ version 2 of the License, or (at your option) any later version.
499499+500500+ This library is distributed in the hope that it will be useful,
501501+ but WITHOUT ANY WARRANTY; without even the implied warranty of
502502+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
503503+ Lesser General Public License for more details.
504504+505505+ You should have received a copy of the GNU Lesser General Public
506506+ License along with this library; if not, write to the Free Software
507507+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
508508+509509+Also add information on how to contact you by electronic and paper mail.
510510+511511+You should also get your employer (if you work as a programmer) or your
512512+school, if any, to sign a "copyright disclaimer" for the library, if
513513+necessary. Here is a sample; alter the names:
514514+515515+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
516516+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
517517+518518+ <signature of Ty Coon>, 1 April 1990
519519+ Ty Coon, President of Vice
520520+521521+That's all there is to it!
522522+523523+
···11+Description
22+===========
33+44+Re is a regular expression library for OCaml.
55+[Build status](https://github.com/ocaml/ocaml-re/actions/workflows/main.yml)
66+77+Contact
88+=======
99+1010+This library has been written by Jerome Vouillon
1111+(Jerome.Vouillon@pps.univ-paris-diderot.fr).
1212+It can be downloaded from <https://github.com/ocaml/ocaml-re>
1313+1414+Bug reports, suggestions and contributions are welcome.
1515+1616+Features
1717+========
1818+1919+The following styles of regular expressions are supported:
2020+- Perl-style regular expressions (module `Re.Perl`);
2121+- Posix extended regular expressions (module `Re.Posix`);
2222+- Emacs-style regular expressions (module `Re.Emacs`);
2323+- Shell-style file globbing (module `Re.Glob`).
2424+2525+It is also possible to build regular expressions by combining simpler regular
2626+expressions (module `Re`).
2727+2828+The most notable missing features are **back-references** and
2929+look-ahead/look-behind **assertions**.
3030+3131+There is also a subset of the PCRE interface available in the `Re.Pcre` module.
3232+This makes it easier to port code from that library to Re with minimal changes.
3333+3434+Performance
3535+===========
3636+3737+The matches are performed by lazily building a DFA (deterministic
3838+finite automaton) from the regular expression. As a consequence,
3939+matching takes linear time in the length of the matched string.
4040+4141+The compilation of patterns is slower than with libraries using
4242+back-tracking, such as PCRE. But, once a large enough part of the
4343+DFA is built, matching is extremely fast.
4444+4545+Of course, for some combinations of regular expression and string, the
4646+part of the DFA that needs to be built is so large that this point is
4747+never reached, and matching will be slow. This is not expected to
4848+happen often in practice, and actually a lot of expressions that
4949+behave badly with a backtracking implementation are very efficient
5050+with this implementation.
5151+5252+The library is at the moment entirely written in OCaml. As a
5353+consequence, regular expression matching is much slower when the
5454+library is compiled to bytecode than when it is compiled to native
5555+code.
5656+5757+Here are some timing results (Pentium III 500 MHz):
5858+* Scanning a 1Mb string containing only `a`s, except for the last
5959+ character which is a `b`, searching for the pattern `aa?b`
6060+ (repeated 100 times):
6161+ - RE: 2.6s
6262+ - PCRE: 68s
6363+* Regular expression example from http://www.bagley.org/~doug/shootout/ [1]
6464+ - RE: 0.43s
6565+ - PCRE: 3.68s
6666+6767+ [1] this page is no longer up but is available via the Internet Archive
6868+ http://web.archive.org/web/20010429190941/http://www.bagley.org/~doug/shootout/bench/regexmatch/
6969+7070+* The large regular expression (about 2000 characters long) that
7171+ Unison uses with my preference file to decide whether a file should
7272+ be ignored or not. This expression is matched against a filename
7373+ about 20000 times.
7474+ - RE: 0.31s
7575+ - PCRE: 3.7s
7676+ However, RE is only faster than PCRE when there are more than about
7777+ 300 filenames.
+81
vendor/opam/re/TODO.txt
···11+* To compile r{i,j} we need a sequence that does not match epsilon
22+ (or a constructor around an expression telling that this expression
33+ does not match epsilon)
44+* A subexpression repeated by an asterisk ( '*' ) or an interval
55+ expression shall not match a null expression unless this is the only
66+ match for the repetition or it is necessary to satisfy the exact or
77+ minimum number of occurrences for the interval expression.
88+* There might be a typo in deriv_1/delta_1: should we generate 'TMatch
99+ mark' or 'TMatch mark'? (neither is correct!)
1010+1111+POSIX:
1212+ "(a?)*" "b" ""
1313+ "(a?)*" "ab" "a"
1414+ "((a)|(b))*" "ab" -> "b" none "b"
1515+Str
1616+ "(a?)*" "b" no submatch
1717+ "(a?)*" "ab" "a"
1818+ "((a)|(b))*" "ab" -> "b" "a" "b"
1919+Javascript
2020+ "(a?)*" "b" no submatch
2121+ "(a?)*" "ab" "a"
2222+ "((a)|(b))*" "ab" -> "b" none "b"
2323+PCRE
2424+ "(a?)*" "b" ""
2525+ "(a?)*" "ab" ""
2626+ "(a?)*?" "b" ""
2727+ "(a?)*?" "ab" "a"
2828+ "((a)|(b))*" "ab" -> "b" "a" "b"
2929+Emacs
3030+ "(a?)*" "b" ""
3131+ "(a?)*" "ab" ""
3232+ "(a?)*?" "b" ""
3333+ "(a?)*?" "ab" "a"
3434+ "((a)|(b))*" "ab" -> "b" "a" "b"
3535+3636+3737+ r{0,0} = eps
3838+ r{i+1,j+1} = r,r{i,j}
3939+ r{0,j+1} = r,r{0,j} | eps PCRE/Emacs
4040+ r{0,j+1} = (r-eps),r{0,j} | eps JavaScript
4141+4242+* Rewrite sequences of sequences when possible...
4343+4444+High priority
4545+=============
4646+* Improve the Perl regular expressions parser
4747+* Character classes (in the three regular expression parsers)
4848+4949+* Reduce memory usage
5050+ - More compact representation of character sequences
5151+ - Special notation for "anything but this set of characters"
5252+ (more generally, optimize the compilation of regular expressions)
5353+* Simple optimisations
5454+ - alt containing alt
5555+ - epsilon elimination
5656+ - Seq (Seq (x,y), z) => Seq (x, Seq (y, z)) under some circumstances
5757+ (x or y has a fixed length)
5858+ ...
5959+6060+* Test suite
6161+6262+Medium priority
6363+===============
6464+* Implement back-references
6565+* Implement look-ahead and look-behind assertions
6666+6767+Low priority
6868+============
6969+* Optimize the main loop for processors that are not register starved
7070+* Rewrite the main loops in C
7171+ (but keep the option to compile a pure OCaml version)
7272+* Limit the size of the cached DFAs by removing states that have not
7373+ been used recently
7474+* Documentation
7575+7676+Other ideas
7777+===========
7878+* It would be great to have a more generic interface (parameterized
7979+ over some abstract tokens).
8080+* Compile checked printers parameterized over match groups (DRY for
8181+ literal subexpressions)
+222
vendor/opam/re/benchmarks/benchmark.ml
···11+open Core
22+open Core_bench
(* Input made of twenty '0' characters, and the literal regex matching exactly
   that string. *)
let str_20_zeroes = String.make 20 '0'
let re_20_zeroes = Re.str str_20_zeroes
(* 101-character input: one hundred 'a's followed by a single 'b'. *)
let lots_of_a's = String.make 100 'a' ^ "b"

(* An 'a', an optional second 'a', then a 'b'. *)
let lots_o_a's_re = Re.seq [ Re.char 'a'; Re.opt (Re.char 'a'); Re.char 'b' ]
(* Emacs-syntax pattern: optional blanks/tabs followed by a group of one or
   more characters that are not blank, tab or ';'. It is preceded by
   [Re.start]. *)
let media_type_re =
  Re.seq [ Re.start; Re.Emacs.re ~case:true "[ \t]*\\([^ \t;]+\\)" ]
;;
(* Taken from https://github.com/rgrinberg/ocaml-uri/blob/903ef1010f9808d6f3f6d9c1fe4b4eabbd76082d/lib/uri.ml*)
(* POSIX-syntax regex used by the "uri" benchmark case. *)
let uri_reference =
  "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" |> Re.Posix.re
;;
(* Inputs matched against [uri_reference]. *)
let uris =
  [ "https://google.com"; "http://yahoo.com/xxx/yyy?query=param&one=two"; "file:/random_crap" ]
;;
(* Table of simple cases: (group name, regex, inputs to run the regex on). *)
let benchmarks =
  [ ("20 zeroes", re_20_zeroes, [ str_20_zeroes ])
  ; ("lots of a's", lots_o_a's_re, [ lots_of_a's ])
  ; ("media type match", media_type_re, [ " foo/bar ; charset=UTF-8" ])
  ; ("uri", uri_reference, uris)
  ]
;;
(** [test ~name re f] builds two benchmarks from a thunk [re] and a body [f]
    that receives a thunk:

    - [name]: [f] is handed [re] itself, so every call [f] makes to its
      argument evaluates [re ()] again.
    - ["<name> (compiled)"]: [f] is handed a thunk backed by a single [lazy]
      value, so [re ()] is evaluated at most once and the same result is
      returned on every later call.

    Returns the two benchmarks in that order. *)
let test ~name re f =
  (* The lazy value must live outside the thunk: creating it inside would
     build and force a fresh suspension on each call and cache nothing. *)
  let cached = lazy (re ()) in
  let re_cached () = Lazy.force cached in
  [ Bench.Test.create ~name (fun () -> f re)
  ; Bench.Test.create ~name:(sprintf "%s (compiled)" name) (fun () -> f re_cached)
  ]
;;
(** [exec_bench exec name re cases] creates a benchmark group called [name]
    holding, for every string in [cases], the benchmarks produced by [test]
    that run [exec] on the compiled [re] and that string (result ignored). *)
let exec_bench exec name (re : Re.t) cases =
  (* Inputs longer than 70 characters are labelled by their first 10
     characters and their length; shorter inputs are their own label. *)
  let label data =
    match String.length data with
    | len when len > 70 ->
      Printf.sprintf "%s .. (%d)" (String.sub data ~pos:0 ~len:10) len
    | _ -> data
  in
  let compile () = Re.compile re in
  let bench_case data =
    test ~name:(label data) compile (fun compiled -> ignore (exec (compiled ()) data))
  in
  cases |> List.concat_map ~f:bench_case |> Bench.Test.create_group ~name
;;
(** [exec_bench_many exec name re cases] produces the benchmarks from [test]
    in which a single run obtains the compiled [re] once and then applies
    [exec] to every string of [cases], ignoring the results. *)
let exec_bench_many exec name re cases =
  let compile () = Re.compile re in
  let run_all compiled =
    let re = compiled () in
    List.iter cases ~f:(fun input -> ignore (exec re input))
  in
  test ~name compile run_all
;;
(* Runs [Re.execp] from position 0 over a string of 1000 * 1000 'a's with a
   pattern that ends in 'z'. The "#210" in the name presumably refers to an
   upstream issue -- not verifiable from this file. *)
let string_traversal =
  let haystack = String.make (1000 * 1000) 'a' in
  let pattern = Re.Pcre.re "aaaaaaaaaaaaaaaaz" in
  let compile () = Re.compile pattern in
  test ~name:"string traversal from #210" compile (fun compiled ->
    ignore (Re.execp ~pos:0 (compiled ()) haystack))
;;
(* Matches the repetition of a single character against 10_000 copies of that
   character. *)
let compile_clean_star =
  let c = 'c' in
  let input = String.make 10_000 c in
  let star = Re.rep (Re.char c) in
  test
    ~name:"kleene star compilation"
    (fun () -> Re.compile star)
    (fun compiled -> ignore (Re.execp (compiled ()) input))
;;
(* The pattern is the 256-character string holding every byte value in order,
   repeated exactly 50 times; the input is that string concatenated 50 times. *)
let repeated_sequence =
  let all_bytes = String.init 256 ~f:Char.of_int_exn in
  let compile () = Re.compile (Re.repn (Re.str all_bytes) 50 (Some 50)) in
  let input = String.concat ~sep:"" (List.init 50 ~f:(fun _ -> all_bytes)) in
  test ~name:"repeated sequence re" compile (fun compiled ->
    ignore (Re.execp (compiled ()) input))
;;
(* Runs [Re.split_full] with "one or more spaces" over a 1_000-character
   string of '_' that has a single space at indices 0, 9, 18, ..., 900. *)
let split =
  let input =
    String.init 1_000 ~f:(fun i -> if i mod 9 = 0 && i <= 900 then ' ' else '_')
  in
  let compile () = Re.compile (Re.rep1 Re.space) in
  test ~name:"split on whitespace" compile (fun compiled ->
    ignore (Re.split_full (compiled ()) input))
;;
(** Benchmark for an alternation whose branches all start with
    [rep1 any; char '.'] and differ only in the trailing extension. One run
    matches [base ^ extension] for every generated extension. *)
let prefixes =
  (* [make_ext n] spells [n] using the characters of [chars] as digits, least
     significant digit first; [make_ext 0] is the empty string. The buffer is
     shared between calls and cleared on entry.
     NOTE(review): the alphabet has no 'j' -- confirm whether that is intended. *)
  let make_ext =
    let chars = "abcdefghiklmnopqrstuvwxyz" in
    let buf = Buffer.create 4 in
    let rec loop remains =
      match remains with
      | 0 -> Buffer.contents buf
      | _ ->
        let char = remains mod String.length chars in
        Buffer.add_char buf chars.[char];
        loop (remains / String.length chars)
    in
    fun n ->
      Buffer.clear buf;
      loop n
  in
  let n_extensions = 100 in
  let n_base = 20 in
  let base = String.make n_base 'x' ^ "." in
  let extensions = Array.init n_extensions ~f:make_ext in
  let re () =
    (* This regular expression can be heavily optimized by computing the shared prefix *)
    (* The branch count is tied to [n_extensions] so the pattern always covers
       exactly the extensions that are matched below. *)
    List.init n_extensions ~f:(fun i ->
      let ext = make_ext i in
      let open Re in
      seq [ rep1 any; char '.'; str ext ])
    |> Re.alt
    |> Re.compile
  in
  test ~name:"shared prefixes" re (fun re ->
    let re = re () in
    Array.iter extensions ~f:(fun extension ->
      ignore (Re.execp re (base ^ extension))))
;;
(* The complete benchmark list, in run order: one group per entry of the case
   table above (exec / execp / exec_opt), the "tex gitignore" group, the
   "http" group, then the stand-alone benchmarks. *)
let benchmarks =
  let per_case (name, re, cases) =
    Bench.Test.create_group
      ~name
      [ exec_bench Re.exec "exec" re cases
      ; exec_bench Re.execp "execp" re cases
      ; exec_bench Re.exec_opt "exec_opt" re cases
      ]
  in
  let benches = List.map benchmarks ~f:per_case in
  let tex_benches =
    let runners =
      [ exec_bench_many Re.execp "execp"; exec_bench_many Re.exec_opt "exec_opt" ]
    in
    List.concat_map runners ~f:(fun run -> run Tex.ignore_re Tex.ignore_filesnames)
    |> Bench.Test.create_group ~name:"tex gitignore"
  in
  let http_benches =
    let open Http.Export in
    let manual_case (re, name) =
      test
        ~name
        (fun () -> Re.compile re)
        (fun compiled ->
          let re = compiled () in
          Http.read_all 0 re Http.requests)
    in
    let manual =
      List.concat_map [ request, "no group"; request_g, "group" ] ~f:manual_case
      |> Bench.Test.create_group ~name:"manual"
    in
    let execp_no_group =
      test
        ~name:"execp no group"
        (fun () -> Re.compile requests)
        (fun compiled -> ignore (Re.execp (compiled ()) Http.requests))
    in
    let all_gen =
      test
        ~name:"all_gen"
        (fun () -> Re.compile requests_g)
        (fun compiled -> Re.all (compiled ()) Http.requests)
    in
    let many = Bench.Test.create_group ~name:"auto" (execp_no_group @ all_gen) in
    Bench.Test.create_group ~name:"http" [ manual; many ]
  in
  List.concat
    [ benches
    ; [ tex_benches ]
    ; [ http_benches ]
    ; string_traversal
    ; compile_clean_star
    ; Memory.benchmarks
    ; repeated_sequence
    ; split
    ; prefixes
    ]
;;
(* Entry point. When RE_BENCH_FILTER is set, it is read as a comma-separated
   list of benchmark names and only those benchmarks are kept; if none match,
   the available names are printed and the program exits with status 1. *)
let () =
  let selected =
    match Sys.getenv "RE_BENCH_FILTER" with
    | None -> benchmarks
    | Some only ->
      let wanted = String.split only ~on:',' in
      let is_wanted bench =
        List.mem wanted (Bench.Test.name bench) ~equal:String.equal
      in
      (match List.filter benchmarks ~f:is_wanted with
       | [] ->
         print_endline "No benchmarks to run. Your options are:";
         List.iter benchmarks ~f:(fun bench ->
           Printf.printf "- %s\n" (Bench.Test.name bench));
         exit 1
       | kept -> kept)
  in
  Memtrace.trace_if_requested ();
  Command_unix.run (Bench.make_command selected)
;;
+182
vendor/opam/re/benchmarks/compare.ml
···11+open Core
(* A pair of values of the same type: [lhs] holds the value from the first
   input and [rhs] the value from the second. *)
module Both = struct
  type 'a t = { lhs : 'a; rhs : 'a }
end
(* A measurement read from a benchmark report, kept as an integer when the
   text parses as one and as a float otherwise. *)
module Value = struct
  type t =
    | Int of int
    | Float of float

  (* [of_string s] tries [Int.of_string] first and falls back to
     [Float.of_string] on any exception; a failure of the fallback propagates
     to the caller. *)
  let of_string s =
    try Int (Int.of_string s) with
    | _ -> Float (Float.of_string s)
  ;;

  (* [percent_delta x y] is the change from [x] to [y] as a percentage of [x],
     always returned as a [Float]. Mixed operands are converted to floats
     first. The division is a float division, so a zero [x] yields a
     non-finite result instead of raising. *)
  let rec percent_delta x y =
    match x, y with
    | Int x, Int y ->
      let delta = y - x in
      let open Float in
      Float (100. * Float.of_int delta / Float.of_int x)
    | Float x, Float y -> Float Float.(100. * (y - x) / x)
    | Float x, Int y -> percent_delta (Float x) (Float (Float.of_int y))
    | Int x, Float y -> percent_delta (Float (Float.of_int x)) (Float y)
  ;;

  (* Human-readable rendering used for the CSV cells. *)
  let to_csv t =
    match t with
    | Float f -> Float.to_string_hum f
    | Int x -> Int.to_string_hum x
  ;;

  (* Total ordering. Same-kind values use their own comparison; a mixed pair
     is compared as floats, mirroring how [percent_delta] treats mixed
     operands, so no combination aborts the program. *)
  let compare x y =
    match x, y with
    | Float x, Float y -> Float.compare x y
    | Int x, Int y -> Int.compare x y
    | Float x, Int y -> Float.compare x (Float.of_int y)
    | Int x, Float y -> Float.compare (Float.of_int x) y
  ;;
end
(* One benchmark entry. ['a] is the type of each metric: a single [Value.t]
   after parsing, a [Value.t Both.t] after two reports have been merged. *)
type 'a bench =
  { name : string (* taken from the "full_benchmark_name" field *)
  ; time_per_run_nanos : 'a
  ; major_words_per_run : 'a
  ; promoted_words_per_run : 'a
  ; minor_words_per_run : 'a
  }
(* [of_sexp sexp] reads one benchmark entry from a list of [(key value)]
   pairs; elements of any other shape are ignored. Fails with "expected list"
   when [sexp] is an atom, and raises (from [List.find_map_exn]) when a
   required key is absent. *)
let of_sexp (sexp : Sexp.t) =
  let fields =
    match sexp with
    | Atom _ -> failwith "expected list"
    | List entries ->
      List.filter_map entries ~f:(fun (entry : Sexp.t) ->
        match entry with
        | List [ Atom k; Atom v ] -> Some (k, v)
        | _ -> None)
  in
  (* First value bound to [key]. *)
  let lookup key =
    List.find_map_exn fields ~f:(fun (k, v) ->
      Option.some_if (String.equal k key) v)
  in
  let metric key = Value.of_string (lookup key) in
  let name = lookup "full_benchmark_name" in
  let time_per_run_nanos = metric "time_per_run_nanos" in
  let major_words_per_run = metric "major_words_per_run" in
  let promoted_words_per_run = metric "promoted_words_per_run" in
  let minor_words_per_run = metric "minor_words_per_run" in
  { name
  ; time_per_run_nanos
  ; major_words_per_run
  ; promoted_words_per_run
  ; minor_words_per_run
  }
;;
(* [parse_all s] parses [s] as one s-expression holding a list of benchmark
   entries and returns them in a map keyed by benchmark name. Fails with
   "list expected" when the top-level s-expression is an atom; the [_exn] map
   constructor raises when two entries share a name. *)
let parse_all s =
  match Sexp.of_string s with
  | Atom _ -> failwith "list expected"
  | List benches ->
    let entries = List.map benches ~f:of_sexp in
    String.Map.of_list_with_key_exn entries ~get_key:(fun entry -> entry.name)
;;
(* [merge_one prev next] pairs up every metric of two entries for the same
   benchmark: [lhs] comes from [prev] and [rhs] from [next]. The names are
   asserted to be equal. *)
let merge_one (prev : _ bench) (next : _ bench) =
  assert (String.equal prev.name next.name);
  let pair lhs rhs = { Both.lhs; rhs } in
  { name = next.name
  ; time_per_run_nanos = pair prev.time_per_run_nanos next.time_per_run_nanos
  ; major_words_per_run = pair prev.major_words_per_run next.major_words_per_run
  ; promoted_words_per_run =
      pair prev.promoted_words_per_run next.promoted_words_per_run
  ; minor_words_per_run = pair prev.minor_words_per_run next.minor_words_per_run
  }
;;
(* [merge lhs rhs] keeps only the benchmarks present in both maps, combining
   each pair with [merge_one]; benchmarks found on one side only are dropped. *)
let merge lhs rhs =
  Map.merge lhs rhs ~f:(fun ~key:_ -> function
    | `Both (prev, next) -> Some (merge_one prev next)
    | `Left _ | `Right _ -> None)
;;
(* [run ~prev ~next] reads the two report files, merges them, and writes a CSV
   table to standard output: a header row, then one row per benchmark present
   in both reports, sorted by increasing time delta. For each metric the row
   holds the value from [prev] followed by the percentage change to [next]. *)
let run ~prev ~next =
  let load path = parse_all (Stdio.In_channel.read_all path) in
  let before = load prev in
  let after = load next in
  (* The two cells emitted for one metric. *)
  let cells ({ lhs; rhs } : Value.t Both.t) =
    [ Value.to_csv lhs; Value.to_csv (Value.percent_delta lhs rhs) ]
  in
  (* Sort key paired with the CSV row of one benchmark. *)
  let keyed_row (b : Value.t Both.t bench) =
    let time_delta =
      Value.percent_delta b.time_per_run_nanos.lhs b.time_per_run_nanos.rhs
    in
    let metrics =
      [ b.time_per_run_nanos
      ; b.major_words_per_run
      ; b.promoted_words_per_run
      ; b.minor_words_per_run
      ]
    in
    time_delta, b.name :: List.concat_map metrics ~f:cells
  in
  let rows =
    Map.data (merge before after)
    |> List.map ~f:keyed_row
    |> List.sort ~compare:(fun (x, _) (y, _) -> Value.compare x y)
    |> List.map ~f:snd
  in
  let header =
    [ "name"
    ; "time_per_run_nanos"
    ; "delta (%)"
    ; "major_words_per_run"
    ; "delta (%)"
    ; "promoted_words_per_run"
    ; "delta (%)"
    ; "minor_words_per_run"
    ; "delta (%)"
    ]
  in
  Csv.output_all (Csv.to_channel Stdio.stdout) (header :: rows)
;;
(* Command-line interface: two required string flags, [-prev] and [-next],
   each naming a sexp report file, passed on to [run]. *)
let command =
  let param =
    let open Command.Param in
    let open Command.Param.Applicative_infix in
    let sexp_file flag_name = flag flag_name (required string) ~doc:"sexp file" in
    let prev = sexp_file "prev" in
    let next = sexp_file "next" in
    return (fun prev next () -> run ~prev ~next) <*> prev <*> next
  in
  Command.basic ~summary:"compare two runs" param
;;

let () = Command_unix.run command
···11+GET / HTTP/1.1
22+Host: www.reddit.com
33+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
44+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
55+Accept-Language: en-us,en;q=0.5
66+Accept-Encoding: gzip, deflate
77+Connection: keep-alive
88+99+GET /reddit.v_EZwRzV-Ns.css HTTP/1.1
1010+Host: www.redditstatic.com
1111+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
1212+Accept: text/css,*/*;q=0.1
1313+Accept-Language: en-us,en;q=0.5
1414+Accept-Encoding: gzip, deflate
1515+Connection: keep-alive
1616+Referer: http://www.reddit.com/
1717+1818+GET /reddit-init.en-us.O1zuMqOOQvY.js HTTP/1.1
1919+Host: www.redditstatic.com
2020+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
2121+Accept: */*
2222+Accept-Language: en-us,en;q=0.5
2323+Accept-Encoding: gzip, deflate
2424+Connection: keep-alive
2525+Referer: http://www.reddit.com/
2626+2727+GET /reddit.en-us.31yAfSoTsfo.js HTTP/1.1
2828+Host: www.redditstatic.com
2929+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
3030+Accept: */*
3131+Accept-Language: en-us,en;q=0.5
3232+Accept-Encoding: gzip, deflate
3333+Connection: keep-alive
3434+Referer: http://www.reddit.com/
3535+3636+GET /kill.png HTTP/1.1
3737+Host: www.redditstatic.com
3838+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
3939+Accept: image/png,image/*;q=0.8,*/*;q=0.5
4040+Accept-Language: en-us,en;q=0.5
4141+Accept-Encoding: gzip, deflate
4242+Connection: keep-alive
4343+Referer: http://www.reddit.com/
4444+4545+GET /icon.png HTTP/1.1
4646+Host: www.redditstatic.com
4747+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
4848+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
4949+Accept-Language: en-us,en;q=0.5
5050+Accept-Encoding: gzip, deflate
5151+Connection: keep-alive
5252+5353+GET /favicon.ico HTTP/1.1
5454+Host: www.redditstatic.com
5555+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
5656+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
5757+Accept-Language: en-us,en;q=0.5
5858+Accept-Encoding: gzip, deflate
5959+Connection: keep-alive
6060+6161+GET /AMZM4CWd6zstSC8y.jpg HTTP/1.1
6262+Host: b.thumbs.redditmedia.com
6363+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
6464+Accept: image/png,image/*;q=0.8,*/*;q=0.5
6565+Accept-Language: en-us,en;q=0.5
6666+Accept-Encoding: gzip, deflate
6767+Connection: keep-alive
6868+Referer: http://www.reddit.com/
6969+7070+GET /jz1d5Nm0w97-YyNm.jpg HTTP/1.1
7171+Host: b.thumbs.redditmedia.com
7272+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
7373+Accept: image/png,image/*;q=0.8,*/*;q=0.5
7474+Accept-Language: en-us,en;q=0.5
7575+Accept-Encoding: gzip, deflate
7676+Connection: keep-alive
7777+Referer: http://www.reddit.com/
7878+7979+GET /aWGO99I6yOcNUKXB.jpg HTTP/1.1
8080+Host: a.thumbs.redditmedia.com
8181+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
8282+Accept: image/png,image/*;q=0.8,*/*;q=0.5
8383+Accept-Language: en-us,en;q=0.5
8484+Accept-Encoding: gzip, deflate
8585+Connection: keep-alive
8686+Referer: http://www.reddit.com/
8787+8888+GET /rZ_rD5TjrJM0E9Aj.css HTTP/1.1
8989+Host: e.thumbs.redditmedia.com
9090+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
9191+Accept: text/css,*/*;q=0.1
9292+Accept-Language: en-us,en;q=0.5
9393+Accept-Encoding: gzip, deflate
9494+Connection: keep-alive
9595+Referer: http://www.reddit.com/
9696+9797+GET /tmsPwagFzyTvrGRx.jpg HTTP/1.1
9898+Host: a.thumbs.redditmedia.com
9999+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
100100+Accept: image/png,image/*;q=0.8,*/*;q=0.5
101101+Accept-Language: en-us,en;q=0.5
102102+Accept-Encoding: gzip, deflate
103103+Connection: keep-alive
104104+Referer: http://www.reddit.com/
105105+106106+GET /KYgUaLvXCK3TCEJx.jpg HTTP/1.1
107107+Host: a.thumbs.redditmedia.com
108108+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
109109+Accept: image/png,image/*;q=0.8,*/*;q=0.5
110110+Accept-Language: en-us,en;q=0.5
111111+Accept-Encoding: gzip, deflate
112112+Connection: keep-alive
113113+Referer: http://www.reddit.com/
114114+115115+GET /81pzxT5x2ozuEaxX.jpg HTTP/1.1
116116+Host: e.thumbs.redditmedia.com
117117+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
118118+Accept: image/png,image/*;q=0.8,*/*;q=0.5
119119+Accept-Language: en-us,en;q=0.5
120120+Accept-Encoding: gzip, deflate
121121+Connection: keep-alive
122122+Referer: http://www.reddit.com/
123123+124124+GET /MFqCUiUVPO5V8t6x.jpg HTTP/1.1
125125+Host: a.thumbs.redditmedia.com
126126+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
127127+Accept: image/png,image/*;q=0.8,*/*;q=0.5
128128+Accept-Language: en-us,en;q=0.5
129129+Accept-Encoding: gzip, deflate
130130+Connection: keep-alive
131131+Referer: http://www.reddit.com/
132132+133133+GET /TFpYTiAO5aEowokv.jpg HTTP/1.1
134134+Host: e.thumbs.redditmedia.com
135135+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
136136+Accept: image/png,image/*;q=0.8,*/*;q=0.5
137137+Accept-Language: en-us,en;q=0.5
138138+Accept-Encoding: gzip, deflate
139139+Connection: keep-alive
140140+Referer: http://www.reddit.com/
141141+142142+GET /eMWMpmm9APNeNqcF.jpg HTTP/1.1
143143+Host: e.thumbs.redditmedia.com
144144+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
145145+Accept: image/png,image/*;q=0.8,*/*;q=0.5
146146+Accept-Language: en-us,en;q=0.5
147147+Accept-Encoding: gzip, deflate
148148+Connection: keep-alive
149149+Referer: http://www.reddit.com/
150150+151151+GET /S-IpsJrOKuaK9GZ8.jpg HTTP/1.1
152152+Host: c.thumbs.redditmedia.com
153153+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
154154+Accept: image/png,image/*;q=0.8,*/*;q=0.5
155155+Accept-Language: en-us,en;q=0.5
156156+Accept-Encoding: gzip, deflate
157157+Connection: keep-alive
158158+Referer: http://www.reddit.com/
159159+160160+GET /3V6dj9PDsNnheDXn.jpg HTTP/1.1
161161+Host: c.thumbs.redditmedia.com
162162+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
163163+Accept: image/png,image/*;q=0.8,*/*;q=0.5
164164+Accept-Language: en-us,en;q=0.5
165165+Accept-Encoding: gzip, deflate
166166+Connection: keep-alive
167167+Referer: http://www.reddit.com/
168168+169169+GET /wQ3-VmNXhv8sg4SJ.jpg HTTP/1.1
170170+Host: c.thumbs.redditmedia.com
171171+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
172172+Accept: image/png,image/*;q=0.8,*/*;q=0.5
173173+Accept-Language: en-us,en;q=0.5
174174+Accept-Encoding: gzip, deflate
175175+Connection: keep-alive
176176+Referer: http://www.reddit.com/
177177+178178+GET /ixd1C1njpczEWC22.jpg HTTP/1.1
179179+Host: c.thumbs.redditmedia.com
180180+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
181181+Accept: image/png,image/*;q=0.8,*/*;q=0.5
182182+Accept-Language: en-us,en;q=0.5
183183+Accept-Encoding: gzip, deflate
184184+Connection: keep-alive
185185+Referer: http://www.reddit.com/
186186+187187+GET /nGsQj15VyOHMwmq8.jpg HTTP/1.1
188188+Host: c.thumbs.redditmedia.com
189189+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
190190+Accept: image/png,image/*;q=0.8,*/*;q=0.5
191191+Accept-Language: en-us,en;q=0.5
192192+Accept-Encoding: gzip, deflate
193193+Connection: keep-alive
194194+Referer: http://www.reddit.com/
195195+196196+GET /zT4yQmDxQLbIxK1b.jpg HTTP/1.1
197197+Host: c.thumbs.redditmedia.com
198198+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
199199+Accept: image/png,image/*;q=0.8,*/*;q=0.5
200200+Accept-Language: en-us,en;q=0.5
201201+Accept-Encoding: gzip, deflate
202202+Connection: keep-alive
203203+Referer: http://www.reddit.com/
204204+205205+GET /L5e1HcZLv1iu4nrG.jpg HTTP/1.1
206206+Host: f.thumbs.redditmedia.com
207207+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
208208+Accept: image/png,image/*;q=0.8,*/*;q=0.5
209209+Accept-Language: en-us,en;q=0.5
210210+Accept-Encoding: gzip, deflate
211211+Connection: keep-alive
212212+Referer: http://www.reddit.com/
213213+214214+GET /WJFFPxD8X4JO_lIG.jpg HTTP/1.1
215215+Host: f.thumbs.redditmedia.com
216216+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
217217+Accept: image/png,image/*;q=0.8,*/*;q=0.5
218218+Accept-Language: en-us,en;q=0.5
219219+Accept-Encoding: gzip, deflate
220220+Connection: keep-alive
221221+Referer: http://www.reddit.com/
222222+223223+GET /hVMVTDdjuY3bQox5.jpg HTTP/1.1
224224+Host: f.thumbs.redditmedia.com
225225+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
226226+Accept: image/png,image/*;q=0.8,*/*;q=0.5
227227+Accept-Language: en-us,en;q=0.5
228228+Accept-Encoding: gzip, deflate
229229+Connection: keep-alive
230230+Referer: http://www.reddit.com/
231231+232232+GET /rnWf8CjBcyPQs5y_.jpg HTTP/1.1
233233+Host: f.thumbs.redditmedia.com
234234+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
235235+Accept: image/png,image/*;q=0.8,*/*;q=0.5
236236+Accept-Language: en-us,en;q=0.5
237237+Accept-Encoding: gzip, deflate
238238+Connection: keep-alive
239239+Referer: http://www.reddit.com/
240240+241241+GET /gZJL1jNylKbGV4d-.jpg HTTP/1.1
242242+Host: d.thumbs.redditmedia.com
243243+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
244244+Accept: image/png,image/*;q=0.8,*/*;q=0.5
245245+Accept-Language: en-us,en;q=0.5
246246+Accept-Encoding: gzip, deflate
247247+Connection: keep-alive
248248+Referer: http://www.reddit.com/
249249+250250+GET /aNd2zNRLXiMnKUFh.jpg HTTP/1.1
251251+Host: c.thumbs.redditmedia.com
252252+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
253253+Accept: image/png,image/*;q=0.8,*/*;q=0.5
254254+Accept-Language: en-us,en;q=0.5
255255+Accept-Encoding: gzip, deflate
256256+Connection: keep-alive
257257+Referer: http://www.reddit.com/
258258+259259+GET /droparrowgray.gif HTTP/1.1
260260+Host: www.redditstatic.com
261261+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
262262+Accept: image/png,image/*;q=0.8,*/*;q=0.5
263263+Accept-Language: en-us,en;q=0.5
264264+Accept-Encoding: gzip, deflate
265265+Connection: keep-alive
266266+Referer: http://www.redditstatic.com/reddit.v_EZwRzV-Ns.css
267267+268268+GET /sprite-reddit.an0Lnf61Ap4.png HTTP/1.1
269269+Host: www.redditstatic.com
270270+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
271271+Accept: image/png,image/*;q=0.8,*/*;q=0.5
272272+Accept-Language: en-us,en;q=0.5
273273+Accept-Encoding: gzip, deflate
274274+Connection: keep-alive
275275+Referer: http://www.redditstatic.com/reddit.v_EZwRzV-Ns.css
276276+277277+GET /ga.js HTTP/1.1
278278+Host: www.google-analytics.com
279279+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
280280+Accept: */*
281281+Accept-Language: en-us,en;q=0.5
282282+Accept-Encoding: gzip, deflate
283283+Connection: keep-alive
284284+Referer: http://www.reddit.com/
285285+If-Modified-Since: Tue, 29 Oct 2013 19:33:51 GMT
286286+287287+GET /reddit/ads.html?sr=-reddit.com&bust2 HTTP/1.1
288288+Host: static.adzerk.net
289289+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
290290+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
291291+Accept-Language: en-us,en;q=0.5
292292+Accept-Encoding: gzip, deflate
293293+Connection: keep-alive
294294+Referer: http://www.reddit.com/
295295+296296+GET /pixel/of_destiny.png?v=hOlmDALJCWWdjzfBV4ZxJPmrdCLWB%2Ftq7Z%2Ffp4Q%2FxXbVPPREuMJMVGzKraTuhhNWxCCwi6yFEZg%3D&r=783333388 HTTP/1.1
297297+Host: pixel.redditmedia.com
298298+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
299299+Accept: image/png,image/*;q=0.8,*/*;q=0.5
300300+Accept-Language: en-us,en;q=0.5
301301+Accept-Encoding: gzip, deflate
302302+Connection: keep-alive
303303+Referer: http://www.reddit.com/
304304+305305+GET /UNcO-h_QcS9PD-Gn.jpg HTTP/1.1
306306+Host: c.thumbs.redditmedia.com
307307+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
308308+Accept: image/png,image/*;q=0.8,*/*;q=0.5
309309+Accept-Language: en-us,en;q=0.5
310310+Accept-Encoding: gzip, deflate
311311+Connection: keep-alive
312312+Referer: http://e.thumbs.redditmedia.com/rZ_rD5TjrJM0E9Aj.css
313313+314314+GET /welcome-lines.png HTTP/1.1
315315+Host: www.redditstatic.com
316316+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
317317+Accept: image/png,image/*;q=0.8,*/*;q=0.5
318318+Accept-Language: en-us,en;q=0.5
319319+Accept-Encoding: gzip, deflate
320320+Connection: keep-alive
321321+Referer: http://www.redditstatic.com/reddit.v_EZwRzV-Ns.css
322322+323323+GET /welcome-upvote.png HTTP/1.1
324324+Host: www.redditstatic.com
325325+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
326326+Accept: image/png,image/*;q=0.8,*/*;q=0.5
327327+Accept-Language: en-us,en;q=0.5
328328+Accept-Encoding: gzip, deflate
329329+Connection: keep-alive
330330+Referer: http://www.redditstatic.com/reddit.v_EZwRzV-Ns.css
331331+332332+GET /__utm.gif?utmwv=5.5.1&utms=1&utmn=720496082&utmhn=www.reddit.com&utme=8(site*srpath*usertype*uitype)9(%20reddit.com*%20reddit.com-GET_listing*guest*web)11(3!2)&utmcs=UTF-8&utmsr=2560x1600&utmvp=1288x792&utmsc=24-bit&utmul=en-us&utmje=1&utmfl=13.0%20r0&utmdt=reddit%3A%20the%20front%20page%20of%20the%20internet&utmhid=2129416330&utmr=-&utmp=%2F&utmht=1400862512705&utmac=UA-12131688-1&utmcc=__utma%3D55650728.585571751.1400862513.1400862513.1400862513.1%3B%2B__utmz%3D55650728.1400862513.1.1.utmcsr%3D(direct)%7Cutmccn%3D(direct)%7Cutmcmd%3D(none)%3B&utmu=qR~ HTTP/1.1
333333+Host: www.google-analytics.com
334334+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
335335+Accept: image/png,image/*;q=0.8,*/*;q=0.5
336336+Accept-Language: en-us,en;q=0.5
337337+Accept-Encoding: gzip, deflate
338338+Connection: keep-alive
339339+Referer: http://www.reddit.com/
340340+341341+GET /ImnpOQhbXUPkwceN.png HTTP/1.1
342342+Host: a.thumbs.redditmedia.com
343343+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
344344+Accept: image/png,image/*;q=0.8,*/*;q=0.5
345345+Accept-Language: en-us,en;q=0.5
346346+Accept-Encoding: gzip, deflate
347347+Connection: keep-alive
348348+Referer: http://www.reddit.com/
349349+350350+GET /ajax/libs/jquery/1.7.1/jquery.min.js HTTP/1.1
351351+Host: ajax.googleapis.com
352352+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
353353+Accept: */*
354354+Accept-Language: en-us,en;q=0.5
355355+Accept-Encoding: gzip, deflate
356356+Connection: keep-alive
357357+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
358358+359359+GET /__utm.gif?utmwv=5.5.1&utms=2&utmn=1493472678&utmhn=www.reddit.com&utmt=event&utme=5(AdBlock*enabled*false)(0)8(site*srpath*usertype*uitype)9(%20reddit.com*%20reddit.com-GET_listing*guest*web)11(3!2)&utmcs=UTF-8&utmsr=2560x1600&utmvp=1288x792&utmsc=24-bit&utmul=en-us&utmje=1&utmfl=13.0%20r0&utmdt=reddit%3A%20the%20front%20page%20of%20the%20internet&utmhid=2129416330&utmr=-&utmp=%2F&utmht=1400862512708&utmac=UA-12131688-1&utmni=1&utmcc=__utma%3D55650728.585571751.1400862513.1400862513.1400862513.1%3B%2B__utmz%3D55650728.1400862513.1.1.utmcsr%3D(direct)%7Cutmccn%3D(direct)%7Cutmcmd%3D(none)%3B&utmu=6R~ HTTP/1.1
360360+Host: www.google-analytics.com
361361+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
362362+Accept: image/png,image/*;q=0.8,*/*;q=0.5
363363+Accept-Language: en-us,en;q=0.5
364364+Accept-Encoding: gzip, deflate
365365+Connection: keep-alive
366366+Referer: http://www.reddit.com/
367367+368368+GET /ados.js?q=43 HTTP/1.1
369369+Host: secure.adzerk.net
370370+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
371371+Accept: */*
372372+Accept-Language: en-us,en;q=0.5
373373+Accept-Encoding: gzip, deflate
374374+Connection: keep-alive
375375+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
376376+377377+GET /fetch-trackers?callback=jQuery111005268222517967478_1400862512407&ids%5B%5D=t3_25jzeq-t8_k2ii&_=1400862512408 HTTP/1.1
378378+Host: tracker.redditmedia.com
379379+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
380380+Accept: */*
381381+Accept-Language: en-us,en;q=0.5
382382+Accept-Encoding: gzip, deflate
383383+Connection: keep-alive
384384+Referer: http://www.reddit.com/
385385+386386+GET /ados?t=1400862512892&request={%22Placements%22:[{%22A%22:5146,%22S%22:24950,%22D%22:%22main%22,%22AT%22:5},{%22A%22:5146,%22S%22:24950,%22D%22:%22sponsorship%22,%22AT%22:8}],%22Keywords%22:%22-reddit.com%22,%22Referrer%22:%22http%3A%2F%2Fwww.reddit.com%2F%22,%22IsAsync%22:true,%22WriteResults%22:true} HTTP/1.1
387387+Host: engine.adzerk.net
388388+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
389389+Accept: */*
390390+Accept-Language: en-us,en;q=0.5
391391+Accept-Encoding: gzip, deflate
392392+Connection: keep-alive
393393+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
394394+395395+GET /pixel/of_doom.png?id=t3_25jzeq-t8_k2ii&hash=da31d967485cdbd459ce1e9a5dde279fef7fc381&r=1738649500 HTTP/1.1
396396+Host: pixel.redditmedia.com
397397+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
398398+Accept: image/png,image/*;q=0.8,*/*;q=0.5
399399+Accept-Language: en-us,en;q=0.5
400400+Accept-Encoding: gzip, deflate
401401+Connection: keep-alive
402402+Referer: http://www.reddit.com/
403403+404404+GET /Extensions/adFeedback.js HTTP/1.1
405405+Host: static.adzrk.net
406406+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
407407+Accept: */*
408408+Accept-Language: en-us,en;q=0.5
409409+Accept-Encoding: gzip, deflate
410410+Connection: keep-alive
411411+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
412412+413413+GET /Extensions/adFeedback.css HTTP/1.1
414414+Host: static.adzrk.net
415415+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
416416+Accept: text/css,*/*;q=0.1
417417+Accept-Language: en-us,en;q=0.5
418418+Accept-Encoding: gzip, deflate
419419+Connection: keep-alive
420420+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
421421+422422+GET /reddit/ads-load.html?bust2 HTTP/1.1
423423+Host: static.adzerk.net
424424+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
425425+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
426426+Accept-Language: en-us,en;q=0.5
427427+Accept-Encoding: gzip, deflate
428428+Connection: keep-alive
429429+Referer: http://www.reddit.com/
430430+431431+GET /Advertisers/a774d7d6148046efa89403a8db635a81.jpg HTTP/1.1
432432+Host: static.adzerk.net
433433+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
434434+Accept: image/png,image/*;q=0.8,*/*;q=0.5
435435+Accept-Language: en-us,en;q=0.5
436436+Accept-Encoding: gzip, deflate
437437+Connection: keep-alive
438438+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
439439+440440+GET /i.gif?e=eyJhdiI6NjIzNTcsImF0Ijo1LCJjbSI6MTE2MzUxLCJjaCI6Nzk4NCwiY3IiOjMzNzAxNSwiZGkiOiI4NmI2Y2UzYWM5NDM0MjhkOTk2ZTg4MjYwZDE5ZTE1YyIsImRtIjoxLCJmYyI6NDE2MTI4LCJmbCI6MjEwNDY0LCJrdyI6Ii1yZWRkaXQuY29tIiwibWsiOiItcmVkZGl0LmNvbSIsIm53Ijo1MTQ2LCJwYyI6MCwicHIiOjIwMzYyLCJydCI6MSwicmYiOiJodHRwOi8vd3d3LnJlZGRpdC5jb20vIiwic3QiOjI0OTUwLCJ1ayI6InVlMS01ZWIwOGFlZWQ5YTc0MDFjOTE5NWNiOTMzZWI3Yzk2NiIsInRzIjoxNDAwODYyNTkzNjQ1fQ&s=lwlbFf2Uywt7zVBFRj_qXXu7msY HTTP/1.1
441441+Host: engine.adzerk.net
442442+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
443443+Accept: image/png,image/*;q=0.8,*/*;q=0.5
444444+Accept-Language: en-us,en;q=0.5
445445+Accept-Encoding: gzip, deflate
446446+Connection: keep-alive
447447+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
448448+Cookie: azk=ue1-5eb08aeed9a7401c9195cb933eb7c966
449449+450450+GET /BurstingPipe/adServer.bs?cn=tf&c=19&mc=imp&pli=9994987&PluID=0&ord=1400862593644&rtu=-1 HTTP/1.1
451451+Host: bs.serving-sys.com
452452+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
453453+Accept: image/png,image/*;q=0.8,*/*;q=0.5
454454+Accept-Language: en-us,en;q=0.5
455455+Accept-Encoding: gzip, deflate
456456+Connection: keep-alive
457457+Referer: http://static.adzerk.net/reddit/ads.html?sr=-reddit.com&bust2
458458+459459+GET /Advertisers/63cfd0044ffd49c0a71a6626f7a1d8f0.jpg HTTP/1.1
460460+Host: static.adzerk.net
461461+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
462462+Accept: image/png,image/*;q=0.8,*/*;q=0.5
463463+Accept-Language: en-us,en;q=0.5
464464+Accept-Encoding: gzip, deflate
465465+Connection: keep-alive
466466+Referer: http://static.adzerk.net/reddit/ads-load.html?bust2
467467+468468+GET /BurstingPipe/adServer.bs?cn=tf&c=19&mc=imp&pli=9962555&PluID=0&ord=1400862593645&rtu=-1 HTTP/1.1
469469+Host: bs.serving-sys.com
470470+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
471471+Accept: image/png,image/*;q=0.8,*/*;q=0.5
472472+Accept-Language: en-us,en;q=0.5
473473+Accept-Encoding: gzip, deflate
474474+Connection: keep-alive
475475+Referer: http://static.adzerk.net/reddit/ads-load.html?bust2
476476+Cookie: S_9994987=6754579095859875029; A4=01fmFvgRnI09SF00000; u2=d1263d39-874b-4a89-86cd-a2ab0860ed4e3Zl040
477477+478478+GET /i.gif?e=eyJhdiI6NjIzNTcsImF0Ijo4LCJjbSI6MTE2MzUxLCJjaCI6Nzk4NCwiY3IiOjMzNzAxOCwiZGkiOiI3OTdlZjU3OWQ5NjE0ODdiODYyMGMyMGJkOTE4YzNiMSIsImRtIjoxLCJmYyI6NDE2MTMxLCJmbCI6MjEwNDY0LCJrdyI6Ii1yZWRkaXQuY29tIiwibWsiOiItcmVkZGl0LmNvbSIsIm53Ijo1MTQ2LCJwYyI6MCwicHIiOjIwMzYyLCJydCI6MSwicmYiOiJodHRwOi8vd3d3LnJlZGRpdC5jb20vIiwic3QiOjI0OTUwLCJ1ayI6InVlMS01ZWIwOGFlZWQ5YTc0MDFjOTE5NWNiOTMzZWI3Yzk2NiIsInRzIjoxNDAwODYyNTkzNjQ2fQ&s=OjzxzXAgQksbdQOHNm-bjZcnZPA HTTP/1.1
479479+Host: engine.adzerk.net
480480+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1
481481+Accept: image/png,image/*;q=0.8,*/*;q=0.5
482482+Accept-Language: en-us,en;q=0.5
483483+Accept-Encoding: gzip, deflate
484484+Connection: keep-alive
485485+Referer: http://static.adzerk.net/reddit/ads-load.html?bust2
486486+Cookie: azk=ue1-5eb08aeed9a7401c9195cb933eb7c966
487487+488488+GET /subscribe?host_int=1042356184&ns_map=571794054_374233948806,464381511_13349283399&user_id=245722467&nid=1399334269710011966&ts=1400862514 HTTP/1.1
489489+Host: notify8.dropbox.com
490490+Accept-Encoding: identity
491491+Connection: keep-alive
492492+X-Dropbox-Locale: en_US
493493+User-Agent: DropboxDesktopClient/2.7.54 (Macintosh; 10.8; ('i32',); en_US)
494494+
+40
vendor/opam/re/benchmarks/http.ml
···11+open Re
(* Optional horizontal whitespace: zero or more [blank] characters. *)
let space = rep blank

(* The literal two-character line terminator "\r\n". *)
let crlf = str "\r\n"

(* One or more characters that are neither in the range '\000'..'\031' nor in
   the separator set below.
   NOTE(review): the space character (code 32) is in neither excluded set, so
   a token may contain spaces -- confirm this is intended. *)
let token =
  let controls = rg '\000' '\031' in
  let separators = set "\127)(<>@,;:\\/[]?={}" in
  rep1 (compl [ controls; separators ])

(* The request method is matched as a plain token. *)
let meth = token
(* The protocol version: the literal "HTTP/" followed by one or more digits,
   optionally followed by '.' and one or more further digits. *)
let version =
  let number = rep1 digit in
  let fraction = seq [ char '.'; number ] in
  seq [ str "HTTP/"; seq [ number; opt fraction ] ]
;;
(* The request target: one or more characters other than '\n'. *)
let uri = compl [ char '\n' ] |> rep1

(* The request line. Method, target and version are each captured in their
   own group; optional blanks are accepted before and after the method and
   after the version. *)
let request_line =
  let captured_method = group meth in
  let captured_uri = group uri in
  let captured_version = group version in
  seq [ space; captured_method; space; captured_uri; captured_version; space ]
(* A single header line: a captured key (one or more characters other than
   ':'), a ':' separator, a captured value (one or more characters other than
   '\n'), optional blanks around each part, and a terminating CR LF. *)
let header =
  let run_excluding c = rep1 (compl [ char c ]) in
  let key = group (run_excluding ':') in
  let value = group (run_excluding '\n') in
  seq [ space; key; space; char ':'; space; value; space; crlf ]
;;
(* A complete request: the request line, CR LF, any number of header lines,
   and a final CR LF. *)
let request' = [ request_line; crlf; rep header; crlf ] |> seq
(* The regexes exposed to the benchmark driver. The [_g] variants wrap the
   request in [no_group]; the plural variants repeat it one or more times. *)
module Export = struct
  let request = request'
  let request_g = no_group request'
  let requests = rep1 request
  let requests_g = rep1 request_g
end
(* Whole contents of the benchmark input file, read when this module is
   initialised. The path is relative to the current working directory. *)
let requests = "benchmarks/http-requests.txt" |> Stdio.In_channel.read_all
(* [read_all pos re reqs] repeatedly matches [re] against [reqs], starting at
   [pos]. After each match the search resumes one character past the end of
   the whole match (group 0), until the position reaches the end of [reqs].
   Match results are discarded. Any exception raised by [Re.exec] when no
   match is found propagates to the caller. *)
let read_all pos re reqs =
  let total = String.length reqs in
  let cursor = ref pos in
  while !cursor < total do
    let m = Re.exec ~pos:(!cursor) re reqs in
    let _, stop = Re.Group.offset m 0 in
    cursor := stop + 1
  done
;;
+31
vendor/opam/re/benchmarks/memory.ml
···11+open Core
22+(* This set of benchmarks is designed for testing re's memory usage rather than
33+ speed. *)
module Bench = Core_bench.Bench

(* Shared size parameter: the repetition count used in the regexes below, the
   number of '1' characters appended to the input string, and the largest
   benchmark index. *)
let size = 1000
(* A pathological regex intended to consume a lot of memory: any run of
   '0'/'1', then a '1', then exactly [size] more '0'/'1' characters. The regex
   is built and compiled afresh on every call. *)
let re () =
  let open Re in
  let bit = set "01" in
  [ rep bit; char '1'; repn bit size (Some size) ] |> seq |> compile
;;
(* A second pathological regex: the same shape as [re] with a trailing 'x'
   appended. Also built and compiled afresh on every call. *)
let re2 () =
  let open Re in
  let bit = set "01" in
  compile (seq [ rep bit; char '1'; repn bit size (Some size); char 'x' ])
;;
(* Input string: "01" followed by [size] copies of '1'. *)
let str = "01" ^ String.make size '1'

(* One indexed benchmark per regex constructor. The index is the number of
   input characters handed to [Re.execp], capped at the length of [str]. The
   regex is rebuilt inside the measured closure, so construction is part of
   what is measured. *)
let benchmarks =
  let lengths = [ 10; 20; 40; 80; 100; size ] in
  let run make_re len () =
    let compiled = make_re () in
    let len = Int.min (String.length str) len in
    ignore (Re.execp ~pos:0 ~len compiled str)
  in
  let to_test (name, make_re) =
    Bench.Test.create_indexed ~name ~args:lengths (fun len ->
      Staged.stage (run make_re len))
  in
  ListLabels.map ~f:to_test [ "memory 1", re; "memory 2", re2 ]
;;
+1
vendor/opam/re/benchmarks/memory.mli
···11+val benchmarks : Core_bench.Bench.Test.t list
···11+open Import
(* Nodes shared by character sets and regular expressions, over payloads of
   type ['a]. The second type parameter is an index recording which
   constructors may occur: [Alternative] is typed with [> `Uncased], while
   [No_case] and [Case] are typed with [> `Cased]. A value indexed by
   [ `Uncased ] alone can therefore only be an [Alternative]. *)
type ('a, _) ast =
  | Alternative : 'a list -> ('a, [> `Uncased ]) ast
  | No_case : 'a -> ('a, [> `Cased ]) ast
  | Case : 'a -> ('a, [> `Cased ]) ast
(* [dyn_of_ast f ast] converts [ast] to a [Dyn] variant named after its
   constructor, using [f] to convert each payload. *)
let dyn_of_ast f ast =
  let open Dyn in
  match ast with
  | Alternative branches -> variant "Alternative" (List.map branches ~f)
  | No_case inner -> variant "No_case" [ f inner ]
  | Case inner -> variant "Case" [ f inner ]
;;
(* The alternative with no branches. *)
let empty_alternative = (Alternative [] : ('a, 'b) ast)
(* [equal_ast eq x y] compares two case-free nodes. The [ `Uncased ] index
   rules out [Case] and [No_case], so both arguments are alternatives and
   their branch lists are compared element-wise with [eq]. *)
let equal_ast (type a) eq (x : (a, [ `Uncased ]) ast) (y : (a, [ `Uncased ]) ast) =
  match x with
  | Alternative xs ->
    (match y with
     | Alternative ys -> List.equal ~eq xs ys)
;;
(* [pp_ast f fmt ast] prints [ast] as an s-expression tagged with its
   constructor name, printing payloads with [f]. *)
let pp_ast (type a b) f fmt (ast : (a, b) ast) =
  let open Fmt in
  match ast with
  | Alternative branches -> sexp fmt "Alternative" (list f) branches
  | No_case inner -> sexp fmt "No_case" f inner
  | Case inner -> sexp fmt "Case" f inner
;;
(* Symbolic character-set expressions. [Cset] wraps a concrete [Cset.t]; the
   other constructors combine sub-expressions, and [Cast] embeds an [ast] node
   (alternatives and case markers) whose payloads are themselves character-set
   expressions. *)
type cset =
  | Cset of Cset.t
  | Intersection of cset list
  | Complement of cset list
  | Difference of cset * cset
  | Cast of (cset, [ `Cased | `Uncased ]) ast
(* Converts a character-set expression to a [Dyn] variant named after its
   constructor, recursing into sub-expressions. *)
let rec dyn_of_cset node =
  let open Dyn in
  match node with
  | Cset set -> variant "Cset" [ Cset.to_dyn set ]
  | Cast ast -> variant "Cast" [ dyn_of_ast dyn_of_cset ast ]
  | Intersection members -> variant "Intersection" (List.map members ~f:dyn_of_cset)
  | Complement members -> variant "Complement" (List.map members ~f:dyn_of_cset)
  | Difference (left, right) ->
    variant "Difference" [ dyn_of_cset left; dyn_of_cset right ]
;;
(* The regular-expression syntax tree. ['a] is the type of character sets
   stored in [Set] leaves; ['case] is the index passed on to the [ast] nodes
   wrapped by [Ast]. [Repeat (r, i, j)] carries a lower bound [i] and an
   optional upper bound [j]; [Group] carries an optional name. *)
type ('a, 'case) gen =
  | Set of 'a
  | Ast of (('a, 'case) gen, 'case) ast
  | Sequence of ('a, 'case) gen list
  | Repeat of ('a, 'case) gen * int * int option
  | Beg_of_line
  | End_of_line
  | Beg_of_word
  | End_of_word
  | Not_bound
  | Beg_of_str
  | End_of_str
  | Last_end_of_line
  | Start
  | Stop
  | Group of string option * ('a, 'case) gen
  | No_group of ('a, 'case) gen
  | Nest of ('a, 'case) gen
  | Pmark of Pmark.t * ('a, 'case) gen
  | Sem of Automata.Sem.t * ('a, 'case) gen
  | Sem_greedy of Automata.Rep_kind.t * ('a, 'case) gen
(* [dyn_of_gen f gen] converts a syntax tree to [Dyn], using [f] for the
   character sets held in [Set] leaves. Constructors with arguments become
   variants named after the constructor; constant constructors become enums.
   For [Repeat] the upper bound is emitted only when present; for [Group] the
   name, when present, precedes the body. *)
let rec dyn_of_gen f gen =
  let open Dyn in
  let self = dyn_of_gen f in
  let wrap tag inner = variant tag [ self inner ] in
  match gen with
  | Set a -> variant "Set" [ f a ]
  | Ast ast -> variant "Ast" [ dyn_of_ast self ast ]
  | Sequence items -> variant "Sequence" (List.map items ~f:self)
  | Repeat (body, lower, upper) ->
    let upper_dyn =
      match upper with
      | Some n -> [ int n ]
      | None -> []
    in
    variant "Repeat" (self body :: int lower :: upper_dyn)
  | Group (name, body) ->
    let body_dyn = [ self body ] in
    let args =
      match name with
      | Some name -> string name :: body_dyn
      | None -> body_dyn
    in
    variant "Group" args
  | No_group body -> wrap "No_group" body
  | Nest body -> wrap "Nest" body
  | Pmark (pmark, body) -> variant "Pmark" [ Pmark.to_dyn pmark; self body ]
  | Sem (sem, body) -> variant "Sem" [ Automata.Sem.to_dyn sem; self body ]
  | Sem_greedy (kind, body) ->
    variant "Sem_greedy" [ Automata.Rep_kind.to_dyn kind; self body ]
  | Beg_of_line -> enum "Beg_of_line"
  | End_of_line -> enum "End_of_line"
  | Beg_of_word -> enum "Beg_of_word"
  | End_of_word -> enum "End_of_word"
  | Not_bound -> enum "Not_bound"
  | Beg_of_str -> enum "Beg_of_str"
  | End_of_str -> enum "End_of_str"
  | Last_end_of_line -> enum "Last_end_of_line"
  | Start -> enum "Start"
  | Stop -> enum "Stop"
;;
(* [pp_gen pp_cset fmt t] prints a syntax tree in s-expression form, printing
   [Set] leaves with [pp_cset]. Constant constructors print as their bare
   name. A named group prints under the tag "Named_group", an unnamed one
   under "Group". *)
let rec pp_gen pp_cset fmt t =
  let open Format in
  let open Fmt in
  let pp = pp_gen pp_cset in
  let atom name = str fmt name in
  let wrapped tag inner = sexp fmt tag pp inner in
  match t with
  | Set cset -> pp_cset fmt cset
  | Ast node -> pp_ast pp fmt node
  | Sequence items -> sexp fmt "Sequence" (list pp) items
  | Repeat (body, lower, upper) ->
    let pp_bounds fmt () = fprintf fmt "%a@ %d%a" pp body lower optint upper in
    sexp fmt "Repeat" pp_bounds ()
  | Group (None, body) -> wrapped "Group" body
  | Group (Some name, body) -> sexp fmt "Named_group" (pair str pp) (name, body)
  | No_group body -> wrapped "No_group" body
  | Nest body -> wrapped "Nest" body
  | Pmark (mark, body) -> sexp fmt "Pmark" (pair Pmark.pp pp) (mark, body)
  | Sem (sem, body) -> sexp fmt "Sem" (pair Automata.Sem.pp pp) (sem, body)
  | Sem_greedy (kind, body) ->
    sexp fmt "Sem_greedy" (pair Automata.Rep_kind.pp pp) (kind, body)
  | Beg_of_line -> atom "Beg_of_line"
  | End_of_line -> atom "End_of_line"
  | Beg_of_word -> atom "Beg_of_word"
  | End_of_word -> atom "End_of_word"
  | Not_bound -> atom "Not_bound"
  | Beg_of_str -> atom "Beg_of_str"
  | End_of_str -> atom "End_of_str"
  | Last_end_of_line -> atom "Last_end_of_line"
  | Start -> atom "Start"
  | Stop -> atom "Stop"
;;
(* Prints a character-set expression in s-expression form. A concrete set is
   printed under the tag "Set"; [Cast] nodes are delegated to [pp_ast]. *)
let rec pp_cset fmt cset =
  let open Fmt in
  match cset with
  | Cset set -> sexp fmt "Set" Cset.pp set
  | Cast node -> pp_ast pp_cset fmt node
  | Intersection members -> sexp fmt "Intersection" (list pp_cset) members
  | Complement members -> sexp fmt "Complement" (list pp_cset) members
  | Difference (left, right) ->
    sexp fmt "Difference" (pair pp_cset pp_cset) (left, right)
;;
(* [equal cset x1 x2] is structural equality on syntax trees, comparing [Set]
   leaves with [cset]. Two [Group] nodes are never considered equal, so that
   groups are not merged. Any pair of different constructors is unequal.
   NOTE(review): a pair of [No_group] nodes has no dedicated case and thus
   also compares as unequal -- confirm this is intended. *)
let rec equal cset x1 x2 =
  let same = equal cset in
  match x1, x2 with
  | Set a, Set b -> cset a b
  | Sequence xs, Sequence ys -> List.equal ~eq:same xs ys
  | Repeat (a, lo_a, hi_a), Repeat (b, lo_b, hi_b) ->
    Int.equal lo_a lo_b && Option.equal Int.equal hi_a hi_b && same a b
  | Ast a, Ast b -> equal_ast same a b
  | Nest a, Nest b -> same a b
  | Pmark (mark_a, a), Pmark (mark_b, b) -> Pmark.equal mark_a mark_b && same a b
  | Sem (sem_a, a), Sem (sem_b, b) -> Poly.equal sem_a sem_b && same a b
  | Sem_greedy (kind_a, a), Sem_greedy (kind_b, b) ->
    Poly.equal kind_a kind_b && same a b
  | Group _, Group _ ->
    (* Do not merge groups! *)
    false
  | Beg_of_line, Beg_of_line
  | End_of_line, End_of_line
  | Beg_of_word, Beg_of_word
  | End_of_word, End_of_word
  | Not_bound, Not_bound
  | Beg_of_str, Beg_of_str
  | End_of_str, End_of_str
  | Last_end_of_line, Last_end_of_line
  | Start, Start
  | Stop, Stop -> true
  | _ -> false
;;
(* The user-facing syntax tree: symbolic character sets, case markers
   allowed. *)
type t = (cset, [ `Cased | `Uncased ]) gen

(* A tree over concrete character sets with no case markers left. *)
type no_case = (Cset.t, [ `Uncased ]) gen

(* [Dyn] conversion and pretty-printer specialised to symbolic sets. *)
let to_dyn t = dyn_of_gen dyn_of_cset t
let pp fmt t = pp_gen pp_cset fmt t

(* Wraps a concrete character set as a [Set] leaf. *)
let cset set = Set (Cset set)
(* [handle_case_cset ign_case node] evaluates a symbolic character-set
   expression to a concrete [Cset.t]. When [ign_case] is true, leaf sets are
   passed through [Cset.case_insens]. [Case] and [No_case] nodes reset the
   flag to false and true respectively for their sub-expression. A complement
   is computed as [Cset.cany] minus the union of its members. *)
let rec handle_case_cset ign_case node =
  let resolve = handle_case_cset ign_case in
  let resolve_all members = List.map ~f:resolve members in
  let complement set = Cset.diff Cset.cany set in
  match node with
  | Cset s -> if ign_case then Cset.case_insens s else s
  | Cast (Case inner) -> handle_case_cset false inner
  | Cast (No_case inner) -> handle_case_cset true inner
  | Cast (Alternative members) -> Cset.union_all (resolve_all members)
  | Intersection members -> Cset.intersect_all (resolve_all members)
  | Complement members -> complement (Cset.union_all (resolve_all members))
  | Difference (keep, drop) -> Cset.inter (resolve keep) (complement (resolve drop))
;;
(* [handle_case ign_case re] removes every [Case]/[No_case] marker from [re]
   and turns each symbolic set into a concrete one via [handle_case_cset].
   [ign_case] is the case-insensitivity flag in force; a [Case] node continues
   into its body with the flag set to false, a [No_case] node with the flag
   set to true. All other nodes are rebuilt with their children converted. *)
let rec handle_case ign_case (re : t) : (Cset.t, [ `Uncased ]) gen =
  let recur = handle_case ign_case in
  match re with
  | Set s -> Set (handle_case_cset ign_case s)
  | Ast (Case body) -> handle_case false body
  | Ast (No_case body) -> handle_case true body
  | Ast (Alternative branches) -> Ast (Alternative (List.map ~f:recur branches))
  | Sequence items -> Sequence (List.map ~f:recur items)
  | Repeat (body, lower, upper) -> Repeat (recur body, lower, upper)
  | Group (name, body) -> Group (name, recur body)
  | No_group body -> No_group (recur body)
  | Nest body -> Nest (recur body)
  | Pmark (mark, body) -> Pmark (mark, recur body)
  | Sem (sem, body) -> Sem (sem, recur body)
  | Sem_greedy (kind, body) -> Sem_greedy (kind, recur body)
  | Beg_of_line -> Beg_of_line
  | End_of_line -> End_of_line
  | Beg_of_word -> Beg_of_word
  | End_of_word -> End_of_word
  | Not_bound -> Not_bound
  | Beg_of_str -> Beg_of_str
  | End_of_str -> End_of_str
  | Last_end_of_line -> Last_end_of_line
  | Start -> Start
  | Stop -> Stop
;;
(* Smart constructors over [t], plus [witness]. *)
module Export = struct
  type nonrec t = t

  let pp = pp

  (* Sequence of regexes; a one-element list collapses to that element. *)
  let seq l =
    match l with
    | [ only ] -> only
    | _ -> Sequence l
  ;;

  (* Single-character regex. The lookup function returned by [Dense_map.make]
     is created once and shared by every call. *)
  let char =
    let table =
      Dense_map.make ~size:256 ~f:(fun code -> cset (Cset.csingle (Char.chr code)))
    in
    fun c -> table (Char.code c)
  ;;

  let any = cset Cset.cany

  (* The sequence of the characters of [s], in order. *)
  let str s : t =
    let rec collect i acc =
      if i < 0 then acc else collect (i - 1) (char s.[i] :: acc)
    in
    seq (collect (String.length s - 1) [])
  ;;

  (* [Some] of the sets held by [elems] when every element is a [Set] leaf,
     [None] as soon as one is not. *)
  let as_set_elems elems =
    let unwrap = function
      | Set e -> e
      | _ -> raise_notrace Exit
    in
    try Some (List.map elems ~f:unwrap) with
    | Exit -> None
  ;;

  let empty : t = Ast empty_alternative

  (* Alternation. No branch gives [empty]; a single branch is returned as is;
     branches that are all [Set] leaves are merged into one [Set] leaf. *)
  let alt : t list -> t = function
    | [] -> empty
    | [ single ] -> single
    | elems ->
      (match as_set_elems elems with
       | Some sets -> Set (Cast (Alternative sets))
       | None -> Ast (Alternative elems))
  ;;

  let epsilon = seq []

  (* [repn r i j] repeats [r] at least [i] times and, when [j] is [Some n], at
     most [n] times.
     @raise Invalid_argument if [i] is negative or the upper bound is below
     [i]. *)
  let repn r i j =
    let bad_bounds =
      i < 0
      ||
      match j with
      | Some upper -> upper < i
      | None -> false
    in
    if bad_bounds
    then invalid_arg "Re.repn"
    else (
      match i, j with
      | 0, Some 0 -> epsilon
      | 1, Some 1 -> r
      | _, _ -> Repeat (r, i, j))
  ;;

  let rep r = repn r 0 None
  let rep1 r = repn r 1 None
  let opt r = repn r 0 (Some 1)

  (* Constant constructors and simple combinations of them. *)
  let bol = Beg_of_line
  let eol = End_of_line
  let bow = Beg_of_word
  let eow = End_of_word
  let word r = seq [ bow; r; eow ]
  let not_boundary = Not_bound
  let bos = Beg_of_str
  let eos = End_of_str
  let whole_string r = seq [ bos; r; eos ]
  let leol = Last_end_of_line
  let start = Start
  let stop = Stop

  (* A constructor of [ast] nodes that works for any payload type. *)
  type 'b f = { f : 'a. 'a -> ('a, 'b) ast }

  (* Applies [f] inside a [Set] leaf when [t] is one, so that the result is
     still a [Set]; otherwise wraps [t] itself in an [Ast] node. *)
  let make_set f t =
    match t with
    | Set set -> Set (Cast (f.f set))
    | _ -> Ast (f.f t)
  ;;

  (* Returns [Set] leaves unchanged and applies [wrap] to anything else. *)
  let preserve_set wrap t =
    match t with
    | Set _ -> t
    | _ -> wrap t
  ;;

  let longest = preserve_set (fun t -> Sem (`Longest, t))
  let shortest = preserve_set (fun t -> Sem (`Shortest, t))
  let first = preserve_set (fun t -> Sem (`First, t))
  let greedy = preserve_set (fun t -> Sem_greedy (`Greedy, t))
  let non_greedy = preserve_set (fun t -> Sem_greedy (`Non_greedy, t))
  let group ?name r = Group (name, r)
  let no_group = preserve_set (fun t -> No_group t)
  let nest r = Nest r
  let set chars = cset (Cset.set chars)

  (* Wraps [r] with a freshly generated mark and returns both. *)
  let mark r =
    let fresh = Pmark.gen () in
    fresh, Pmark (fresh, r)
  ;;

  (**** Character sets ****)

  (* Like [as_set_elems], but raises [Invalid_argument name] on failure. *)
  let as_set_or_error name elems =
    match as_set_elems elems with
    | Some sets -> sets
    | None -> invalid_arg name
  ;;

  let inter elems = Set (Intersection (as_set_or_error "Re.inter" elems))
  let compl elems = Set (Complement (as_set_or_error "Re.compl" elems))

  (* Set difference.
     @raise Invalid_argument unless both arguments are [Set] leaves. *)
  let diff r r' =
    match r with
    | Set left ->
      (match r' with
       | Set right -> Set (Difference (left, right))
       | _ -> invalid_arg "Re.diff")
    | _ -> invalid_arg "Re.diff"
  ;;

  let case t = make_set { f = (fun r -> Case r) } t
  let no_case t = make_set { f = (fun r -> No_case r) } t

  (* [witness t] builds a string from [t] after resolving case markers with
     case sensitivity on: a set contributes one character picked by
     [Cset.pick], an alternative contributes the witness of its first branch,
     a repetition contributes its body's witness repeated the minimum number
     of times, and constant constructors contribute the empty string. Fails
     with an assertion error on an alternative with no branches. *)
  let witness t =
    let rec build (re : no_case) =
      match re with
      | Set c -> String.make 1 (Cset.to_char (Cset.pick c))
      | Sequence parts -> String.concat "" (List.map ~f:build parts)
      | Ast (Alternative []) -> assert false
      | Ast (Alternative (first_branch :: _)) -> build first_branch
      | Repeat (body, count, _) ->
        let piece = build body in
        let out = Buffer.create (String.length piece * count) in
        for _step = 1 to count do
          Buffer.add_string out piece
        done;
        Buffer.contents out
      | No_group body
      | Sem_greedy (_, body)
      | Sem (_, body)
      | Nest body
      | Pmark (_, body)
      | Group (_, body) -> build body
      | Beg_of_line
      | End_of_line
      | Beg_of_word
      | End_of_word
      | Not_bound
      | Beg_of_str
      | End_of_str
      | Last_end_of_line
      | Start
      | Stop -> ""
    in
    build (handle_case false t)
  ;;
end
387387+388388+open Export
(* Rewrites a list of alternative branches. Nested alternatives are flattened
   into the list. When a non-empty sequence is immediately followed (after the
   rest has been rewritten) by another non-empty sequence whose first element
   is [equal] to its own, the two are merged into a single sequence: the
   shared first element followed by an alternative of the two remainders. *)
let rec merge_sequences branches =
  match branches with
  | [] -> []
  | Ast (Alternative nested) :: rest -> merge_sequences (nested @ rest)
  | Sequence (head :: tail) :: rest ->
    let merged_rest = merge_sequences rest in
    (match merged_rest with
     | Sequence (head' :: tail') :: rest' when equal Cset.equal head head' ->
       let suffix = Ast (Alternative [ seq tail; seq tail' ]) in
       Sequence [ head; suffix ] :: rest'
     | _ -> Sequence (head :: tail) :: merged_rest)
  | other :: rest -> other :: merge_sequences rest
;;
400400+401401+(*XXX Use a better algorithm allowing non-contiguous regions? *)

(** [colorize color_map regexp] walks [regexp] and calls [Color_map.split] on
    [color_map] for every character set in it, for [Cset.nl] at each line
    anchor and for [Cset.cword] at each word boundary. Returns [true] iff the
    expression contains [Last_end_of_line]. *)
let colorize color_map (regexp : no_case) =
  let saw_last_eol = ref false in
  let split cs = Color_map.split color_map cs in
  let rec walk (node : no_case) =
    match node with
    | Beg_of_str | End_of_str | Start | Stop -> ()
    | Last_end_of_line -> saw_last_eol := true
    | Beg_of_line | End_of_line -> split Cset.nl
    | Beg_of_word | End_of_word | Not_bound -> split Cset.cword
    | Set cs -> split cs
    | Sequence items -> List.iter ~f:walk items
    | Ast (Alternative branches) -> List.iter ~f:walk branches
    | Repeat (body, _, _)
    | No_group body
    | Group (_, body)
    | Nest body
    | Pmark (_, body)
    | Sem (_, body)
    | Sem_greedy (_, body) -> walk body
  in
  walk regexp;
  !saw_last_eol
;;

(* [anchored_ast a]: an alternative is anchored when all of its branches are;
   case wrappers defer to their body. *)
let rec anchored_ast : (t, _) ast -> bool = function
  | Case inner | No_case inner -> anchored inner
  | Alternative branches -> List.for_all ~f:anchored branches

(** [anchored t] is [true] for [Beg_of_str] and [Start], for a sequence with
    at least one anchored element, for a repetition with a positive lower
    bound and an anchored body, and for wrappers around an anchored
    expression. Sets and all other anchors give [false]. *)
and anchored : t -> bool = function
  | Beg_of_str | Start -> true
  | Set _
  | Beg_of_line
  | End_of_line
  | Beg_of_word
  | End_of_word
  | Not_bound
  | End_of_str
  | Last_end_of_line
  | Stop -> false
  | Ast node -> anchored_ast node
  | Sequence items -> List.exists ~f:anchored items
  | Repeat (body, lower, _) -> if lower > 0 then anchored body else false
  | No_group body
  | Sem (_, body)
  | Sem_greedy (_, body)
  | Group (_, body)
  | Nest body
  | Pmark (_, body) -> anchored body
;;

(** [t_of_cset x] wraps [x] in a [Set] node. *)
let t_of_cset cs = Set cs
+91
vendor/opam/re/lib/ast.mli
(** Case-related nodes, indexed by a phantom row recording which of them may
    occur: [Alternative] is tagged [`Uncased], [No_case] and [Case] are tagged
    [`Cased]. *)
type ('a, _) ast = private
  | Alternative : 'a list -> ('a, [> `Uncased ]) ast
  | No_case : 'a -> ('a, [> `Cased ]) ast
  | Case : 'a -> ('a, [> `Cased ]) ast

(** Symbolic character sets, as built by [Export.inter], [Export.compl] and
    [Export.diff]. *)
type cset = private
  | Cset of Cset.t
  | Intersection of cset list
  | Complement of cset list
  | Difference of cset * cset
  | Cast of (cset, [ `Cased | `Uncased ]) ast

(** Regular-expression syntax tree, parameterised by the representation of
    character sets (['a]) and by the case index of embedded {!ast} nodes. *)
type ('a, 'case) gen = private
  | Set of 'a
  | Ast of (('a, 'case) gen, 'case) ast
  | Sequence of ('a, 'case) gen list
  | Repeat of ('a, 'case) gen * int * int option
  | Beg_of_line
  | End_of_line
  | Beg_of_word
  | End_of_word
  | Not_bound
  | Beg_of_str
  | End_of_str
  | Last_end_of_line
  | Start
  | Stop
  | Group of string option * ('a, 'case) gen
  | No_group of ('a, 'case) gen
  | Nest of ('a, 'case) gen
  | Pmark of Pmark.t * ('a, 'case) gen
  | Sem of Automata.Sem.t * ('a, 'case) gen
  | Sem_greedy of Automata.Rep_kind.t * ('a, 'case) gen

(** Expressions over symbolic sets that may still contain case wrappers. *)
type t = (cset, [ `Cased | `Uncased ]) gen

(** Expressions over concrete [Cset.t] sets with no case wrappers left. *)
type no_case = (Cset.t, [ `Uncased ]) gen

val to_dyn : t -> Dyn.t
val pp : t Fmt.t

(** Flattens alternatives found in the list and factors adjacent sequences
    that share an equal first element. *)
val merge_sequences : (Cset.t, [ `Uncased ]) gen list -> (Cset.t, [ `Uncased ]) gen list

val handle_case : bool -> t -> (Cset.t, [ `Uncased ]) gen

(** [true] when the expression is [Beg_of_str]/[Start], or is built so that
    one of those is necessarily traversed (see the implementation for the
    exact rules on sequences, repetitions and alternatives). *)
val anchored : t -> bool

(** Splits the color map on every set, line anchor and word boundary of the
    expression; returns [true] iff [Last_end_of_line] occurs in it. *)
val colorize : Color_map.t -> (Cset.t, [ `Uncased ]) gen -> bool

module Export : sig
  type nonrec t = t

  val empty : t
  val epsilon : t
  val str : string -> t
  val no_case : t -> t
  val case : t -> t

  (** @raise Invalid_argument ["Re.diff"] unless both arguments are sets. *)
  val diff : t -> t -> t

  (** @raise Invalid_argument ["Re.compl"] if the arguments are not all sets. *)
  val compl : t list -> t

  val repn : t -> int -> int option -> t

  (** @raise Invalid_argument ["Re.inter"] if the arguments are not all sets. *)
  val inter : t list -> t

  val char : char -> t
  val any : t
  val set : string -> t

  (** Returns a freshly generated mark and the expression wrapped with it. *)
  val mark : t -> Pmark.t * t

  val nest : t -> t
  val no_group : t -> t
  val whole_string : t -> t
  val leol : t
  val longest : t -> t
  val greedy : t -> t
  val non_greedy : t -> t
  val stop : t
  val not_boundary : t
  val group : ?name:string -> t -> t
  val word : t -> t
  val first : t -> t
  val bos : t
  val bow : t
  val eow : t
  val eos : t
  val bol : t
  val start : t
  val eol : t
  val opt : t -> t
  val rep : t -> t
  val rep1 : t -> t
  val alt : t list -> t
  val shortest : t -> t
  val seq : t list -> t
  val pp : t Fmt.t

  (** Builds one string from the structure of the expression: first branch of
      each alternative, lower bound of each repetition, the empty string for
      anchors. *)
  val witness : t -> string
end

val cset : Cset.t -> t

(** Wraps a symbolic set in a [Set] node. *)
val t_of_cset : cset -> t
+781
vendor/opam/re/lib/automata.ml
···11+open Import
22+33+(*
44+ RE - A regular expression library
55+66+ Copyright (C) 2001 Jerome Vouillon
77+ email: Jerome.Vouillon@pps.jussieu.fr
88+99+ This library is free software; you can redistribute it and/or
1010+ modify it under the terms of the GNU Lesser General Public
1111+ License as published by the Free Software Foundation, with
1212+ linking exception; either version 2.1 of the License, or (at
1313+ your option) any later version.
1414+1515+ This library is distributed in the hope that it will be useful,
1616+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1717+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1818+ Lesser General Public License for more details.
1919+2020+ You should have received a copy of the GNU Lesser General Public
2121+ License along with this library; if not, write to the Free Software
2222+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2323+*)

(* [hash_combine h accu] mixes [h] into the running hash [accu] by scaling
   [accu] by 65599 and adding [h] (native wrap-around integer arithmetic). *)
let hash_combine h accu = h + (65599 * accu)

(** Generator of integer identifiers. An [Ids.t] is a mutable counter;
    identifiers are abstract outside this module. *)
module Ids : sig
  module Id : sig
    type t

    val equal : t -> t -> bool
    val zero : t
    val hash : t -> int
    val pp : t Fmt.t

    module Hash_set : sig
      type id := t
      type t

      val create : unit -> t
      val mem : t -> id -> bool
      val add : t -> id -> unit
      val clear : t -> unit
    end
  end

  type t

  val create : unit -> t
  val next : t -> Id.t
end = struct
  module Id = struct
    type t = int

    module Hash_set = Hash_set

    let zero = 0
    let equal = Int.equal

    (* An identifier is its own hash. *)
    let hash id = id
    let pp = Fmt.int
  end

  type t = int ref

  (* The counter starts at 0; [next] increments before returning, so the
     first identifier handed out is 1 and [Id.zero] is never generated. *)
  let create () = ref 0

  let next counter =
    let fresh = !counter + 1 in
    counter := fresh;
    fresh
  ;;
end
7272+7373+module Id = Ids.Id

(** Matching semantics tag attached to sequences and repetitions. *)
module Sem = struct
  type t =
    [ `Longest
    | `Shortest
    | `First
    ]

  (* Short lowercase name used by the printers below. *)
  let to_string sem =
    match sem with
    | `Longest -> "long"
    | `Shortest -> "short"
    | `First -> "first"
  ;;

  let equal = Poly.equal
  let to_dyn t = t |> to_string |> Dyn.enum
  let pp ch k = k |> to_string |> Format.pp_print_string ch
end

(** Greediness tag attached to repetitions. *)
module Rep_kind = struct
  type t =
    [ `Greedy
    | `Non_greedy
    ]

  (* Constructor-like name used by the printers below. *)
  let to_string kind =
    match kind with
    | `Non_greedy -> "Non_greedy"
    | `Greedy -> "Greedy"
  ;;

  let to_dyn t = t |> to_string |> Dyn.enum
  let pp fmt t = t |> to_string |> Format.pp_print_string fmt
end

(** Marks, represented as integers that are readable but not forgeable
    outside this module. *)
module Mark : sig
  type t = private int

  val compare : t -> t -> int
  val equal : t -> t -> bool
  val pp : t Fmt.t
  val to_dyn : t -> Dyn.t
  val start : t
  val prev : t -> t
  val next : t -> t
  val next2 : t -> t
  val group_count : t -> int
  val outside_range : t -> start_inclusive:t -> stop_inclusive:t -> bool
end = struct
  type t = int

  let start = 0
  let prev m = m - 1
  let next m = m + 1
  let next2 m = m + 2

  (* Half of the mark value, rounded by integer division. *)
  let group_count m = m / 2
  let equal = Int.equal
  let compare = Int.compare
  let pp = Format.pp_print_int
  let to_dyn = Dyn.int

  (* [true] iff [t] lies strictly below [start_inclusive] or strictly above
     [stop_inclusive]. *)
  let outside_range t ~start_inclusive ~stop_inclusive =
    if t < start_inclusive then true else t > stop_inclusive
  ;;
end

(** Position-slot indices attached to marks. [unknown] is the negative
    sentinel; every non-negative value counts as [used]. *)
module Idx : sig
  type t = private int

  val pp : t Fmt.t
  val to_dyn : t -> Dyn.t
  val to_int : t -> int
  val unknown : t
  val initial : t
  val used : t -> bool
  val make : int -> t
  val equal : t -> t -> bool
end = struct
  type t = int

  let unknown = -1
  let initial = 0
  let make n = n
  let to_int idx = idx
  let used idx = not (idx < 0)
  let equal = Int.equal
  let to_dyn = Dyn.int
  let pp = Format.pp_print_int
end

(** Expressions of the automaton. Every node pairs a definition ([def]) with
    an identifier taken from an [Ids.t] counter. *)
module Expr = struct
  type t =
    { id : Id.t
    ; def : def
    }

  and def =
    | Cst of Cset.t
    | Alt of t list
    | Seq of Sem.t * t * t
    | Eps
    | Rep of Rep_kind.t * Sem.t * t
    | Mark of Mark.t
    | Erase of Mark.t * Mark.t
    | Before of Category.t
    | After of Category.t
    | Pmark of Pmark.t

  (* [wrap_sem sem sem' v] returns [v] unchanged when [sem] is [Some s] with
     [s] equal to [sem']. Otherwise it wraps [v] in a variant named after
     [sem'], splicing in the elements when [v] is already a [List]. *)
  let wrap_sem sem sem' v =
    let open Dyn in
    let same_as_enclosing =
      match sem with
      | None -> false
      | Some outer -> Sem.equal outer sem'
    in
    if same_as_enclosing
    then v
    else (
      let name = Sem.to_string sem' in
      match v with
      | List items -> variant name items
      | _ -> variant name [ v ])
  ;;

  (* [seq_as_list sem d] views a right-nested chain of [Seq] nodes that all
     carry [sem] as the list of the definitions of their left operands. The
     chain must end in [Eps] (which contributes nothing) or in a [Cst] (kept
     as the last element); any other shape, or a different semantics along
     the chain, gives [None]. *)
  let seq_as_list sem def =
    let rec collect = function
      | Eps -> Some []
      | Cst _ as cst -> Some [ cst ]
      | Seq (sem', x, y) when Sem.equal sem sem' ->
        (match collect y.def with
         | Some rest -> Some (x.def :: rest)
         | None -> None)
      | _ -> None
    in
    collect def
  ;;

  (* Debug rendering. [sem] is the semantics of the enclosing node, if any;
     it lets [wrap_sem] omit a wrapper that would repeat it. *)
  let rec dyn_of_def sem def =
    let open Dyn in
    match def with
    | Eps -> Enum "Eps"
    | Cst cset -> Cset.to_dyn cset
    | Mark m -> variant "Mark" [ Mark.to_dyn m ]
    | Pmark m -> variant "Pmark" [ Pmark.to_dyn m ]
    | Erase (x, y) -> variant "Erase" [ Mark.to_dyn x; Mark.to_dyn y ]
    | Before c -> variant "Before" [ Category.to_dyn c ]
    | After c -> variant "After" [ Category.to_dyn c ]
    | Alt alt -> variant "Alt" (List.map ~f:(to_dyn sem) alt)
    | Rep (_, sem', t) -> wrap_sem sem sem' (variant "Rep" [ to_dyn (Some sem') t ])
    | Seq (sem', x, y) ->
      let sub = to_dyn (Some sem') in
      let body =
        match seq_as_list sem' y.def with
        | Some defs -> variant "Seq" (sub x :: List.map defs ~f:(dyn_of_def sem))
        | None -> variant "Seq" [ sub x; sub y ]
      in
      wrap_sem sem sem' body

  and to_dyn sem { id = _; def } = dyn_of_def sem def

  (* S-expression style printer; [sem] is threaded to sub-expressions and
     replaced by the repetition's own semantics below a [Rep]. *)
  let rec pp_with_sem sem ch e =
    let open Fmt in
    let self = pp_with_sem sem in
    match e.def with
    | Eps -> str ch "eps"
    | Cst l -> sexp ch "cst" Cset.pp l
    | Alt l -> sexp ch "alt" (list self) l
    | Seq (k, e, e') -> sexp ch "seq" (triple Sem.pp self self) (k, e, e')
    | Rep (_rk, k, e) -> sexp ch "rep" (pair Sem.pp (pp_with_sem (Some k))) (k, e)
    | Mark i -> sexp ch "mark" Mark.pp i
    | Pmark i -> sexp ch "pmark" Pmark.pp i
    | Erase (b, e) -> sexp ch "erase" (pair Mark.pp Mark.pp) (b, e)
    | Before c -> sexp ch "before" Category.pp c
    | After c -> sexp ch "after" Category.pp c
  ;;

  let pp = pp_with_sem None

  (* Shared epsilon node carrying the reserved identifier [Id.zero]. *)
  let eps_expr = { id = Id.zero; def = Eps }

  (* [mk ids def] tags [def] with the next identifier of [ids]. *)
  let mk ids def =
    let id = Ids.next ids in
    { id; def }
  ;;

  (* The empty alternative: an expression with no branch. *)
  let empty ids = Alt [] |> mk ids

  (* An empty character set is normalised to [empty]. *)
  let cst ids s =
    match Cset.is_empty s with
    | true -> empty ids
    | false -> mk ids (Cst s)
  ;;

  let eps ids = Eps |> mk ids
  let rep ids kind sem x = Rep (kind, sem, x) |> mk ids
  let mark ids m = Mark m |> mk ids
  let pmark ids i = Pmark i |> mk ids
  let erase ids m m' = Erase (m, m') |> mk ids
  let before ids c = Before c |> mk ids
  let after ids c = After c |> mk ids

  (* Zero branches give [empty]; a single branch is returned as is. *)
  let alt ids branches =
    match branches with
    | [] -> empty ids
    | [ only ] -> only
    | _ -> mk ids (Alt branches)
  ;;

  (* Smart constructor for sequences: an empty alternative on either side
     absorbs the sequence, a leading [Eps] is dropped, and a trailing [Eps]
     is dropped only under [`First] semantics. *)
  let seq ids (kind : Sem.t) x y =
    match x.def, y.def with
    | Alt [], _ -> x
    | _, Alt [] -> y
    | Eps, _ -> y
    | _, Eps ->
      (match kind with
       | `First -> x
       | `Longest | `Shortest -> mk ids (Seq (kind, x, y)))
    | _ -> mk ids (Seq (kind, x, y))
  ;;

  let is_eps { def; _ } =
    match def with
    | Eps -> true
    | Cst _ | Alt _ | Seq _ | Rep _ | Mark _ | Erase _ | Before _ | After _ | Pmark _ ->
      false
  ;;

  (* [rename ids x] rebuilds [x] with fresh identifiers on every node. The
     nested-call shape is kept so identifiers are drawn in the same order. *)
  let rec rename ids x =
    match x.def with
    | Alt l -> mk ids (Alt (List.map ~f:(rename ids) l))
    | Seq (k, y, z) -> mk ids (Seq (k, rename ids y, rename ids z))
    | Rep (g, k, y) -> mk ids (Rep (g, k, rename ids y))
    | Cst _ | Eps | Mark _ | Pmark _ | Erase _ | Before _ | After _ -> mk ids x.def
  ;;
end
289289+290290+type expr = Expr.t
291291+292292+include Expr

(** Marks collected along a path: an association list from marks to position
    indices, plus a set of [Pmark]s. *)
module Marks = struct
  type t =
    { marks : (Mark.t * Idx.t) list
    ; pmarks : Pmark.Set.t
    }

  let to_dyn { marks; pmarks } : Dyn.t =
    let open Dyn in
    let marks_dyn =
      List.map marks ~f:(fun (m, idx) -> pair (Mark.to_dyn m) (Idx.to_dyn idx)) |> list
    in
    let pmarks_dyn = Pmark.Set.to_list pmarks |> List.map ~f:Pmark.to_dyn |> list in
    record [ "marks", marks_dyn; "pmarks", pmarks_dyn ]
  ;;

  (* Pairwise equality of the association lists (order matters) and set
     equality of the pmarks. *)
  let equal { marks; pmarks } t =
    let same_entry (m, i) (m', i') = Mark.equal m m' && Idx.equal i i' in
    List.equal ~eq:same_entry marks t.marks && Pmark.Set.equal pmarks t.pmarks
  ;;

  let empty = { marks = []; pmarks = Pmark.Set.empty }

  (* Folds every (mark, index) pair of [l] into the running hash [init]. *)
  let hash_marks_offset l init =
    let rec go acc = function
      | [] -> acc
      | ((a : Mark.t), (i : Idx.t)) :: rest ->
        go (hash_combine (a :> int) (hash_combine (i :> int) acc)) rest
    in
    go init l
  ;;

  let hash m accu =
    let with_pmarks = hash_combine (Hashtbl.hash m.pmarks) accu in
    hash_marks_offset m.marks with_pmarks
  ;;

  (* [marks_set_idx marks idx] assigns [idx] to the leading run of entries
     whose index is still [Idx.unknown]; the list is left untouched from the
     first entry with a known index onwards. *)
  let marks_set_idx =
    let rec assign idx = function
      | [] -> []
      | (a, current) :: rest as all ->
        if Idx.equal current Idx.unknown then (a, idx) :: assign idx rest else all
    in
    fun marks idx -> { marks with marks = assign idx marks.marks }
  ;;

  (* [filter t b e] drops every mark lying inside the inclusive range
     [b .. e]. *)
  let filter t (b : Mark.t) (e : Mark.t) =
    let keep ((i : Mark.t), _) =
      Mark.outside_range i ~start_inclusive:b ~stop_inclusive:e
    in
    { t with marks = List.filter t.marks ~f:keep }
  ;;

  (* [set_mark t i] (re)binds [i] to [Idx.unknown] at the head of the list. *)
  let set_mark t (i : Mark.t) =
    let others = List.remove_assq i t.marks in
    { t with marks = (i, Idx.unknown) :: others }
  ;;

  let set_pmark t i = { t with pmarks = Pmark.Set.add i t.pmarks }

  let pp fmt { marks; pmarks } =
    let pp_entry fmt (a, i) = Format.fprintf fmt "%a-%a" Mark.pp a Idx.pp i in
    Format.pp_open_box fmt 1;
    (match marks with
     | [] -> ()
     | _ :: _ ->
       Format.fprintf fmt "@[<2>marks@ %a@]" (Format.pp_print_list pp_entry) marks);
    (match Pmark.Set.to_list pmarks with
     | [] -> ()
     | elems ->
       Format.fprintf fmt "@[<2>pmarks %a@]" (Format.pp_print_list Pmark.pp) elems);
    Format.pp_close_box fmt ()
  ;;
end

(** Classification of an automaton state, as computed by [Desc.status]. *)
module Status = struct
  type t =
    | Failed (** the state description is empty *)
    | Match of Mark_infos.t * Pmark.Set.t
    (** the description starts with a match; carries the mark information
        built by [Mark_infos.make] and the pmarks of that match *)
    | Running (** any other description *)
end

(** State descriptions: ordered lists of items ([E.t]), where earlier items
    have priority when looking for the first match. *)
module Desc : sig
  type t

  val pp : t Fmt.t

  module E : sig
    type nonrec t = private
      | TSeq of Sem.t * t * Expr.t
      | TExp of Marks.t * Expr.t
      | TMatch of Marks.t
  end

  val to_dyn : t -> Dyn.t
  val fold_right : t -> init:'acc -> f:(E.t -> 'acc -> 'acc) -> 'acc
  val tseq : Sem.t -> t -> Expr.t -> t -> t
  val initial : Expr.t -> t
  val empty : t
  val set_idx : Idx.t -> t -> t
  val hash : t -> int -> int
  val equal : t -> t -> bool
  val status : t -> Status.t
  val first_match : t -> Marks.t option
  val remove_matches : t -> t
  val split_at_match : t -> t * t
  val add_match : t -> Marks.t -> t
  val add_eps : t -> Marks.t -> t
  val add_expr : t -> E.t -> t
  val iter_marks : t -> f:(Marks.t -> unit) -> unit
  val remove_duplicates : Id.Hash_set.t -> t -> Expr.t -> t
end = struct
  module E = struct
    type t =
      | TSeq of Sem.t * t list * Expr.t
      | TExp of Marks.t * Expr.t
      | TMatch of Marks.t

    let rec equal_list l1 l2 = List.equal ~eq:equal l1 l2

    (* Expressions are compared by identifier only; the semantics tag of a
       [TSeq] is not compared. *)
    and equal x y =
      match x, y with
      | TSeq (_, l1, e1), TSeq (_, l2, e2) -> Id.equal e1.id e2.id && equal_list l1 l2
      | TExp (marks1, e1), TExp (marks2, e2) ->
        Id.equal e1.id e2.id && Marks.equal marks1 marks2
      | TMatch marks1, TMatch marks2 -> Marks.equal marks1 marks2
      | _, _ -> false
    ;;

    (* Each constructor mixes in its own constant, then the same components
       that [equal] inspects. *)
    let rec hash (t : t) accu =
      match t with
      | TSeq (_, l, e) ->
        hash_combine 0x172a1bce (hash_combine (Id.hash e.id) (hash_list l accu))
      | TExp (marks, e) ->
        hash_combine 0x2b4c0d77 (hash_combine (Id.hash e.id) (Marks.hash marks accu))
      | TMatch marks -> hash_combine 0x1c205ad5 (Marks.hash marks accu)

    and hash_list =
      let f acc x = hash x acc in
      fun l init -> List.fold_left l ~init ~f
    ;;
  end

  type t = E.t list

  (* Debug rendering; [sem] is the semantics of the enclosing node, if any. *)
  let rec to_dyn sem t = Dyn.list (List.map ~f:(dyn_of_e sem) t)

  and dyn_of_e sem =
    let open Dyn in
    function
    | E.TSeq (sem', x, y) ->
      wrap_sem
        sem
        sem'
        (variant "TSeq" [ to_dyn (Some sem') x; Expr.to_dyn (Some sem') y ])
    | TExp (marks, e) ->
      let e =
        let base = [ Expr.to_dyn sem e ] in
        (* Empty marks are omitted from the output. *)
        if Marks.(equal empty marks) then base else Marks.to_dyn marks :: base
      in
      variant "TExp" e
    | TMatch m -> variant "TMarks" [ Marks.to_dyn m ]
  ;;

  (* Public entry point: shadows the recursive [to_dyn] above, starting with
     no enclosing semantics. *)
  let to_dyn t = to_dyn None t

  open E

  let equal = E.equal_list
  let hash = E.hash_list

  (* [tseq' kind x y]: nothing when [x] is empty; when [x] is a single
     epsilon item, [y] itself carrying that item's marks; otherwise a [TSeq]
     of [x] followed by [y]. *)
  let tseq' kind x y =
    match x with
    | [] -> []
    | [ TExp (marks, { def = Eps; _ }) ] -> [ TExp (marks, y) ]
    | _ -> [ TSeq (kind, x, y) ]
  ;;

  (* [tseq kind x y rem] prepends the result of [tseq' kind x y] to [rem]. *)
  let tseq kind x y rem = tseq' kind x y @ rem

  let rec fold_right t ~init ~f =
    match t with
    | [] -> init
    | x :: xs -> f x (fold_right xs ~init ~f)
  ;;

  (* Applies [f] to the marks of every [TExp]/[TMatch], descending into
     [TSeq] items. *)
  let rec iter_marks t ~f =
    List.iter t ~f:(fun (e : E.t) ->
      match e with
      | TSeq (_, l, _) -> iter_marks l ~f
      | TExp (marks, _) | TMatch marks -> f marks)
  ;;

  (* [y] is the continuation expression: for an epsilon [TExp] the printed
     identifier is that of [y] rather than that of the epsilon node. *)
  let rec print_state_rec ch e (y : Expr.t) =
    match e with
    | TMatch marks -> Format.fprintf ch "@[<2>(TMatch@ %a)@]" Marks.pp marks
    | TSeq (sem, l', x) ->
      Format.fprintf ch "@[<2>(TSeq@ %a@ " Sem.pp sem;
      print_state_lst ch l' x;
      Format.fprintf ch "@ %a)@]" Expr.pp x
    | TExp (marks, { def = Eps; _ }) ->
      Format.fprintf ch "@[<2>(TExp@ %a@ (%a)@ (eps))@]" Id.pp y.id Marks.pp marks
    | TExp (marks, x) ->
      Format.fprintf ch "@[<2>(TExp@ %a@ (%a)@ %a)@]" Id.pp x.id Marks.pp marks Expr.pp x

  and print_state_lst ch l y =
    match l with
    | [] -> Format.fprintf ch "()"
    | e :: rem ->
      print_state_rec ch e y;
      List.iter rem ~f:(fun e ->
        Format.fprintf ch "@ | ";
        print_state_rec ch e y)
  ;;

  (* Printer for a single item; shadowed further down by the list printer
     that is actually exported. *)
  let pp ch t = print_state_lst ch [ t ] { id = Id.zero; def = Eps }

  (* Marks of the first top-level [TMatch], if any. *)
  let rec first_match = function
    | [] -> None
    | TMatch marks :: _ -> Some marks
    | _ :: r -> first_match r
  ;;

  (* Drops every top-level [TMatch]. *)
  let remove_matches t =
    List.filter t ~f:(function
      | TMatch _ -> false
      | _ -> true)
  ;;

  (* Splits at the first top-level [TMatch]: the items before it, and the
     items after it with any further matches removed. The list must contain
     a [TMatch]; otherwise [assert false] is reached. *)
  let split_at_match =
    let rec split_at_match_rec l = function
      | [] -> assert false
      | TMatch _ :: r -> List.rev l, remove_matches r
      | x :: r -> split_at_match_rec (x :: l) r
    in
    fun l -> split_at_match_rec [] l
  ;;

  (* Only the head of the description is inspected. *)
  let status : _ -> Status.t = function
    | [] -> Failed
    | TMatch m :: _ -> Match (Mark_infos.make (m.marks :> (int * int) list), m.pmarks)
    | _ -> Running
  ;;

  (* Applies [Marks.marks_set_idx] with [idx] to every marks record in the
     description, descending into [TSeq] items. *)
  let set_idx =
    let rec f idx = function
      | TMatch marks -> TMatch (Marks.marks_set_idx marks idx)
      | TSeq (kind, l, x) -> TSeq (kind, set_idx idx l, x)
      | TExp (marks, x) -> TExp (Marks.marks_set_idx marks idx, x)
    and set_idx idx xs = List.map xs ~f:(f idx) in
    set_idx
  ;;

  (* Exported printer: a bracketed, "; "-separated list printed with the
     single-item [pp] defined above. *)
  let[@ocaml.warning "-32"] pp fmt t =
    Format.fprintf fmt "[%a]" (Format.pp_print_list ~pp_sep:(Fmt.lit "; ") pp) t
  ;;

  let empty = []
  let initial expr = [ TExp (Marks.empty, expr) ]
  let add_match t marks = TMatch marks :: t
  let add_eps t marks = TExp (marks, eps_expr) :: t
  let add_expr t expr = expr :: t

  (* [remove_duplicates seen l y] clears [seen], then walks [l] keeping only
     the first item for each expression identifier. Everything after the
     first [TMatch] of a list is dropped. An epsilon [TExp] is keyed by the
     identifier of the continuation [y]; inside a [TSeq] the continuation is
     that sequence's own expression. *)
  let remove_duplicates =
    let rec loop seen l y =
      match l with
      | [] -> []
      | (TMatch _ as x) :: _ ->
        (* Truncate after first match *)
        [ x ]
      | TSeq (kind, l, x) :: r ->
        let l = loop seen l x in
        let r = loop seen r y in
        tseq kind l x r
      | (TExp (_marks, { def = Eps; _ }) as e) :: r ->
        if Id.Hash_set.mem seen y.id
        then loop seen r y
        else (
          Id.Hash_set.add seen y.id;
          e :: loop seen r y)
      | (TExp (_marks, x) as e) :: r ->
        if Id.Hash_set.mem seen x.id
        then loop seen r y
        else (
          Id.Hash_set.add seen x.id;
          e :: loop seen r y)
    in
    fun seen l y ->
      Id.Hash_set.clear seen;
      loop seen l y
  ;;
end
589589+590590+module E = Desc.E

(** Automaton states: a description together with its index, the category it
    was reached under, a precomputed hash and a lazily computed status. *)
module State = struct
  type t =
    { idx : Idx.t
    ; category : Category.t
    ; desc : Desc.t
    ; mutable status : Status.t option
    ; hash : int
    }
  (* Thread-safety: We use double-checked locking to access field
     [status] in function [status] below. *)

  let pp fmt t = Desc.pp fmt t.desc
  let[@inline] idx t = t.idx
  let to_dyn t = Desc.to_dyn t.desc

  (* Placeholder state: unknown index, empty description, hash [-1]. *)
  let dummy =
    { idx = Idx.unknown
    ; category = Category.dummy
    ; desc = Desc.empty
    ; status = None
    ; hash = -1
    }
  ;;

  (* Hash of the (index, category, description) triple, masked to 30 bits. *)
  let hash idx cat desc =
    Desc.hash desc (hash_combine idx (hash_combine (Category.to_int cat) 0))
    land 0x3FFFFFFF
  ;;

  (* [mk idx cat desc] builds a state with no cached status and its hash
     computed up front. *)
  let mk idx cat desc =
    { idx; category = cat; desc; status = None; hash = hash (idx :> int) cat desc }
  ;;

  let create cat e = mk Idx.initial cat (Desc.initial e)

  (* The cached status is ignored; the stored hashes are compared first as a
     cheap rejection test. *)
  let equal { idx; category; desc; status = _; hash } t =
    Int.equal hash t.hash
    && Idx.equal idx t.idx
    && Category.equal category t.category
    && Desc.equal desc t.desc
  ;;

  (* To be called when the mutex has already been acquired *)
  let status_no_mutex s =
    match s.status with
    | Some s -> s
    | None ->
      let st = Desc.status s.desc in
      s.status <- Some st;
      st
  ;;

  (* [status m s] returns the cached status when there is one; otherwise it
     computes and caches it while holding [m]. The mutex is released through
     [Fun.protect], so an exception raised while computing the status cannot
     leave [m] locked. *)
  let status m s =
    match s.status with
    | Some s -> s
    | None ->
      Mutex.lock m;
      Fun.protect
        ~finally:(fun () -> Mutex.unlock m)
        (fun () -> status_no_mutex s)
  ;;

  module Table = Hashtbl.Make (struct
      type nonrec t = t

      let equal = equal
      let hash t = t.hash
    end)
end
661661+662662+(**** Find a free index ****)

(** Scratch space used while computing a successor state: a bit vector
    recording which position indices are in use, a hash set for
    de-duplication, and the published index capacity. *)
module Working_area = struct
  type t =
    { mutable ids : Bit_vector.t
    ; seen : Id.Hash_set.t
    ; index_count : int Atomic.t
    }

  let create () =
    { ids = Bit_vector.create_zero 1
    ; seen = Id.Hash_set.create ()
    ; index_count = Atomic.make 0
    }
  ;;

  let index_count w = Atomic.get w.index_count

  (* Sets the bit of every used index found among the marks of [desc]. *)
  let mark_used_indices tbl desc =
    Desc.iter_marks desc ~f:(fun (m : Marks.t) ->
      List.iter m.Marks.marks ~f:(fun (_, i) ->
        match Idx.used i with
        | true -> Bit_vector.set tbl (i :> int) true
        | false -> ()))
  ;;

  (* First position in [idx .. len - 1] whose bit is clear, or [len] when
     every bit in that range is set. *)
  let find_free tbl idx len =
    let pos = ref idx in
    while (not (!pos = len)) && Bit_vector.get tbl !pos do
      incr pos
    done;
    !pos
  ;;

  (* [free_index t l] returns an index not used by any mark of [l]. When all
     current slots are taken, the returned index equals the old capacity and
     the bit vector is replaced by a zeroed one of twice that capacity. *)
  let free_index t l =
    Bit_vector.reset_zero t.ids;
    mark_used_indices t.ids l;
    let capacity = Bit_vector.length t.ids in
    let idx = find_free t.ids 0 capacity in
    if idx = capacity
    then (
      let doubled = 2 * capacity in
      t.ids <- Bit_vector.create_zero doubled;
      (* This function is only called when the mutex is locked. So we
         are sure that this is always coherent with the length of
         [t.ids]. *)
      Atomic.set t.index_count doubled);
    Idx.make idx
  ;;
end
705705+706706+(**** Computation of the next state ****)

(** Context threaded through the [delta_*] functions for one transition. *)
type ctx =
  { c : Cset.c (** character consumed; tested against [Cst] sets with [Cset.mem] *)
  ; prev_cat : Category.t (** category checked by [After] nodes *)
  ; next_cat : Category.t (** category checked by [Before] nodes *)
  }

(* [delta_expr ctx marks x rem] prepends to [rem] the items obtained by
   running expression [x] on the character of [ctx], starting with [marks]. *)
let rec delta_expr ctx marks (x : Expr.t) rem =
  (*Format.eprintf "%d@." x.id;*)
  match x.def with
  | Eps -> Desc.add_match rem marks
  | Cst s -> if Cset.mem ctx.c s then Desc.add_eps rem marks else rem
  | Before cat ->
    if Category.intersect ctx.next_cat cat then Desc.add_match rem marks else rem
  | After cat ->
    if Category.intersect ctx.prev_cat cat then Desc.add_match rem marks else rem
  | Mark i ->
    let marks = Marks.set_mark marks i in
    Desc.add_match rem marks
  | Pmark i ->
    let marks = Marks.set_pmark marks i in
    Desc.add_match rem marks
  | Erase (b, e) ->
    let marks = Marks.filter marks b e in
    Desc.add_match rem marks
  | Alt l -> delta_alt ctx marks l rem
  | Rep (rep_kind, kind, y) -> delta_rep ctx marks x rep_kind kind y rem
  | Seq (kind, y, z) ->
    let after_y = delta_expr ctx marks y Desc.empty in
    delta_seq ctx kind after_y z rem

(* Repetition [x] with body [y]: run the body once, then continue with [x]
   itself. The "stop repeating" match goes after the continuation when
   greedy (carrying the marks of the body's first match, if it had one) and
   before it when non-greedy (carrying the incoming marks). *)
and delta_rep ctx marks x rep_kind kind y rem =
  let stepped = delta_expr ctx marks y Desc.empty in
  let body, marks' =
    match Desc.first_match stepped with
    | Some found -> Desc.remove_matches stepped, found
    | None -> stepped, marks
  in
  match rep_kind with
  | `Non_greedy -> Desc.add_match (Desc.tseq kind body x rem) marks
  | `Greedy -> Desc.tseq kind body x (Desc.add_match rem marks')

(* Branches are folded from the right, so the first branch ends up first. *)
and delta_alt ctx marks l rem =
  List.fold_right l ~init:rem ~f:(fun branch acc -> delta_expr ctx marks branch acc)

(* [y] is the already-stepped left operand and [z] the pending right
   operand. When [y] contains a match, [z] is also started from the marks of
   that match; [kind] decides where it is placed relative to the rest of
   [y]. *)
and delta_seq ctx (kind : Sem.t) y z rem =
  match Desc.first_match y with
  | None -> Desc.tseq kind y z rem
  | Some marks ->
    (match kind with
     | `First ->
       let before, after = Desc.split_at_match y in
       let tail = Desc.tseq kind after z rem in
       Desc.tseq kind before z (delta_expr ctx marks z tail)
     | `Longest ->
       let continued = delta_expr ctx marks z rem in
       Desc.tseq kind (Desc.remove_matches y) z continued
     | `Shortest ->
       let pending = Desc.tseq kind (Desc.remove_matches y) z rem in
       delta_expr ctx marks z pending)
;;

(* [delta_e ctx marks item rem] steps one description item: a [TSeq] steps
   its inner description and then sequences with its expression, a [TExp]
   steps its expression with its own marks, and a [TMatch] is carried over
   unchanged. *)
let rec delta_e ctx marks (item : E.t) rem =
  match item with
  | TMatch _ -> Desc.add_expr rem item
  | TExp (own_marks, e) -> delta_expr ctx own_marks e rem
  | TSeq (kind, inner, z) ->
    let stepped = delta_desc ctx marks inner Desc.empty in
    delta_seq ctx kind stepped z rem

(* Steps every item of [l], folding from the right so order is preserved. *)
and delta_desc ctx marks (l : Desc.t) rem =
  let step item acc = delta_e ctx marks item acc in
  Desc.fold_right l ~init:rem ~f:step
;;

(** [delta tbl_ref next_cat char st] computes the state following [st] on
    character [char]: the description of [st] is stepped, de-duplicated with
    the working area's hash set, given a free position index, and packaged
    with [next_cat]. *)
let delta (tbl_ref : Working_area.t) next_cat char (st : State.t) =
  let ctx = { c = char; prev_cat = st.category; next_cat } in
  let stepped = delta_desc ctx Marks.empty st.desc Desc.empty in
  let deduped = Desc.remove_duplicates tbl_ref.seen stepped Expr.eps_expr in
  let idx = Working_area.free_index tbl_ref deduped in
  State.mk idx next_cat (Desc.set_idx idx deduped)
;;
+123
vendor/opam/re/lib/automata.mli
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(* Regular expressions *)

(** Marks, kept abstract (and immediate) for clients. *)
module Mark : sig
  type t [@@immediate]

  val compare : t -> t -> int

  (** The initial mark. *)
  val start : t

  (** The mark immediately before the given one. *)
  val prev : t -> t

  (** The mark immediately after the given one. *)
  val next : t -> t

  (** The mark two steps after the given one. *)
  val next2 : t -> t

  (** Half of the mark's integer value (integer division). *)
  val group_count : t -> int
end

(** Matching semantics of sequences and repetitions. *)
module Sem : sig
  type t =
    [ `Longest
    | `Shortest
    | `First
    ]

  val to_dyn : t -> Dyn.t
  val pp : t Fmt.t
end

(** Greediness of repetitions. *)
module Rep_kind : sig
  type t =
    [ `Greedy
    | `Non_greedy
    ]

  val to_dyn : t -> Dyn.t
  val pp : t Fmt.t
end

(** Expressions of the automaton. *)
type expr

(** [true] iff the expression is an epsilon node. *)
val is_eps : expr -> bool

val pp : expr Fmt.t

(** Identifier generators; each constructor below draws an identifier from
    one. *)
module Ids : sig
  type t

  val create : unit -> t
end

(** Character set; an empty set yields the same expression as {!empty}. *)
val cst : Ids.t -> Cset.t -> expr

(** The alternative with no branch. *)
val empty : Ids.t -> expr

(** Alternative; an empty list yields {!empty} and a singleton list yields
    its only element unchanged. *)
val alt : Ids.t -> expr list -> expr

(** Sequence. Returns one of its operands unchanged when the other is an
    empty alternative or a leading epsilon; a trailing epsilon is dropped
    only under [`First]. *)
val seq : Ids.t -> Sem.t -> expr -> expr -> expr

val eps : Ids.t -> expr
val rep : Ids.t -> Rep_kind.t -> Sem.t -> expr -> expr
val mark : Ids.t -> Mark.t -> expr
val pmark : Ids.t -> Pmark.t -> expr
val erase : Ids.t -> Mark.t -> Mark.t -> expr
val before : Ids.t -> Category.t -> expr
val after : Ids.t -> Category.t -> expr

(** Rebuilds the expression with fresh identifiers on every node. *)
val rename : Ids.t -> expr -> expr

(****)

(* States of the automata *)

module Idx : sig
  type t

  val to_int : t -> int
end

module Status : sig
  type t =
    | Failed
    | Match of Mark_infos.t * Pmark.Set.t
    | Running
end

module State : sig
  type t

  val pp : t Fmt.t

  (** Placeholder state with an empty description. *)
  val dummy : t

  val create : Category.t -> expr -> t
  val idx : t -> Idx.t

  (** Returns the cached status, computing and caching it if needed, without
      taking any lock; to be called when the mutex is already held. *)
  val status_no_mutex : t -> Status.t

  (** Returns the cached status if present; otherwise computes and caches it
      while holding the given mutex. *)
  val status : Mutex.t -> t -> Status.t

  val to_dyn : t -> Dyn.t

  module Table : Hashtbl.S with type key = t
end

(****)

(* Computation of the states following a given state *)

module Working_area : sig
  type t

  val create : unit -> t

  (** Current value of the published index capacity. *)
  val index_count : t -> int
end

(** [delta area next_cat c st] is the state following [st] on character [c],
    tagged with category [next_cat]. Mutates [area]. *)
val delta : Working_area.t -> Category.t -> Cset.c -> State.t -> State.t
+48
vendor/opam/re/lib/bit_vector.ml
(** Fixed-length vector of bits, packed eight per byte. *)
type t =
  { len : int (** number of addressable bits *)
  ; bits : Bytes.t (** backing storage, one byte per started group of 8 bits *)
  }

(* Unchecked byte accessors on the backing storage. *)
let byte s i = Char.code (Bytes.unsafe_get s i)
let set_byte s i x = Bytes.unsafe_set s i (Char.chr x)

(** [length t] is the number of bits [t] was created with. *)
let length t = t.len

(* Bit [n] lives in byte [n / 8], at bit position [n mod 8]. No bounds
   check. *)
let unsafe_set v n b =
  let i = n lsr 3 in
  let c = byte v.bits i in
  let mask = 1 lsl (n land 7) in
  set_byte v.bits i (if b then c lor mask else c land lnot mask)
;;

(** [set v n b] sets bit [n] of [v] to [b].
    @raise Invalid_argument if [n] is outside [0 .. length v - 1]. *)
let set v n b =
  if n < 0 || n >= v.len then invalid_arg "Bit_vector.set";
  unsafe_set v n b
;;

(* Unchecked read of bit [n]. *)
let unsafe_get v n =
  let i = n lsr 3 in
  byte v.bits i land (1 lsl (n land 7)) > 0
;;

(** [get v n] is the value of bit [n] of [v].
    @raise Invalid_argument if [n] is outside [0 .. length v - 1]. *)
let get v n =
  if n < 0 || n >= v.len then invalid_arg "Bit_vector.get";
  unsafe_get v n
;;

(** [reset_zero t] clears every bit of [t] in place. *)
let reset_zero t = Bytes.fill t.bits 0 (Bytes.length t.bits) '\000'

(** [create_zero len] is a vector of [len] bits, all cleared.
    @raise Invalid_argument if [len] is negative. *)
let create_zero len =
  (* Reject negative lengths up front, with a message naming this function
     like [set] and [get] do, instead of failing inside [Bytes.make]. *)
  if len < 0 then invalid_arg "Bit_vector.create_zero";
  (* Round up to a whole number of bytes. *)
  let byte_count = (len + 7) lsr 3 in
  { len; bits = Bytes.make byte_count '\000' }
;;
(* Debug printer: prints the [len] field, then the raw [bits] bytes, each
   through [Fmt.sexp] and each followed by a flushed newline. *)
let pp fmt { len; bits } =
  let pp_len fmt () = Fmt.sexp fmt "len" Fmt.int len in
  let pp_bits fmt () = Fmt.sexp fmt "bits" Fmt.bytes bits in
  Format.fprintf fmt "%a@.%a@." pp_len () pp_bits ()
;;
+8
vendor/opam/re/lib/bit_vector.mli
···11+type t
22+33+val length : t -> int
44+val set : t -> int -> bool -> unit
55+val create_zero : int -> t
66+val get : t -> int -> bool
77+val reset_zero : t -> unit
88+val pp : t Fmt.t
+29
vendor/opam/re/lib/category.ml
(* A category is a small set of flags stored in an [int], one bit per flag. *)
type t = int

let equal = Int.equal
let compare = Int.compare
let to_int (cat : t) : int = cat
let pp ppf (cat : t) = Format.pp_print_int ppf cat

(* [intersect a b] holds when [a] and [b] share at least one flag. *)
let intersect a b = not (Int.equal (a land b) 0)

(* Union of two categories. *)
let ( ++ ) = ( lor )

(* [dummy] has every bit set; the other constants are one flag each. *)
let dummy = -1
let inexistant = 1 lsl 0
let letter = 1 lsl 1
let not_letter = 1 lsl 2
let newline = 1 lsl 3
let lastnewline = 1 lsl 4
let search_boundary = 1 lsl 5
1616+let to_dyn = Dyn.int
(* [from_char c] classifies a single character: word characters map to
   [letter], the newline to [not_letter ++ newline], everything else to
   [not_letter]. *)
let from_char c =
  let is_word_char =
    match c with
    (* Should match [cword] definition *)
    | '0' .. '9'
    | 'A' .. 'Z'
    | 'a' .. 'z'
    | '_' | '\170' | '\181' | '\186'
    | '\192' .. '\214'
    | '\216' .. '\246'
    | '\248' .. '\255' -> true
    | _ -> false
  in
  if is_word_char
  then letter
  else if Char.equal c '\n'
  then not_letter ++ newline
  else not_letter
;;
+22
vendor/opam/re/lib/category.mli
···11+(** Categories represent the various kinds of characters that can be tested
22+ by look-ahead and look-behind operations.
33+44+ This is more restricted than Cset, but faster. *)
55+66+type t [@@immediate]
77+88+val ( ++ ) : t -> t -> t
99+val from_char : char -> t
1010+val dummy : t
1111+val inexistant : t
1212+val letter : t
1313+val not_letter : t
1414+val newline : t
1515+val lastnewline : t
1616+val search_boundary : t
1717+val to_int : t -> int
1818+val equal : t -> t -> bool
1919+val compare : t -> t -> int
2020+val intersect : t -> t -> bool
2121+val pp : t Fmt.t
2222+val to_dyn : t -> Dyn.t
+56
vendor/opam/re/lib/color_map.ml
···11+(* In reality, this can really be represented as a bool array.
22+33+ The representation is best thought of as a list of all chars along with a
44+ flag:
55+66+ (a, 0), (b, 1), (c, 0), (d, 0), ...
88+ characters belonging to the same color are represented by sequences of
99+ characters with the flag set to 0.
1010+*)
1111+1212+type t = Bytes.t
(* A string indexed by color: the character stored at a color's index is the
   representative returned for that color. *)
module Repr = struct
  type t = string

  let repr t color = String.get t (Cset.to_int color)
  let length t = String.length t
end
(* A string indexed by character code: the byte stored at a character's index
   is that character's color. *)
module Table = struct
  type t = string

  (* Color stored at index [Cset.to_int c], returned as a raw [char]. *)
  let get_char t c = String.get t (Cset.to_int c)

  (* Color of character [c]. The lookup is unchecked. *)
  let get t c =
    let code = Char.code c in
    Cset.of_char (String.unsafe_get t code)
  ;;

  (* Map every interval of [cset] to the interval spanned by the colors of
     its two bounds, and return the union of these color intervals. *)
  let translate_colors (cm : t) cset =
    let add_interval lo hi acc =
      let start = get_char cm lo in
      let stop = get_char cm hi in
      Cset.union (Cset.cseq start stop) acc
    in
    Cset.fold_right cset ~init:Cset.empty ~f:add_interval
  ;;
end
(* A fresh color map with no split point marked. It holds 257 bytes, one more
   than the 256 character codes; [split] writes at the index just past an
   interval's upper bound, which presumably is why the extra slot exists. *)
let make () =
  let size = 256 + 1 in
  Bytes.make size '\000'
;;
(* [flatten cm] turns the split markers of [cm] into two strings:
   - a 256-byte table giving the color of every character code; the color
     starts at 0 and increases by one at each code in 1..255 whose marker
     byte is not ['\000'];
   - a string with one byte per color, holding the highest character code
     that was assigned that color. *)
let flatten cm =
  let table = Bytes.make 256 '\000' in
  let repr = Bytes.create 256 in
  Bytes.set repr 0 '\000';
  let rec fill i color =
    if i > 255
    then color
    else (
      let color = if Bytes.get cm i <> '\000' then color + 1 else color in
      Bytes.set table i (Char.chr color);
      Bytes.set repr color (Char.chr i);
      fill (i + 1) color)
  in
  let last_color = fill 1 0 in
  Bytes.unsafe_to_string table, Bytes.sub_string repr 0 (last_color + 1)
;;
(* For every interval of the char set, write the byte 1 at the index of its
   lower bound and at the index just after its upper bound. *)
let split t set =
  let mark idx = Bytes.set t idx '\001' in
  Cset.iter set ~f:(fun first last ->
    mark (Cset.to_int first);
    mark (Cset.to_int last + 1))
;;
+27
vendor/opam/re/lib/color_map.mli
···11+(* Color maps exist to provide an optimization for the regex engine. The fact
22+ that some characters are entirely equivalent for some regexes means that we
33+ can use them interchangeably.
44+55+ A color map assigns a color to every character in our character set. Any two
66+ characters with the same color will be treated equivalently by the automaton.
77+*)
88+type t
99+1010+module Repr : sig
1111+ type t
1212+1313+ val repr : t -> Cset.c -> char
1414+ val length : t -> int
1515+end
1616+1717+module Table : sig
1818+ type t
1919+2020+ val get_char : t -> Cset.c -> char
2121+ val get : t -> char -> Cset.c
2222+ val translate_colors : t -> Cset.t -> Cset.t
2323+end
2424+2525+val make : unit -> t
2626+val flatten : t -> Table.t * Repr.t
2727+val split : t -> Cset.t -> unit
+835
vendor/opam/re/lib/compile.ml
···11+open Import
(* [iter n f v] applies [f] to [v] [n] times: [iter 2 f v] is [f (f v)].
   It only stops when the counter reaches exactly 0. *)
let rec iter n f v =
  match n with
  | 0 -> v
  | _ -> iter (n - 1) f (f v)
;;
(* State indices with two extra kinds of value packed into one immediate int:
   - a non-negative value is a plain automaton index;
   - [-2] is the "unknown" marker;
   - any value at or below [-3] counts as a break index; [make_break] encodes
     index [i] as [-5 - i], which [break_idx] decodes. *)
module Idx : sig
  type t [@@immediate]

  val unknown : t
  val make_break : Automata.Idx.t -> t
  val of_idx : Automata.Idx.t -> t
  val is_idx : t -> bool
  val is_break : t -> bool
  val is_unknown : t -> bool
  val idx : t -> int
  val break_idx : t -> int
end = struct
  type t = int

  let unknown = -2
  let break = -3
  let[@inline always] of_idx (i : Automata.Idx.t) = Automata.Idx.to_int i
  let[@inline always] is_idx t = 0 <= t
  let[@inline always] is_break t = t <= break
  let[@inline always] is_unknown t = Int.equal t unknown
  let[@inline always] idx t = t
  let[@inline always] make_break (i : Automata.Idx.t) = -5 - Automata.Idx.to_int i
  let[@inline always] break_idx t = -(t + 5)
end
2929+3030+type match_info =
3131+ | Match of Group.t
3232+ | Failed
3333+ | Running of { no_match_starts_before : int }
3434+3535+type state_info =
3636+ { idx : Idx.t
3737+ ; (* Index of the current position in the position table.
3838+ Not yet computed transitions point to a dummy state where
3939+ [idx] is set to [unknown];
4040+ [idx] is set to [break] for states that either always
4141+ succeed or always fail. *)
4242+ mutable final : (Category.t * (Automata.Idx.t * Automata.Status.t)) list
4343+ ; (* Mapping from the category of the next character to
4444+ - the index where the next position should be saved
4545+ - possibly, the list of marks (and the corresponding indices)
4646+ corresponding to the best match *)
4747+ desc : Automata.State.t (* Description of this state of the automata *)
4848+ }
4949+5050+(* Thread-safety: we use double-checked locking to access field [final]. *)
(* A state [t] combines some information about the state ([state_info]) with
   a transition table indexed by color. To save an indirection on the hot
   path the two are stored in one array: slot 0 holds the state information
   (cast through [Obj.magic]) and slot [1 + color] holds the target of the
   transition on that color. *)
module State : sig
  type t

  val make : ncol:int -> state_info -> t
  val make_break : state_info -> t
  val get_info : t -> state_info
  val follow_transition : t -> color:Cset.c -> t
  val set_transition : t -> color:Cset.c -> t -> unit
  val is_unknown_transition : t -> color:Cset.c -> bool
end = struct
  type t = Table of t array [@@unboxed]

  (* Thread-safety:
     We store the state information at index 0. For other elements
     of the transition table, which are lazily computed, we use
     double-checked locking. *)

  let[@inline always] get_info (Table slots) : state_info =
    Obj.magic (Array.unsafe_get slots 0)
  ;;

  let set_info (Table slots) (info : state_info) = slots.(0) <- Obj.magic info

  let[@inline always] follow_transition (Table slots) ~color =
    Array.unsafe_get slots (1 + Cset.to_int color)
  ;;

  let set_transition (Table slots) ~color target = slots.(1 + Cset.to_int color) <- target

  (* A transition is unknown while its slot still holds the placeholder
     state, recognised by its [Idx.unknown] index. *)
  let is_unknown_transition st ~color =
    let target = get_info (follow_transition st ~color) in
    Idx.is_unknown target.idx
  ;;

  (* A state made of its information only, with no transition slot. *)
  let info_only (info : state_info) = Table [| Obj.magic info |]

  (* Placeholder stored in every transition slot that is not computed yet. *)
  let unknown_state =
    info_only { idx = Idx.unknown; final = []; desc = Automata.State.dummy }
  ;;

  let make ~ncol info =
    let st = Table (Array.make (ncol + 1) unknown_state) in
    set_info st info;
    st
  ;;

  let make_break info = info_only info
end
103103+104104+(* Automata (compiled regular expression) *)
105105+type re =
106106+ { initial : Automata.expr
107107+ ; (* The whole regular expression *)
108108+ mutable initial_states : (Category.t * State.t) list
109109+ ; (* Initial states, indexed by initial category *)
110110+ colors : Color_map.Table.t
111111+ ; (* Color table *)
112112+ color_repr : Color_map.Repr.t
113113+ ; (* Table from colors to one character of this color *)
114114+ ncolor : int
115115+ ; (* Number of colors. *)
116116+ lnl : Cset.c
117117+ ; (* Color of the last newline. [Cset.null_char] if unnecessary *)
118118+ tbl : Automata.Working_area.t
119119+ ; (* Temporary table used to compute the first available index
120120+ when computing a new state *)
121121+ states : State.t Automata.State.Table.t
122122+ ; (* States of the deterministic automata *)
123123+ group_names : (string * int) list
124124+ ; (* Named groups in the regular expression *)
125125+ group_count : int
126126+ ; (* Number of groups in the regular expression *)
127127+ mutex : Mutex.t
128128+ }
129129+130130+(* Thread-safety:
131131+ We use double-checked locking to access field [initial_states]. The
132132+ state table [states] and the working area [tbl] are only accessed
133133+ with the mutex [mutex] locked.
134134+ The working area is shared between all threads. This might be
135135+ inefficient if many threads are updating the automaton. It seems
136136+ complicated to manage a working area per domain and per regular
137137+ expression. So, if this becomes an issue, it might just be simpler
138138+ to allocate a fresh working area whenever needed.
139139+*)
(* Accessors on a compiled regular expression. [pp_re] prints the automaton
   expression the regexp was compiled to. *)
let pp_re ch { initial; _ } = Automata.pp ch initial
let group_count { group_count; _ } = group_count
let group_names { group_names; _ } = group_names
module Positions = struct
  (* Information used during matching *)
  type t =
    { mutable positions : int array
    ; (* Array of mark positions.
         The marks are off by one for performance reasons *)
      mutable length : int
    }

  (* Shared zero-length table, used when positions are not recorded. Being a
     single mutable value, it must not be passed to [set]. *)
  let empty = { positions = [||]; length = 0 }
  let length t = t.length

  (* Unchecked store: [idx] must be below [length t]. *)
  let unsafe_set t idx pos = Array.unsafe_set t.positions idx pos

  (* [resize idx t] grows [t] so that [idx] becomes a valid index. The length
     is doubled at least once and then until it exceeds [idx]; existing
     entries are kept and new ones are 0. A zero-length table starts growing
     from 1, since doubling 0 would never terminate. The record is updated
     only once the new array has been allocated. *)
  let resize idx t =
    let rec grow len = if idx >= len then grow (2 * len) else len in
    let length = grow (if t.length = 0 then 1 else 2 * t.length) in
    let grown = Array.make length 0 in
    Array.blit t.positions 0 grown 0 (Array.length t.positions);
    t.positions <- grown;
    t.length <- length
  ;;

  (* [set t idx pos] stores [pos] at [idx], growing the table when needed. *)
  let set t idx pos =
    if idx >= length t then resize idx t;
    unsafe_set t idx pos
  ;;

  let all t = t.positions
  let first t = t.positions.(0)

  let make ~groups re =
    if groups
    then (
      (* We initialize this table with a reasonable size. The required
         size may change when the automaton gets updated. So we are
         always checking whether it is large enough before modifying it. *)
      let length = Automata.Working_area.index_count re.tbl + 1 in
      { positions = Array.make length 0; length })
    else empty
  ;;
end
187187+188188+(****)
(* Category of the characters carrying [color]. The null color has the
   [inexistant] category, the last-newline color [re.lnl] has its own special
   category, and any other color takes the category of its representative
   character. *)
let category re ~color =
  let is other = Cset.equal_c color other in
  if is Cset.null_char
  then Category.inexistant
  else if is re.lnl
  then (* Special category for the last newline *)
    Category.(lastnewline ++ newline ++ not_letter)
  else color |> Color_map.Repr.repr re.color_repr |> Category.from_char
;;
197197+198198+(****)
(* [find_state re desc] returns the state registered for the automaton state
   [desc] in [re.states], building and registering it on first use. A
   description whose status is [Failed] or [Match _] becomes a break state
   with no transition table; a running one gets a table of [re.ncolor]
   transitions. *)
let find_state re desc =
  match Automata.State.Table.find re.states desc with
  | st -> st
  | exception Not_found ->
    let is_break =
      match Automata.State.status_no_mutex desc with
      | Failed | Match _ -> true
      | Running -> false
    in
    let auto_idx = Automata.State.idx desc in
    let info =
      { idx = (if is_break then Idx.make_break auto_idx else Idx.of_idx auto_idx)
      ; final = []
      ; desc
      }
    in
    let st = if is_break then State.make_break info else State.make ~ncol:re.ncolor info in
    Automata.State.Table.add re.states desc st;
    st
;;
222222+223223+(**** Match with marks ****)
(* Automaton state that follows [desc] on [color] under category [cat],
   computed with the regexp's working area. *)
let delta { tbl; _ } cat ~color { desc; _ } = Automata.delta tbl cat color desc
(* [validate re s ~pos st] makes sure that the transition of [st] on the
   color of [s.[pos]] is computed. The transition is re-checked once the
   mutex is held, so it is filled at most once.

   The mutex is released through [Fun.protect]: if computing the transition
   raises, the lock must not stay held, otherwise every later use of [re]
   that needs the mutex would block forever.

   Raises [Invalid_argument] (from [s.[pos]]) when [pos] is out of bounds. *)
let validate re (s : string) ~pos st =
  let color = Color_map.Table.get re.colors s.[pos] in
  Mutex.lock re.mutex;
  Fun.protect
    ~finally:(fun () -> Mutex.unlock re.mutex)
    (fun () ->
      if State.is_unknown_transition st ~color
      then (
        let st' =
          let desc' =
            let cat = category re ~color in
            delta re cat ~color (State.get_info st)
          in
          find_state re desc'
        in
        State.set_transition st ~color st'))
;;
(* State reached from [st] on the character [s.[pos]]. The string access is
   unchecked, so [pos] must be a valid index of [s]. *)
let next colors st s pos =
  let color = Color_map.Table.get colors (String.unsafe_get s pos) in
  State.follow_transition st ~color
;;
(* Run the automaton over [s] from [pos] up to (excluding) [last], recording
   in [positions] the position at which each state was entered. Scanning
   stops at the first break state, which is returned; otherwise the state
   reached at [last] is returned. A transition that is not computed yet is
   filled in by [validate] and the same character is then retried.
   [st0] and [st] are the same state at every call in this module. *)
let rec loop re ~colors ~positions s ~pos ~last st0 st =
  if pos >= last
  then st
  else (
    let st' = next colors st s pos in
    let idx = (State.get_info st').idx in
    if Idx.is_idx idx
    then (
      let slot = Idx.idx idx in
      (* Take the unchecked store whenever the table is already large enough;
         [Positions.set] resizes the position array. *)
      if slot < Positions.length positions
      then Positions.unsafe_set positions slot pos
      else Positions.set positions slot pos;
      loop re ~colors ~positions s ~pos:(pos + 1) ~last st' st')
    else if Idx.is_break idx
    then (
      Positions.set positions (Idx.break_idx idx) pos;
      st')
    else (
      (* Unknown *)
      validate re s ~pos st0;
      loop re ~colors ~positions s ~pos ~last st0 st0))
;;
(* Same scan as [loop], without recording any position: follow transitions
   from [pos] up to (excluding) [last], stop at the first break state, and
   compute unknown transitions on demand. *)
let rec loop_no_mark re ~colors s ~pos ~last st0 st =
  if pos >= last
  then st
  else (
    let st' = next colors st s pos in
    let idx = (State.get_info st').idx in
    if Idx.is_idx idx
    then loop_no_mark re ~colors s ~pos:(pos + 1) ~last st' st'
    else if Idx.is_break idx
    then st'
    else (
      (* Unknown *)
      validate re s ~pos st0;
      loop_no_mark re ~colors s ~pos ~last st0 st0))
;;
(* [final re st cat] returns the index and status of the automaton state that
   follows [st] on the null color when the next character has category [cat].
   Results are cached per category in [st.final]: the cache is first read
   without the lock, then read again and filled with [re.mutex] held.

   The mutex is released through [Fun.protect], so that an exception raised
   while computing the entry cannot leave it locked. *)
let final re st cat =
  try List.assq cat st.final with
  | Not_found ->
    Mutex.lock re.mutex;
    Fun.protect
      ~finally:(fun () -> Mutex.unlock re.mutex)
      (fun () ->
        try List.assq cat st.final with
        | Not_found ->
          let st' = delta re cat ~color:Cset.null_char st in
          let res = Automata.State.idx st', Automata.State.status_no_mutex st' in
          st.final <- (cat, res) :: st.final;
          res)
;;
(* [find_initial_state re cat] returns the initial state for the initial
   category [cat]. Initial states are cached in [re.initial_states]: the
   cache is first read without the lock, then read again and filled with
   [re.mutex] held.

   The mutex is released through [Fun.protect], so that an exception raised
   while building the state cannot leave it locked. *)
let find_initial_state re cat =
  try List.assq cat re.initial_states with
  | Not_found ->
    Mutex.lock re.mutex;
    Fun.protect
      ~finally:(fun () -> Mutex.unlock re.mutex)
      (fun () ->
        try List.assq cat re.initial_states with
        | Not_found ->
          let st = find_state re (Automata.State.create cat re.initial) in
          re.initial_states <- (cat, st) :: re.initial_states;
          st)
;;
(* Color of the character at [pos] in [s]. Positions outside the string have
   the null color. When the regexp uses a last-newline color ([re.lnl] is not
   the null color), a newline in the very last position gets that color. *)
let get_color re (s : string) pos =
  let slen = String.length s in
  if pos < 0 || pos >= slen
  then Cset.null_char
  else (
    let c = String.unsafe_get s pos in
    let is_last_newline =
      pos = slen - 1 && (not (Cset.equal_c re.lnl Cset.null_char)) && Char.equal c '\n'
    in
    if is_last_newline
    then (* Special case for the last newline *)
      re.lnl
    else Color_map.Table.get re.colors c)
;;
(* Take the transition of [st] on the last-newline color [re.lnl] and, when
   [groups] is set, record [pos] for the state reached. If the transition is
   not computed yet, it is filled in with [re.mutex] held (re-checking under
   the lock) and the function retries.

   The mutex is released through [Fun.protect], so that an exception raised
   while computing the transition cannot leave it locked. *)
let rec handle_last_newline re positions ~pos st ~groups =
  let st' = State.follow_transition st ~color:re.lnl in
  let info = State.get_info st' in
  if Idx.is_idx info.idx
  then (
    if groups then Positions.set positions (Idx.idx info.idx) pos;
    st')
  else if Idx.is_break info.idx
  then (
    if groups then Positions.set positions (Idx.break_idx info.idx) pos;
    st')
  else (
    (* Unknown *)
    let color = re.lnl in
    Mutex.lock re.mutex;
    Fun.protect
      ~finally:(fun () -> Mutex.unlock re.mutex)
      (fun () ->
        if State.is_unknown_transition st ~color
        then (
          let st' =
            let desc =
              (* The category is that of the last-newline color, but the
                 automaton steps on the color of a plain newline. *)
              let cat = category re ~color in
              let real_c = Color_map.Table.get re.colors '\n' in
              delta re cat ~color:real_c (State.get_info st)
            in
            find_state re desc
          in
          State.set_transition st ~color st'));
    handle_last_newline re positions ~pos st ~groups)
;;
(* Scan [s] from [pos] up to (excluding) [last], starting in [initial_state].
   When the regexp uses a last-newline color and the scanned range ends with
   the final newline of [s], that newline is handled separately by
   [handle_last_newline], unless the scan of the preceding characters already
   ended in a break state. Positions are recorded only when [groups] is set. *)
let rec scan_str re positions (s : string) initial_state ~last ~pos ~groups =
  let ends_on_last_newline =
    last = String.length s
    && (not (Cset.equal_c re.lnl Cset.null_char))
    && last > pos
    && Char.equal (String.get s (last - 1)) '\n'
  in
  if ends_on_last_newline
  then (
    let before_newline = last - 1 in
    let st = scan_str re positions ~pos s initial_state ~last:before_newline ~groups in
    if Idx.is_break (State.get_info st).idx
    then st
    else handle_last_newline re positions ~pos:before_newline st ~groups)
  else if groups
  then loop re ~colors:re.colors ~positions s ~pos ~last initial_state initial_state
  else loop_no_mark re ~colors:re.colors s ~pos ~last initial_state initial_state
;;
(* This function adds a final boundary check on the input.
   It is used to tell that matching failed because the input was
   insufficient, or to verify that the result really is a match for regexps
   that have boundary conditions with respect to the input string.

   The category of what follows the scanned range is [inexistant] when the
   range stops at the end of [s], otherwise that of the character at [last];
   [search_boundary] is always added. When [groups] is set and the result is
   a match, [last] is recorded as the position of the final state. *)
let final_boundary_check re positions ~last ~slen s state_info ~groups =
  let following =
    if last = slen then Category.inexistant else category re ~color:(get_color re s last)
  in
  let idx, res = final re state_info Category.(search_boundary ++ following) in
  (if groups
   then (
     match res with
     | Match _ -> Positions.set positions (Automata.Idx.to_int idx) last
     | Failed | Running -> ()));
  res
;;
(* Match [s] from [pos] over [len] characters ([len = -1] meaning up to the
   end of the string) and return the resulting automaton status. Bounds are
   not checked here. *)
let make_match_str re positions ~len ~groups ~partial s ~pos =
  let slen = String.length s in
  let last = if len = -1 then slen else pos + len in
  let preceding =
    if pos = 0 then Category.inexistant else category re ~color:(get_color re s (pos - 1))
  in
  let initial_state = find_initial_state re Category.(search_boundary ++ preceding) in
  let st = scan_str re positions s initial_state ~pos ~last ~groups in
  let state_info = State.get_info st in
  if Idx.is_break state_info.idx || (partial && not groups)
  then Automata.State.status re.mutex state_info.desc
  else if not partial
  then final_boundary_check re positions ~last ~slen s state_info ~groups
  else (
    (* Partial match with groups: the only case left at this point. *)
    match Automata.State.status re.mutex state_info.desc with
    | (Match _ | Failed) as status -> status
    | Running ->
      (* This could be because it's still not fully matched, or it
         could be because we need to run special end of input
         checks. *)
      (match final_boundary_check re positions ~last ~slen s state_info ~groups with
       | Match _ as status -> status
       | Failed | Running ->
         (* A failure here just means that we need more data, i.e.
            it's a partial match. *)
         Running))
;;
(* Incremental matching: the input is supplied slice by slice. A stream pairs
   the compiled regexp with the automaton state reached so far. *)
module Stream = struct
  type nonrec t =
    { state : State.t
    ; re : re
    }

  type 'a feed =
    | Ok of 'a
    | No_match

  (* [check_slice msg s ~pos ~len] raises [Invalid_argument msg] unless
     [pos, pos + len) lies within [s]. The scanning loops read the string
     with unchecked accesses, so an out-of-range slice must never reach them.
     The test is written so that it cannot overflow. *)
  let check_slice msg s ~pos ~len =
    if pos < 0 || len < 0 || len > String.length s - pos then invalid_arg msg
  ;;

  (* [has_failed re info] holds when [info] describes a break state whose
     status is [Failed]. The status is only queried for break states. *)
  let has_failed re (info : state_info) =
    Idx.is_break info.idx
    &&
    match Automata.State.status re.mutex info.desc with
    | Failed -> true
    | Match _ | Running -> false
  ;;

  let create re =
    let category = Category.(search_boundary ++ inexistant) in
    let state = find_initial_state re category in
    { state; re }
  ;;

  (* [feed t s ~pos ~len] advances the stream over the given slice of [s].
     Returns [No_match] once the automaton has definitely failed, otherwise
     the updated stream.
     Raises [Invalid_argument] when the slice is not within [s]. *)
  let feed t s ~pos ~len =
    check_slice "Re.Stream.feed: out of bounds" s ~pos ~len;
    let last = pos + len in
    let state = loop_no_mark t.re ~colors:t.re.colors s ~last ~pos t.state t.state in
    if has_failed t.re (State.get_info state) then No_match else Ok { t with state }
  ;;

  (* [finalize t s ~pos ~len] consumes a last slice and tells whether the
     whole input matches, treating the end of the slice as the end of input.
     Raises [Invalid_argument] when the slice is not within [s]. *)
  let finalize t s ~pos ~len =
    check_slice "Re.Stream.finalize: out of bounds" s ~pos ~len;
    let last = pos + len in
    let state = scan_str t.re Positions.empty s t.state ~last ~pos ~groups:false in
    let info = State.get_info state in
    let _idx, res = final t.re info Category.(search_boundary ++ inexistant) in
    match res with
    | Running | Failed -> false
    | Match _ -> true
  ;;

  (* Streaming with group capture: on top of the automaton state, the mark
     positions and the slices fed so far are kept. *)
  module Group = struct
    type nonrec t =
      { t : t
      ; positions : Positions.t
      ; slices : Slice.L.t
      ; abs_pos : int
      ; first_match_pos : int
      }

    let no_match_starts_before t = t.first_match_pos

    let create t =
      { t
      ; positions = Positions.make ~groups:true t.re
      ; slices = []
      ; abs_pos = 0
      ; first_match_pos = 0
      }
    ;;

    module Match = struct
      type t =
        { pmarks : Pmark.Set.t
        ; slices : Slice.L.t
        ; marks : Mark_infos.t
        ; positions : int array
        ; start_pos : int
        }

      let test_mark t mark = Pmark.Set.mem mark t.pmarks

      (* Substring captured by group [i], if any. Recorded positions are
         shifted by [start_pos] before being looked up in the slices. *)
      let get t i =
        Mark_infos.offset t.marks i
        |> Option.map (fun (start, stop) ->
          let start = t.positions.(start) - t.start_pos in
          let stop = t.positions.(stop) - t.start_pos in
          Slice.L.get_substring t.slices ~start ~stop)
      ;;

      let make ~start_pos ~pmarks ~slices ~marks ~positions =
        let positions = Positions.all positions in
        { pmarks; slices; positions; marks; start_pos }
      ;;
    end

    (* Same scan as the top-level [loop], except that recorded positions are
       offset by [abs_pos], the number of characters fed before this slice. *)
    let rec loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st =
      if pos < last
      then (
        let st' = next colors st s pos in
        let idx = (State.get_info st').idx in
        if Idx.is_idx idx
        then
          if Idx.idx idx < Positions.length positions
          then (
            Positions.unsafe_set positions (Idx.idx idx) (abs_pos + pos);
            loop re ~abs_pos ~colors ~positions s ~pos:(pos + 1) ~last st' st')
          else (
            (* Resize position array *)
            Positions.set positions (Idx.idx idx) (abs_pos + pos);
            loop re ~abs_pos ~colors ~positions s ~pos:(pos + 1) ~last st' st')
        else if Idx.is_break idx
        then (
          Positions.set positions (Idx.break_idx idx) (abs_pos + pos);
          st')
        else (
          (* Unknown *)
          validate re s ~pos st0;
          loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st0))
      else st
    ;;

    (* [feed tt s ~pos ~len] advances the stream over the given slice. On
       success the slice is added to the kept slices, which are then trimmed
       by how far the first recorded position has moved.
       Raises [Invalid_argument] when the slice is not within [s]. *)
    let feed ({ t; positions; slices; abs_pos; first_match_pos = _ } as tt) s ~pos ~len =
      check_slice "Re.Stream.Group.feed: out of bounds" s ~pos ~len;
      let state =
        let last = pos + len in
        loop t.re ~abs_pos ~colors:t.re.colors s ~positions ~last ~pos t.state t.state
      in
      if has_failed t.re (State.get_info state)
      then No_match
      else (
        let t = { t with state } in
        let slices = { Slice.s; pos; len } :: slices in
        let first_match_pos = Positions.first positions in
        let slices = Slice.L.drop_rev slices (first_match_pos - tt.first_match_pos) in
        let abs_pos = abs_pos + len in
        Ok { tt with t; slices; abs_pos; first_match_pos })
    ;;

    (* [finalize tt s ~pos ~len] consumes a last slice and returns the match,
       if any, treating the end of the slice as the end of input.
       Raises [Invalid_argument] when the slice is not within [s]. *)
    let finalize
          ({ t; positions; slices; abs_pos; first_match_pos = _ } as tt)
          s
          ~pos
          ~len
      : Match.t feed
      =
      check_slice "Re.Stream.Group.finalize: out of bounds" s ~pos ~len;
      let last = pos + len in
      let info =
        let state =
          loop t.re ~abs_pos ~colors:t.re.colors s ~positions ~last ~pos t.state t.state
        in
        State.get_info state
      in
      match
        match Automata.State.status t.re.mutex info.desc with
        | (Match _ | Failed) as s -> s
        | Running ->
          (* Still running: apply the end-of-input boundary check. *)
          let idx, res =
            let final_cat = Category.(search_boundary ++ inexistant) in
            final t.re info final_cat
          in
          (match res with
           | Running | Failed -> ()
           | Match _ -> Positions.set positions (Automata.Idx.to_int idx) (abs_pos + last));
          res
      with
      | Running | Failed -> No_match
      | Match (marks, pmarks) ->
        let first_match_position = Positions.first positions in
        let slices =
          let slices =
            let slices = { Slice.s; pos; len } :: slices in
            Slice.L.drop_rev slices (first_match_position - tt.first_match_pos)
          in
          List.rev slices
        in
        Ok (Match.make ~start_pos:first_match_position ~pmarks ~marks ~slices ~positions)
    ;;
  end
end
(* Run a match without validating [pos] and [len], and convert the automaton
   status into a [match_info]. For a running (partial) result,
   [no_match_starts_before] is the first recorded position when groups are
   tracked and 0 otherwise. *)
let match_str_no_bounds ~groups ~partial re s ~pos ~len =
  let positions = Positions.make ~groups re in
  let status = make_match_str re positions ~len ~groups ~partial s ~pos in
  match status with
  | Failed -> Failed
  | Running ->
    Running { no_match_starts_before = (if groups then Positions.first positions else 0) }
  | Match (marks, pmarks) ->
    (* The position array is read only now: matching may have replaced it
       while resizing. *)
    let gpos = Positions.all positions in
    Match (Group.create s marks pmarks ~gpos ~gcount:re.group_count)
;;
(* [check_exec_bounds s ~pos ~len] raises [Invalid_argument] unless [pos] is
   within [0, String.length s] and [len] is either [-1] (up to the end of the
   string) or such that [pos + len] does not exceed the length of [s].
   [pos] is checked on its own because with [len = -1] the sum [pos + len]
   would let [pos = String.length s + 1] through; comparing [len] with the
   remaining length also avoids any integer overflow. *)
let check_exec_bounds s ~pos ~len =
  let slen = String.length s in
  if pos < 0 || pos > slen || len < -1 || len > slen - pos
  then invalid_arg "Re.exec: out of bounds"
;;

(* [match_str_p re s ~pos ~len] tells whether [re] matches, without computing
   groups.
   Raises [Invalid_argument] when [pos] or [len] is out of bounds. *)
let match_str_p re s ~pos ~len =
  check_exec_bounds s ~pos ~len;
  match make_match_str re Positions.empty ~len ~groups:false ~partial:false s ~pos with
  | Match _ -> true
  | Failed | Running -> false
;;

(* [match_str ~groups ~partial re s ~pos ~len] is [match_str_no_bounds] behind
   a bounds check.
   Raises [Invalid_argument] when [pos] or [len] is out of bounds. *)
let match_str ~groups ~partial re s ~pos ~len =
  check_exec_bounds s ~pos ~len;
  match_str_no_bounds ~groups ~partial re s ~pos ~len
;;
(* Assemble a compiled regexp from the results of compilation. The mutable
   parts start fresh: no initial state, an empty state table, a new working
   area and a new mutex. *)
let mk_re ~initial ~colors ~color_repr ~ncolor ~lnl ~group_names ~group_count =
  let tbl = Automata.Working_area.create () in
  let states = Automata.State.Table.create 97 in
  let mutex = Mutex.create () in
  { initial
  ; initial_states = []
  ; colors
  ; color_repr
  ; ncolor
  ; lnl
  ; tbl
  ; states
  ; group_names
  ; group_count
  ; mutex
  }
;;
658658+659659+(**** Compilation ****)
660660+661661+module A = Automata
(* [enforce_kind ids kind kind' cr] returns [cr] unchanged, except when
   [kind] is [`First] and [kind'] is not: [cr] is then put in sequence with
   an epsilon under semantics [kind']. *)
let enforce_kind ids kind kind' cr =
  match kind with
  | `First ->
    (match kind' with
     | `First -> cr
     | k -> A.seq ids k cr (A.eps ids))
  | _ -> cr
;;
669669+670670+type context =
671671+ { ids : A.Ids.t
672672+ ; kind : A.Sem.t
673673+ ; ign_group : bool
674674+ ; greedy : A.Rep_kind.t
675675+ ; pos : A.Mark.t ref
676676+ ; names : (string * int) list ref
677677+ ; cache : Cset.t Cset.CSetMap.t ref
678678+ ; colors : Color_map.Table.t
679679+ }
(* Translate a character set into the corresponding set of colors. A
   single-character set is translated directly; any other set goes through
   [cache], keyed by the set and its hash. *)
let trans_set cache (cm : Color_map.Table.t) s =
  match Cset.one_char s with
  | Some i -> Cset.csingle (Color_map.Table.get_char cm i)
  | None ->
    let key = Cset.hash s, s in
    (match Cset.CSetMap.find key !cache with
     | cached -> cached
     | exception Not_found ->
       let translated = Color_map.Table.translate_colors cm s in
       cache := Cset.CSetMap.add key translated !cache;
       translated)
;;
(* [make_repeater ids cr kind greedy] is a function that wraps [rem] into
   "one optional copy of [cr], then [rem]". In the alternative, the branch
   that takes [cr] comes first when greedy and last when non-greedy. *)
let make_repeater ids cr kind greedy rem =
  match greedy with
  | `Greedy -> A.alt ids [ A.seq ids kind (A.rename ids cr) rem; A.eps ids ]
  | `Non_greedy -> A.alt ids [ A.eps ids; A.seq ids kind (A.rename ids cr) rem ]
;;
(* [translate ctx ast] converts a case-resolved AST node into an automaton
   expression and returns it together with a semantics kind. [trans_seq]
   does the same for a sequence of nodes.

   The context carries the id generator, the current semantics and
   greediness, whether groups are ignored, the next free mark ([pos]), the
   named groups collected so far ([names]) and the color translation cache.
   [pos] and [names] are mutated while groups are translated, so sub-terms
   must be translated in the order written here. *)
699699+700700+(* XXX should probably compute a category mask *)
701701+let rec translate
702702+ ({ ids; kind; ign_group; greedy; pos; names; cache; colors } as ctx)
703703+ (ast : Ast.no_case)
704704+ =
705705+ match ast with
706706+ | Set s -> A.cst ids (trans_set cache colors s), kind
707707+ | Sequence l -> trans_seq ctx l, kind
708708+ | Ast (Alternative l) ->
709709+ (match Ast.merge_sequences l with
710710+ | [ r' ] ->
711711+ let cr, kind' = translate ctx r' in
712712+ enforce_kind ids kind kind' cr, kind
713713+ | merged_sequences ->
714714+ ( A.alt
715715+ ids
716716+ (List.map merged_sequences ~f:(fun r' ->
717717+ let cr, kind' = translate ctx r' in
718718+ enforce_kind ids kind kind' cr))
719719+ , kind ))
(* Repetition: [i] mandatory copies of the body, followed either by an
   unbounded repetition (no upper bound) or by [j - i] optional copies. *)
720720+ | Repeat (r', i, j) ->
721721+ let cr, kind' = translate ctx r' in
722722+ let rem =
723723+ match j with
724724+ | None -> A.rep ids greedy kind' cr
725725+ | Some j ->
726726+ let f = make_repeater ids cr kind' greedy in
727727+ iter (j - i) f (A.eps ids)
728728+ in
729729+ iter i (fun rem -> A.seq ids kind' (A.rename ids cr) rem) rem, kind
(* Assertions: each one constrains the category of the character before
   ([A.after]) and/or after ([A.before]) the current position. *)
730730+ | Beg_of_line -> A.after ids Category.(inexistant ++ newline), kind
731731+ | End_of_line -> A.before ids Category.(inexistant ++ newline), kind
732732+ | Beg_of_word ->
733733+ ( A.seq
734734+ ids
735735+ `First
736736+ (A.after ids Category.(inexistant ++ not_letter))
737737+ (A.before ids Category.letter)
738738+ , kind )
739739+ | End_of_word ->
740740+ ( A.seq
741741+ ids
742742+ `First
743743+ (A.after ids Category.letter)
744744+ (A.before ids Category.(inexistant ++ not_letter))
745745+ , kind )
746746+ | Not_bound ->
747747+ ( A.alt
748748+ ids
749749+ [ A.seq ids `First (A.after ids Category.letter) (A.before ids Category.letter)
750750+ ; (let cat = Category.(inexistant ++ not_letter) in
751751+ A.seq ids `First (A.after ids cat) (A.before ids cat))
752752+ ]
753753+ , kind )
754754+ | Beg_of_str -> A.after ids Category.inexistant, kind
755755+ | End_of_str -> A.before ids Category.inexistant, kind
756756+ | Last_end_of_line -> A.before ids Category.(inexistant ++ lastnewline), kind
757757+ | Start -> A.after ids Category.search_boundary, kind
758758+ | Stop -> A.before ids Category.search_boundary, kind
759759+ | Sem (kind', r') ->
760760+ let cr, kind'' = translate { ctx with kind = kind' } r' in
761761+ enforce_kind ids kind' kind'' cr, kind'
762762+ | Sem_greedy (greedy', r') -> translate { ctx with greedy = greedy' } r'
(* A capturing group takes the current mark [p] for its start and
   [A.Mark.next p] for its end. The mark counter is advanced before the body
   is translated, so groups nested in the body get later marks. *)
763763+ | Group (n, r') ->
764764+ if ign_group
765765+ then translate ctx r'
766766+ else (
767767+ let p = !pos in
768768+ let () =
769769+ match n with
770770+ | Some name -> names := (name, A.Mark.group_count p) :: !names
771771+ | None -> ()
772772+ in
773773+ pos := A.Mark.next2 !pos;
774774+ let cr, kind' = translate ctx r' in
775775+ ( A.seq ids `First (A.mark ids p) (A.seq ids `First cr (A.mark ids (A.Mark.next p)))
776776+ , kind' ))
777777+ | No_group r' -> translate { ctx with ign_group = true } r'
(* [Nest] prefixes the body with an erase of the marks allocated while
   translating it ([b] to [e]); nothing is added when the comparison below
   reports that no mark was allocated.
   NOTE(review): the test relies on [A.Mark.compare] returning exactly -1
   for "less than" - confirm against its definition. *)
778778+ | Nest r' ->
779779+ let b = !pos in
780780+ let cr, kind' = translate ctx r' in
781781+ let e = A.Mark.prev !pos in
782782+ if A.Mark.compare e b = -1
783783+ then cr, kind'
784784+ else A.seq ids `First (A.erase ids b e) cr, kind'
785785+ | Pmark (i, r') ->
786786+ let cr, kind' = translate ctx r' in
787787+ A.seq ids `First (A.pmark ids i) cr, kind'
(* Translate a list of nodes into nested [A.seq]. The head is translated
   before the tail, and an operand that is an epsilon is dropped rather than
   sequenced. *)
788788+789789+and trans_seq ({ ids; kind; _ } as ctx) = function
790790+ | [] -> A.eps ids
791791+ | [ r ] ->
792792+ let cr', kind' = translate ctx r in
793793+ enforce_kind ids kind kind' cr'
794794+ | r :: rem ->
795795+ let cr', kind' = translate ctx r in
796796+ let cr'' = trans_seq ctx rem in
797797+ if A.is_eps cr'' then cr' else if A.is_eps cr' then cr'' else A.seq ids kind' cr' cr''
798798+;;
799799+800800+let compile_1 regexp =
801801+ let regexp = Ast.handle_case false regexp in
802802+ let color_map = Color_map.make () in
803803+ let need_lnl = Ast.colorize color_map regexp in
804804+ let colors, color_repr = Color_map.flatten color_map in
805805+ let ncolor = Color_map.Repr.length color_repr in
806806+ let lnl = if need_lnl then Cset.of_int ncolor else Cset.null_char in
807807+ let ncolor = if need_lnl then ncolor + 1 else ncolor in
808808+ let ctx =
809809+ { ids = A.Ids.create ()
810810+ ; kind = `First
811811+ ; ign_group = false
812812+ ; greedy = `Greedy
813813+ ; pos = ref A.Mark.start
814814+ ; names = ref []
815815+ ; cache = ref Cset.CSetMap.empty
816816+ ; colors
817817+ }
818818+ in
819819+ let r, kind = translate ctx regexp in
820820+ let r = enforce_kind ctx.ids `First kind r in
821821+ (*Format.eprintf "<%d %d>@." !ids ncol;*)
822822+ mk_re
823823+ ~initial:r
824824+ ~colors
825825+ ~color_repr
826826+ ~ncolor
827827+ ~lnl
828828+ ~group_names:(List.rev !(ctx.names))
829829+ ~group_count:(A.Mark.group_count !(ctx.pos))
830830+;;
831831+832832+let compile r =
833833+ let open Ast.Export in
834834+ compile_1 (if Ast.anchored r then group r else seq [ shortest (rep any); group r ])
835835+;;
+59
vendor/opam/re/lib/compile.mli
···11+type re
22+33+module Stream : sig
44+ type t
55+66+ type 'a feed =
77+ | Ok of 'a
88+ | No_match
99+1010+ val create : re -> t
1111+ val feed : t -> string -> pos:int -> len:int -> t feed
1212+ val finalize : t -> string -> pos:int -> len:int -> bool
1313+1414+ module Group : sig
1515+ type stream := t
1616+ type t
1717+1818+ module Match : sig
1919+ type t
2020+2121+ val get : t -> int -> string option
2222+ val test_mark : t -> Pmark.t -> bool
2323+ end
2424+2525+ val create : stream -> t
2626+ val feed : t -> string -> pos:int -> len:int -> t feed
2727+ val finalize : t -> string -> pos:int -> len:int -> Match.t feed
2828+ val no_match_starts_before : t -> int
2929+ end
3030+end
3131+3232+type match_info =
3333+ | Match of Group.t
3434+ | Failed
3535+ | Running of { no_match_starts_before : int }
3636+3737+val match_str_no_bounds
3838+ : groups:bool
3939+ -> partial:bool
4040+ -> re
4141+ -> string
4242+ -> pos:int
4343+ -> len:int
4444+ -> match_info
4545+4646+val match_str
4747+ : groups:bool
4848+ -> partial:bool
4949+ -> re
5050+ -> string
5151+ -> pos:int
5252+ -> len:int
5353+ -> match_info
5454+5555+val match_str_p : re -> string -> pos:int -> len:int -> bool
5656+val compile : Ast.t -> re
5757+val group_count : re -> int
5858+val group_names : re -> (string * int) list
5959+val pp_re : re Fmt.t
+173
vendor/opam/re/lib/core.ml
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+open Import
2424+2525+include struct
2626+ let cset = Ast.cset
2727+ let rg c c' = cset (Cset.cseq c c')
2828+ let notnl = cset Cset.notnl
2929+ let lower = cset Cset.lower
3030+ let upper = cset Cset.upper
3131+ let alpha = cset Cset.alpha
3232+ let digit = cset Cset.cdigit
3333+ let alnum = cset Cset.alnum
3434+ let wordc = cset Cset.wordc
3535+ let ascii = cset Cset.ascii
3636+ let blank = cset Cset.blank
3737+ let cntrl = cset Cset.cntrl
3838+ let graph = cset Cset.graph
3939+ let print = cset Cset.print
4040+ let punct = cset Cset.punct
4141+ let space = cset Cset.space
4242+ let xdigit = cset Cset.xdigit
4343+end
4444+4545+include Ast.Export
4646+4747+let exec_internal ?(pos = 0) ?(len = -1) ~partial ~groups re s =
4848+ Compile.match_str ~groups ~partial re s ~pos ~len
4949+;;
5050+5151+let exec ?pos ?len re s =
5252+ match exec_internal ?pos ?len ~groups:true ~partial:false re s with
5353+ | Match substr -> substr
5454+ | _ -> raise Not_found
5555+;;
5656+5757+let exec_opt ?pos ?len re s =
5858+ match exec_internal ?pos ?len ~groups:true ~partial:false re s with
5959+ | Match substr -> Some substr
6060+ | _ -> None
6161+;;
6262+6363+let execp ?(pos = 0) ?(len = -1) re s = Compile.match_str_p ~pos ~len re s
6464+6565+let exec_partial ?pos ?len re s =
6666+ match exec_internal ~groups:false ~partial:true ?pos ?len re s with
6767+ | Match _ -> `Full
6868+ | Running _ -> `Partial
6969+ | Failed -> `Mismatch
7070+;;
7171+7272+let exec_partial_detailed ?pos ?len re s =
7373+ match exec_internal ~groups:true ~partial:true ?pos ?len re s with
7474+ | Match group -> `Full group
7575+ | Running { no_match_starts_before } -> `Partial no_match_starts_before
7676+ | Failed -> `Mismatch
7777+;;
7878+7979+module Mark = struct
8080+ type t = Pmark.t
8181+8282+ let test (g : Group.t) p = Pmark.Set.mem p (Group.pmarks g)
8383+ let all (g : Group.t) = Group.pmarks g
8484+8585+ module Set = Pmark.Set
8686+8787+ let equal = Pmark.equal
8888+ let compare = Pmark.compare
8989+end
9090+9191+type split_token =
9292+ [ `Text of string
9393+ | `Delim of Group.t
9494+ ]
9595+9696+module Gen = struct
9797+ type 'a gen = unit -> 'a option
9898+9999+ let gen_of_seq (s : 'a Seq.t) : 'a gen =
100100+ let r = ref s in
101101+ fun () ->
102102+ match !r () with
103103+ | Seq.Nil -> None
104104+ | Seq.Cons (x, tl) ->
105105+ r := tl;
106106+ Some x
107107+ ;;
108108+109109+ let split ?pos ?len re s : _ gen = Search.split ?pos ?len re s |> gen_of_seq
110110+ let split_full ?pos ?len re s : _ gen = Search.split_full ?pos ?len re s |> gen_of_seq
111111+ let all ?pos ?len re s = Search.all ?pos ?len re s |> gen_of_seq
112112+ let matches ?pos ?len re s = Search.matches ?pos ?len re s |> gen_of_seq
113113+end
114114+115115+module Group = Group
116116+117117+(** {2 Deprecated functions} *)
118118+119119+let split_full_seq = Search.split_full
120120+let split_seq = Search.split
121121+let matches_seq = Search.matches
122122+let all_seq = Search.all
123123+124124+type 'a gen = 'a Gen.gen
125125+126126+let all_gen = Gen.all
127127+let matches_gen = Gen.matches
128128+let split_gen = Gen.split
129129+let split_full_gen = Gen.split_full
130130+131131+type substrings = Group.t
132132+133133+let get = Group.get
134134+let get_ofs = Group.offset
135135+let get_all = Group.all
136136+let get_all_ofs = Group.all_offset
137137+let test = Group.test
138138+139139+type markid = Mark.t
140140+141141+let marked = Mark.test
142142+let mark_set = Mark.all
143143+144144+type groups = Group.t
145145+146146+module List = struct
147147+ let list_of_seq (s : 'a Seq.t) : 'a list =
148148+ Seq.fold_left (fun l x -> x :: l) [] s |> List.rev
149149+ ;;
150150+151151+ let all ?pos ?len re s = Search.all ?pos ?len re s |> list_of_seq
152152+ let matches ?pos ?len re s = Search.matches ?pos ?len re s |> list_of_seq
153153+ let split_full ?pos ?len re s = Search.split_full ?pos ?len re s |> list_of_seq
154154+ let split ?pos ?len re s = Search.split ?pos ?len re s |> list_of_seq
155155+ let split_delim ?pos ?len re s = Search.split_delim ?pos ?len re s |> list_of_seq
156156+end
157157+158158+include List
159159+160160+include struct
161161+ open Compile
162162+163163+ type nonrec re = re
164164+165165+ let compile = compile
166166+ let pp_re = pp_re
167167+ let print_re = pp_re
168168+ let group_names = group_names
169169+ let group_count = group_count
170170+end
171171+172172+module Seq = Search
173173+module Stream = Compile.Stream
+813
vendor/opam/re/lib/core.mli
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(** Module [Re]: code for creating and using regular expressions,
2424+ independently of regular expression syntax. *)
2525+2626+(** Regular expression *)
2727+type t = Ast.t
2828+2929+(** Compiled regular expression *)
3030+type re = Compile.re
3131+3232+(** Manipulate matching groups. *)
3333+module Group : sig
3434+ (** Information about groups in a match. As is conventional, every
3535+ match implicitly has a group 0 that covers the whole match, and
3636+ explicit groups are numbered from 1. *)
3737+ type t = Group.t
3838+3939+ (** Raise [Not_found] if the group did not match *)
4040+ val get : t -> int -> string
4141+4242+ (** Similar to {!get}, but returns an option instead of using an exception. *)
4343+ val get_opt : t -> int -> string option
4444+4545+ (** Raise [Not_found] if the group did not match *)
4646+ val offset : t -> int -> int * int
4747+4848+ (** Similar to {!offset}, but returns an option instead of using an exception. *)
4949+ val offset_opt : t -> int -> (int * int) option
5050+5151+ (** Return the start of the match. Raise [Not_found] if the group did not match. *)
5252+ val start : t -> int -> int
5353+5454+ (** Similar to {!start}, but returns an option instead of using an exception. *)
5555+ val start_opt : t -> int -> int option
5656+5757+ (** Return the end of the match. Raise [Not_found] if the group did not match. *)
5858+ val stop : t -> int -> int
5959+6060+ (** Similar to {!stop}, but returns an option instead of using an exception. *)
6161+ val stop_opt : t -> int -> int option
6262+6363+ (** Return the empty string for each group which did not match *)
6464+ val all : t -> string array
6565+6666+ (** Return [(-1,-1)] for each group which did not match *)
6767+ val all_offset : t -> (int * int) array
6868+6969+ (** Test whether a group matched *)
7070+ val test : t -> int -> bool
7171+7272+ (** Returns the total number of groups defined - matched or not.
7373+ This function is experimental. *)
7474+ val nb_groups : t -> int
7575+7676+ val pp : Format.formatter -> t -> unit
7777+end
7878+7979+type groups = Group.t [@@ocaml.deprecated "Use Group.t"]
8080+8181+(** {2 Compilation and execution of a regular expression} *)
8282+8383+(** Compile a regular expression into an executable version that can be
8484+ used to match strings, e.g. with {!exec}. *)
8585+val compile : t -> re
8686+8787+(** Return the number of capture groups (including the one
8888+ corresponding to the entire regexp). *)
8989+val group_count : re -> int
9090+9191+(** Return named capture groups with their index. *)
9292+val group_names : re -> (string * int) list
9393+9494+(** [exec re str] searches [str] for a match of the compiled expression [re],
9595+ and returns the matched groups if any.
9696+9797+ More specifically, when a match exists, [exec] returns a match that
9898+ starts at the earliest position possible. If multiple such matches are
9999+ possible, the one specified by the match semantics described below is
100100+ returned.
101101+102102+ {5 Examples:}
103103+ {[
104104+ # let regex = Re.compile Re.(seq [str "//"; rep print ]);;
105105+ val regex : re = <abstr>
106106+107107+ # Re.exec regex "// a C comment";;
108108+ - : Re.Group.t = <abstr>
109109+110110+ # Re.exec regex "# a C comment?";;
111111+ Exception: Not_found
112112+113113+ # Re.exec ~pos:1 regex "// a C comment";;
114114+ Exception: Not_found
115115+ ]}
116116+117117+ @param pos optional beginning of the string (default 0)
118118+ @param len
119119+ length of the substring of [str] that can be matched (default [-1],
120120+ meaning to the end of the string)
121121+ @raise Not_found if the regular expression can't be found in [str] *)
122122+val exec
123123+ : ?pos:int (** Default: 0 *)
124124+ -> ?len:int (** Default: -1 (until end of string) *)
125125+ -> re
126126+ -> string
127127+ -> Group.t
128128+129129+(** Similar to {!exec}, but returns an option instead of using an exception.
130130+131131+ {5 Examples:}
132132+ {[
133133+ # let regex = Re.compile Re.(seq [str "//"; rep print ]);;
134134+ val regex : re = <abstr>
135135+136136+ # Re.exec_opt regex "// a C comment";;
137137+ - : Re.Group.t option = Some <abstr>
138138+139139+ # Re.exec_opt regex "# a C comment?";;
140140+ - : Re.Group.t option = None
141141+142142+ # Re.exec_opt ~pos:1 regex "// a C comment";;
143143+ - : Re.Group.t option = None
144144+ ]} *)
145145+val exec_opt
146146+ : ?pos:int (** Default: 0 *)
147147+ -> ?len:int (** Default: -1 (until end of string) *)
148148+ -> re
149149+ -> string
150150+ -> Group.t option
151151+152152+(** Similar to {!exec}, but returns [true] if the expression matches,
153153+ and [false] if it doesn't. This function is more efficient than
154154+ calling {!exec} or {!exec_opt} and ignoring the returned group.
155155+156156+ {5 Examples:}
157157+ {[
158158+ # let regex = Re.compile Re.(seq [str "//"; rep print ]);;
159159+ val regex : re = <abstr>
160160+161161+ # Re.execp regex "// a C comment";;
162162+ - : bool = true
163163+164164+ # Re.execp ~pos:1 regex "// a C comment";;
165165+ - : bool = false
166166+ ]} *)
167167+val execp
168168+ : ?pos:int (** Default: 0 *)
169169+ -> ?len:int (** Default: -1 (until end of string) *)
170170+ -> re
171171+ -> string
172172+ -> bool
173173+174174+(** More detailed version of {!execp}. [`Full] is equivalent to [true],
175175+ while [`Mismatch] and [`Partial] are equivalent to [false], but [`Partial]
176176+ indicates the input string could be extended to create a match.
177177+178178+ {5 Examples:}
179179+ {[
180180+ # let regex = Re.compile Re.(seq [bos; str "// a C comment"]);;
181181+ val regex : re = <abstr>
182182+183183+ # Re.exec_partial regex "// a C comment here.";;
184184+ - : [ `Full | `Mismatch | `Partial ] = `Full
185185+186186+ # Re.exec_partial regex "// a C comment";;
187187+ - : [ `Full | `Mismatch | `Partial ] = `Partial
188188+189189+ # Re.exec_partial regex "//";;
190190+ - : [ `Full | `Mismatch | `Partial ] = `Partial
191191+192192+ # Re.exec_partial regex "# a C comment?";;
193193+ - : [ `Full | `Mismatch | `Partial ] = `Mismatch
194194+ ]} *)
195195+val exec_partial
196196+ : ?pos:int (** Default: 0 *)
197197+ -> ?len:int (** Default: -1 (until end of string) *)
198198+ -> re
199199+ -> string
200200+ -> [ `Full | `Partial | `Mismatch ]
201201+202202+(** More detailed version of {!exec_opt}. [`Full group] is equivalent to [Some group],
203203+ while [`Mismatch] and [`Partial _] are equivalent to [None], but [`Partial position]
204204+ indicates that the input string could be extended to create a match, and no match could
205205+ start in the input string before the given position.
206206+ This could be used to not have to search the entirety of the input if more
207207+ becomes available, and use the given position as the [?pos] argument. *)
208208+val exec_partial_detailed
209209+ : ?pos:int (** Default: 0 *)
210210+ -> ?len:int (** Default: -1 (until end of string) *)
211211+ -> re
212212+ -> string
213213+ -> [ `Full of Group.t | `Partial of int | `Mismatch ]
214214+215215+(** Marks *)
216216+module Mark : sig
217217+ (** Mark id *)
218218+ type t = Pmark.t
219219+220220+ (** Tell if a mark was matched. *)
221221+ val test : Group.t -> t -> bool
222222+223223+ module Set : Set.S with type elt = t
224224+225225+ (** Return all the marks matched. *)
226226+ val all : Group.t -> Set.t
227227+228228+ val equal : t -> t -> bool
229229+ val compare : t -> t -> int
230230+end
231231+232232+(** {2 High Level Operations} *)
233233+234234+type split_token =
235235+ [ `Text of string (** Text between delimiters *)
236236+ | `Delim of Group.t (** Delimiter *)
237237+ ]
238238+239239+(** Repeatedly calls {!exec} on the given string, starting at given position and
240240+ length.
241241+242242+ {5 Examples:}
243243+ {[
244244+ # let regex = Re.compile Re.(seq [str "my"; blank; word(rep alpha)]);;
245245+ val regex : re = <abstr>
246246+247247+ # Re.all regex "my head, my shoulders, my knees, my toes ...";;
248248+ - : Re.Group.t list = [<abstr>; <abstr>; <abstr>; <abstr>]
249249+250250+ # Re.all regex "My head, My shoulders, My knees, My toes ...";;
251251+ - : Re.Group.t list = []
252252+ ]} *)
253253+val all : ?pos:int -> ?len:int -> re -> string -> Group.t list
254254+255255+type 'a gen = unit -> 'a option
256256+257257+(** @deprecated Use {!module-Seq.all} instead. *)
258258+val all_gen : ?pos:int -> ?len:int -> re -> string -> Group.t gen
259259+[@@ocaml.deprecated "Use Seq.all"]
260260+261261+(** @deprecated Use {!module-Seq.all} instead. *)
262262+val all_seq : ?pos:int -> ?len:int -> re -> string -> Group.t Seq.t
263263+[@@ocaml.deprecated "Use Seq.all"]
264264+265265+(** Same as {!all}, but extracts the matched substring rather than returning
266266+ the whole group. This basically iterates over matched strings.
267267+268268+ {5 Examples:}
269269+ {[
270270+ # let regex = Re.compile Re.(seq [str "my"; blank; word(rep alpha)]);;
271271+ val regex : re = <abstr>
272272+273273+ # Re.matches regex "my head, my shoulders, my knees, my toes ...";;
274274+ - : string list = ["my head"; "my shoulders"; "my knees"; "my toes"]
275275+276276+ # Re.matches regex "My head, My shoulders, My knees, My toes ...";;
277277+ - : string list = []
278278+279279+ # Re.matches regex "my my my my head my 1 toe my ...";;
280280+ - : string list = ["my my"; "my my"]
281281+282282+ # Re.matches ~pos:2 regex "my my my my head my +1 toe my ...";;
283283+ - : string list = ["my my"; "my head"]
284284+ ]} *)
285285+val matches : ?pos:int -> ?len:int -> re -> string -> string list
286286+287287+(** @deprecated Use {!module-Seq.matches} instead. *)
288288+val matches_gen : ?pos:int -> ?len:int -> re -> string -> string gen
289289+[@@ocaml.deprecated "Use Seq.matches"]
290290+291291+(** @deprecated Use {!module-Seq.matches} instead. *)
292292+val matches_seq : ?pos:int -> ?len:int -> re -> string -> string Seq.t
293293+[@@ocaml.deprecated "Use Seq.matches"]
294294+295295+(** [split re s] splits [s] into chunks separated by [re]. It yields
296296+ the chunks themselves, not the separator. An occurrence of the
297297+ separator at the beginning or the end of the string is ignored.
298298+299299+ {5 Examples:}
300300+ {[
301301+ # let regex = Re.compile (Re.char ',');;
302302+ val regex : re = <abstr>
303303+304304+ # Re.split regex "Re,Ocaml,Jerome Vouillon";;
305305+ - : string list = ["Re"; "Ocaml"; "Jerome Vouillon"]
306306+307307+ # Re.split regex "No commas in this sentence.";;
308308+ - : string list = ["No commas in this sentence."]
309309+310310+ # Re.split regex ",1,2,";;
311311+ - : string list = ["1"; "2"]
312312+313313+ # Re.split ~pos:3 regex "1,2,3,4. Commas go brrr.";;
314314+ - : string list = ["3"; "4. Commas go brrr."]
315315+ ]}
316316+317317+ {6 Zero-length patterns:}
318318+319319+ Be careful when using [split] with zero-length patterns like [eol], [bow],
320320+ and [eow]. Because they don't have any width, they will still be present in
321321+ the result. (Note the position of the [\n] and space characters in the
322322+ output.)
323323+324324+ {[
325325+ # Re.split (Re.compile Re.eol) "a\nb";;
326326+ - : string list = ["a"; "\nb"]
327327+328328+ # Re.split (Re.compile Re.bow) "a b";;
329329+ - : string list = ["a "; "b"]
330330+331331+ # Re.split (Re.compile Re.eow) "a b";;
332332+ - : string list = ["a"; " b"]
333333+ ]}
334334+335335+ Compare this to the behavior of splitting on the char itself. (Note that
336336+ the delimiters are not present in the output.)
337337+338338+ {[
339339+ # Re.split (Re.compile (Re.char '\n')) "a\nb";;
340340+ - : string list = ["a"; "b"]
341341+342342+ # Re.split (Re.compile (Re.char ' ')) "a b";;
343343+ - : string list = ["a"; "b"]
344344+ ]} *)
345345+val split : ?pos:int -> ?len:int -> re -> string -> string list
346346+347347+(** [split_delim re s] splits [s] into chunks separated by [re]. It
348348+ yields the chunks themselves, not the separator. Occurrences of the
349349+ separator at the beginning or the end of the string will produce
350350+ empty chunks.
351351+352352+ {5 Examples:}
353353+ {[
354354+ # let regex = Re.compile (Re.char ',');;
355355+ val regex : re = <abstr>
356356+357357+ # Re.split_delim regex "Re,Ocaml,Jerome Vouillon";;
358358+ - : string list = ["Re"; "Ocaml"; "Jerome Vouillon"]
359359+360360+ # Re.split_delim regex "No commas in this sentence.";;
361361+ - : string list = ["No commas in this sentence."]
362362+363363+ # Re.split_delim regex ",1,2,";;
364364+ - : string list = [""; "1"; "2"; ""]
365365+366366+ # Re.split ~pos:3 regex "1,2,3,4. Commas go brrr.";;
367367+ - : string list = ["3"; "4. Commas go brrr."]
368368+ ]}
369369+370370+ {6 Zero-length patterns:}
371371+372372+ Be careful when using [split_delim] with zero-length patterns like [eol],
373373+ [bow], and [eow]. Because they don't have any width, they will still be
374374+ present in the result. (Note the position of the [\n] and space characters
375375+ in the output.)
376376+377377+ {[
378378+ # Re.split_delim (Re.compile Re.eol) "a\nb";;
379379+ - : string list = ["a"; "\nb"; ""]
380380+381381+ # Re.split_delim (Re.compile Re.bow) "a b";;
382382+ - : string list = [""; "a "; "b"]
383383+384384+ # Re.split_delim (Re.compile Re.eow) "a b";;
385385+ - : string list = ["a"; " b"; ""]
386386+ ]}
387387+388388+ Compare this to the behavior of splitting on the char itself. (Note that
389389+ the delimiters are not present in the output.)
390390+391391+ {[
392392+ # Re.split_delim (Re.compile (Re.char '\n')) "a\nb";;
393393+ - : string list = ["a"; "b"]
394394+395395+ # Re.split_delim (Re.compile (Re.char ' ')) "a b";;
396396+ - : string list = ["a"; "b"]
397397+ ]} *)
398398+val split_delim : ?pos:int -> ?len:int -> re -> string -> string list
399399+400400+(** @deprecated Use {!module-Seq.split} instead. *)
401401+val split_gen : ?pos:int -> ?len:int -> re -> string -> string gen
402402+[@@ocaml.deprecated "Use Seq.split"]
403403+404404+(** @deprecated Use {!module-Seq.split} instead. *)
405405+val split_seq : ?pos:int -> ?len:int -> re -> string -> string Seq.t
406406+[@@ocaml.deprecated "Use Seq.split"]
407407+408408+(** [split_full re s] splits [s] into chunks separated by [re]. It yields the chunks
409409+ along with the separators. For instance this can be used with a
410410+ whitespace-matching re such as ["[\t ]+"].
411411+412412+ {5 Examples:}
413413+ {[
414414+ # let regex = Re.compile (Re.char ',');;
415415+ val regex : re = <abstr>
416416+417417+ # Re.split_full regex "Re,Ocaml,Jerome Vouillon";;
418418+ - : Re.split_token list =
419419+ [`Text "Re"; `Delim <abstr>; `Text "Ocaml"; `Delim <abstr>;
420420+ `Text "Jerome Vouillon"]
421421+422422+ # Re.split_full regex "No commas in this sentence.";;
423423+ - : Re.split_token list = [`Text "No commas in this sentence."]
424424+425425+ # Re.split_full ~pos:3 regex "1,2,3,4. Commas go brrr.";;
426426+ - : Re.split_token list =
427427+ [`Delim <abstr>; `Text "3"; `Delim <abstr>; `Text "4. Commas go brrr."]
428428+ ]} *)
429429+val split_full : ?pos:int -> ?len:int -> re -> string -> split_token list
430430+431431+(** @deprecated Use {!module-Seq.split_full} instead. *)
432432+val split_full_gen : ?pos:int -> ?len:int -> re -> string -> split_token gen
433433+[@@ocaml.deprecated "Use Seq.split_full"]
434434+435435+(** @deprecated Use {!module-Seq.split_full} instead. *)
436436+val split_full_seq : ?pos:int -> ?len:int -> re -> string -> split_token Seq.t
437437+[@@ocaml.deprecated "Use Seq.split_full"]
438438+439439+module Seq : sig
440440+ (** Same as {!module-Re.val-all} but returns an iterator.
441441+442442+ {5 Examples:}
443443+ {[
444444+ # let regex = Re.compile Re.(seq [str "my"; blank; word(rep alpha)]);;
445445+ val regex : re = <abstr>
446446+447447+ # Re.Seq.all regex "my head, my shoulders, my knees, my toes ...";;
448448+ - : Re.Group.t Seq.t = <fun>
449449+ ]}
450450+ @since 1.10.0 *)
451451+ val all : ?pos:int (** Default: 0 *) -> ?len:int -> re -> string -> Group.t Seq.t
452452+453453+ (** Same as {!module-Re.val-matches}, but returns an iterator.
454454+455455+ {5 Example:}
456456+ {[
457457+ # let regex = Re.compile Re.(seq [str "my"; blank; word(rep alpha)]);;
458458+ val regex : re = <abstr>
459459+460460+ # Re.Seq.matches regex "my head, my shoulders, my knees, my toes ...";;
461461+ - : string Seq.t = <fun>
462462+ ]}
463463+ @since 1.10.0 *)
464464+ val matches : ?pos:int (** Default: 0 *) -> ?len:int -> re -> string -> string Seq.t
465465+466466+ (** Same as {!module-Re.val-split} but returns an iterator.
467467+468468+ {5 Example:}
469469+ {[
470470+ # let regex = Re.compile (Re.char ',');;
471471+ val regex : re = <abstr>
472472+473473+ # Re.Seq.split regex "Re,Ocaml,Jerome Vouillon";;
474474+ - : string Seq.t = <fun>
475475+ ]}
476476+ @since 1.10.0 *)
477477+ val split : ?pos:int (** Default: 0 *) -> ?len:int -> re -> string -> string Seq.t
478478+479479+ (** Same as {!module-Re.val-split_delim} but returns an iterator.
480480+481481+ {5 Example:}
482482+ {[
483483+ # let regex = Re.compile (Re.char ',');;
484484+ val regex : re = <abstr>
485485+486486+ # Re.Seq.split_delim regex "Re,Ocaml,Jerome Vouillon";;
487487+ - : string Seq.t = <fun>
488488+ ]}
489489+ @since 1.11.1 *)
490490+ val split_delim : ?pos:int (** Default: 0 *) -> ?len:int -> re -> string -> string Seq.t
491491+492492+ (** Same as {!module-Re.val-split_full} but returns an iterator.
493493+494494+ {5 Example:}
495495+ {[
496496+ # let regex = Re.compile (Re.char ',');;
497497+ val regex : re = <abstr>
498498+499499+ # Re.Seq.split_full regex "Re,Ocaml,Jerome Vouillon";;
500500+ - : Re.split_token Seq.t = <fun>
501501+ ]}
502502+ @since 1.10.0 *)
503503+ val split_full
504504+ : ?pos:int (** Default: 0 *)
505505+ -> ?len:int
506506+ -> re
507507+ -> string
508508+ -> split_token Seq.t
509509+end
510510+511511+(** {2 String expressions (literal match)} *)
512512+513513+val str : string -> t
514514+val char : char -> t
515515+516516+(** {2 Basic operations on regular expressions} *)
517517+518518+(** Alternative.
519519+520520+ [alt []] is equivalent to {!empty}.
521521+522522+ By default, the leftmost match is preferred (see match semantics below). *)
523523+val alt : t list -> t
524524+525525+(** Sequence *)
526526+val seq : t list -> t
527527+528528+(** Match nothing *)
529529+val empty : t
530530+531531+(** Empty word *)
532532+val epsilon : t
533533+534534+(** 0 or more matches *)
535535+val rep : t -> t
536536+537537+(** 1 or more matches *)
538538+val rep1 : t -> t
539539+540540+(** [repn re i j] matches [re] at least [i] times
541541+ and at most [j] times, bounds included.
542542+ [j = None] means no upper bound. *)
543543+val repn : t -> int -> int option -> t
544544+545545+(** 0 or 1 matches *)
546546+val opt : t -> t
547547+548548+(** {2 String, line, word}
549549+550550+ We define a word as a sequence of latin1 letters, digits and underscore. *)
551551+552552+(** Beginning of line *)
553553+val bol : t
554554+555555+(** End of line *)
556556+val eol : t
557557+558558+(** Beginning of word *)
559559+val bow : t
560560+561561+(** End of word *)
562562+val eow : t
563563+564564+(** Beginning of string. This differs from {!start} because it matches
565565+ the beginning of the input string even when using [~pos] arguments:
566566+567567+ {[
568568+ let b = execp (compile (seq [ bos; str "a" ])) "aa" ~pos:1 in
569569+ assert (not b)
570570+ ]} *)
571571+val bos : t
572572+573573+(** End of string. This is different from {!stop} in the way described
574574+ in {!bos}. *)
575575+val eos : t
576576+577577+(** Last end of line or end of string *)
578578+val leol : t
579579+580580+(** Initial position. This differs from {!bos} because it takes into
581581+ account the [~pos] arguments:
582582+583583+ {[
584584+ let b = execp (compile (seq [ start; str "a" ])) "aa" ~pos:1 in
585585+ assert b
586586+ ]} *)
587587+val start : t
588588+589589+(** Final position. This is different from {!eos} in the way described
590590+ in {!start}. *)
591591+val stop : t
592592+593593+(** Word *)
594594+val word : t -> t
595595+596596+(** Not at a word boundary *)
597597+val not_boundary : t
598598+599599+(** Only matches the whole string, i.e. [fun t -> seq [ bos; t; eos ]]. *)
600600+val whole_string : t -> t
601601+602602+(** {2 Match semantics}
603603+604604+ A regular expression frequently matches a string in multiple ways. For
605605+ instance [exec (compile (opt (str "a"))) "ab"] can match "" or "a". Match
606606+ semantics can be modified with the functions below, allowing one to choose
607607+ which of these is preferable.
608608+609609+ By default, the leftmost branch of alternations is preferred, and repetitions
610610+ are greedy.
611611+612612+ Note that the existence of matches cannot be changed by specifying match
613613+ semantics. [seq [ bos; str "a"; non_greedy (opt (str "b")); eos ]] will
614614+ match when applied to "ab". However if [seq [ bos; str "a"; non_greedy (opt
615615+ (str "b")) ]] is applied to "ab", it will match "a" rather than "ab".
616616+617617+ Also note that multiple match semantics can conflict. In this case, the one
618618+ executed earlier takes precedence. For instance, any match of [shortest (seq
619619+ [ bos; group (rep (str "a")); group (rep (str "a")); eos ])] will always have
620620+ an empty first group. Conversely, if we use [longest] instead of [shortest],
621621+ the second group will always be empty. *)
622622+623623+(** Longest match semantics. That is, matches will match as many bytes as
624624+ possible. If multiple choices match the maximum amount of bytes, the one
625625+ respecting the inner match semantics is preferred. *)
626626+val longest : t -> t
627627+628628+(** Same as {!longest}, but matching the least number of bytes. *)
629629+val shortest : t -> t
630630+631631+(** First match semantics for alternations (not repetitions). That is, matches
632632+ will prefer the leftmost branch of the alternation that matches the text. *)
633633+val first : t -> t
634634+635635+(** Greedy matches for repetitions ({!opt}, {!rep}, {!rep1}, {!repn}): they will
636636+ match as many times as possible. *)
637637+val greedy : t -> t
638638+639639+(** Non-greedy matches for repetitions ({!opt}, {!rep}, {!rep1}, {!repn}): they
640640+ will match as few times as possible. *)
641641+val non_greedy : t -> t
642642+643643+(** {2 Groups (or submatches)} *)
(** Delimit a group. The group is considered as matching if it is used at least
    once (it may be used multiple times if it is nested inside {!rep} for
    instance). If it is used multiple times, the last match is what gets
    captured. *)
649649+val group : ?name:string -> t -> t
650650+651651+(** Remove all groups *)
652652+val no_group : t -> t
653653+654654+(** When matching against [nest e], only the group matching in the
655655+ last match of e will be considered as matching.
656656+657657+ For instance:
658658+ {[
659659+ let re = compile (rep1 (nest (alt [ group (str "a"); str "b" ]))) in
660660+ let group = Re.exec re "ab" in
661661+ assert (Group.get_opt group 1 = None);
662662+ (* same thing but without [nest] *)
663663+ let re = compile (rep1 (alt [ group (str "a"); str "b" ])) in
664664+ let group = Re.exec re "ab" in
665665+ assert (Group.get_opt group 1 = Some "a")
666666+ ]} *)
667667+val nest : t -> t
(** Mark a regexp. The markid can then be used to know if this regexp was used. *)
670670+val mark : t -> Mark.t * t
671671+672672+(** {2 Character sets} *)
673673+674674+(** Any character of the string *)
675675+val set : string -> t
676676+677677+(** Character ranges *)
678678+val rg : char -> char -> t
679679+680680+(** Intersection of character sets *)
681681+val inter : t list -> t
682682+683683+(** Difference of character sets *)
684684+val diff : t -> t -> t
685685+686686+(** Complement of union *)
687687+val compl : t list -> t
688688+689689+(** {2 Predefined character sets} *)
690690+691691+(** Any character *)
692692+val any : t
693693+694694+(** Any character but a newline *)
695695+val notnl : t
696696+697697+val alnum : t
698698+val wordc : t
699699+val alpha : t
700700+val ascii : t
701701+val blank : t
702702+val cntrl : t
703703+val digit : t
704704+val graph : t
705705+val lower : t
706706+val print : t
707707+val punct : t
708708+val space : t
709709+val upper : t
710710+val xdigit : t
711711+712712+(** {2 Case modifiers} *)
713713+714714+(** Case sensitive matching. Note that this works on latin1, not ascii and not
715715+ utf8. *)
716716+val case : t -> t
717717+718718+(** Case insensitive matching. Note that this works on latin1, not ascii and not
719719+ utf8. *)
720720+val no_case : t -> t
721721+722722+(****)
723723+724724+(** {2 Internal debugging} *)
725725+726726+val pp : Format.formatter -> t -> unit
727727+val pp_re : Format.formatter -> re -> unit
728728+729729+(** Alias for {!pp_re}. Deprecated *)
730730+val print_re : Format.formatter -> re -> unit
731731+732732+(** {2 Experimental functions} *)
733733+734734+(** [witness r] generates a string [s] such that [execp (compile r) s] is true.
735735+736736+ Be warned that this function is buggy because it ignores zero-width
737737+ assertions like beginning of words. As a result it can generate incorrect
738738+ results. *)
739739+val witness : t -> string
740740+741741+(** {2 Deprecated functions} *)
742742+743743+(** Alias for {!Group.t}. Deprecated *)
744744+type substrings = Group.t [@@ocaml.deprecated "Use Group.t"]
745745+746746+(** Same as {!Group.get}. Deprecated *)
747747+val get : Group.t -> int -> string
748748+[@@ocaml.deprecated "Use Group.get"]
749749+750750+(** Same as {!Group.offset}. Deprecated *)
751751+val get_ofs : Group.t -> int -> int * int
752752+[@@ocaml.deprecated "Use Group.offset"]
753753+754754+(** Same as {!Group.all}. Deprecated *)
755755+val get_all : Group.t -> string array
756756+[@@ocaml.deprecated "Use Group.all"]
757757+758758+(** Same as {!Group.all_offset}. Deprecated *)
759759+val get_all_ofs : Group.t -> (int * int) array
760760+[@@ocaml.deprecated "Use Group.all_offset"]
761761+762762+(** Same as {!Group.test}. Deprecated *)
763763+val test : Group.t -> int -> bool
764764+[@@ocaml.deprecated "Use Group.test"]
(** Alias for {!Mark.t}. Deprecated *)
type markid = Mark.t [@@ocaml.deprecated "Use Mark.t"]
768768+769769+(** Same as {!Mark.test}. Deprecated *)
770770+val marked : Group.t -> Mark.t -> bool
771771+[@@ocaml.deprecated "Use Mark.test"]
772772+773773+(** Same as {!Mark.all}. Deprecated *)
774774+val mark_set : Group.t -> Mark.Set.t
775775+[@@ocaml.deprecated "Use Mark.all"]
776776+777777+module Stream : sig
  (** An experimental API for matching a regular expression by feeding
      individual string chunks.

      This module is not covered by semver's stability guarantee. *)
782782+783783+ type t
784784+785785+ type 'a feed =
786786+ | Ok of 'a
787787+ | No_match
788788+789789+ val create : re -> t
790790+ val feed : t -> string -> pos:int -> len:int -> t feed
  (** [finalize t s ~pos ~len] feeds the part of [s] designated by [pos] and
      [len] to [t] and returns whether the regular expression matched. *)
794794+ val finalize : t -> string -> pos:int -> len:int -> bool
795795+796796+ module Group : sig
797797+ (** Match a string against a regular expression with capture groups *)
798798+799799+ type stream := t
800800+ type t
801801+802802+ module Match : sig
803803+ type t
804804+805805+ val get : t -> int -> string option
806806+ val test_mark : t -> Pmark.t -> bool
807807+ end
808808+809809+ val create : stream -> t
810810+ val feed : t -> string -> pos:int -> len:int -> t feed
811811+ val finalize : t -> string -> pos:int -> len:int -> Match.t feed
812812+ end
813813+end
+250
vendor/opam/re/lib/cset.ml
···11+module List = struct end
22+open Import
33+44+(*
55+ RE - A regular expression library
66+77+ Copyright (C) 2001 Jerome Vouillon
88+ email: Jerome.Vouillon@pps.jussieu.fr
99+1010+ This library is free software; you can redistribute it and/or
1111+ modify it under the terms of the GNU Lesser General Public
1212+ License as published by the Free Software Foundation, with
1313+ linking exception; either version 2.1 of the License, or (at
1414+ your option) any later version.
1515+1616+ This library is distributed in the hope that it will be useful,
1717+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1818+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1919+ Lesser General Public License for more details.
2020+2121+ You should have received a copy of the GNU Lesser General Public
2222+ License along with this library; if not, write to the Free Software
2323+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2424+*)
2525+2626+type c = int
2727+2828+let equal_c = Int.equal
2929+let to_int x = x
3030+let of_int x = x
3131+let to_char t = Char.chr t
3232+let of_char c = Char.code c
3333+let null_char = -1
3434+3535+type t = (c * c) list
(* Lexicographic order on intervals: the lower bounds are compared first and
   the upper bounds are only consulted to break a tie. *)
let compare_pair (lo, hi) (lo', hi') =
  let by_lower = Int.compare lo lo' in
  if by_lower <> 0 then by_lower else Int.compare hi hi'
;;
4242+4343+let equal_pair (x, y) (x', y') = Int.equal x x' && Int.equal y y'
4444+let equal x y = List.equal ~eq:equal_pair x y
4545+let compare x y = List.compare ~cmp:compare_pair x y
(* Union of two interval lists. Both lists are walked in parallel; two
   intervals are kept apart only when there is a gap of at least one value
   between them, otherwise (overlapping or adjacent) they are merged. *)
let rec union l l' =
  match l, l' with
  | _, [] -> l
  | [], _ -> l'
  | (lo, hi) :: rest, (lo', hi') :: rest' ->
    if hi + 1 < lo'
    then (lo, hi) :: union rest l'
    else if hi' + 1 < lo
    then (lo', hi') :: union l rest'
    else (
      (* The two heads touch: merge them and keep the merged interval at the
         head of whichever list reaches further to the right. *)
      let merged_lo = min lo lo' in
      if hi < hi'
      then union rest ((merged_lo, hi') :: rest')
      else union ((merged_lo, hi) :: rest) rest')
;;
(* Intersection of two interval lists. The interval that ends first is
   dropped after its overlap with the other head (if any) has been emitted. *)
let rec inter l l' =
  match l, l' with
  | _, [] | [], _ -> []
  | (lo, hi) :: rest, (lo', hi') :: rest' ->
    if hi < lo'
    then inter rest l'
    else if hi' < lo
    then inter l rest'
    else (
      (* The heads overlap; the overlap starts at the larger lower bound. *)
      let start = max lo lo' in
      if hi < hi'
      then (start, hi) :: inter rest l'
      else (start, hi') :: inter l rest')
;;
(* [diff l l'] removes from [l] every value that is covered by [l']. *)
let rec diff l l' =
  match l, l' with
  | _, [] -> l
  | [], _ -> []
  | (lo, hi) :: rest, (lo', hi') :: rest' ->
    if hi < lo'
    then (lo, hi) :: diff rest l'
    else if hi' < lo
    then diff l rest'
    else (
      (* The heads overlap. Whatever sticks out to the right of [hi'] goes
         back on the left list to be checked against the rest of [l']. *)
      let remaining = if hi' < hi then (hi' + 1, hi) :: rest else rest in
      let tail = diff remaining rest' in
      (* Whatever sticks out to the left of [lo'] is kept as is. *)
      if lo < lo' then (lo, lo' - 1) :: tail else tail)
;;
8888+8989+let single =
9090+ let single c = [ c, c ] in
9191+ Dense_map.make (* an extra color for lnl *) ~size:257 ~f:single
9292+;;
9393+9494+let csingle i = single (Char.code i)
9595+let add c l = union (single c) l
9696+let seq c c' = if c <= c' then [ c, c' ] else [ c', c ]
(* [offset o l] shifts both bounds of every interval of [l] by [o]. *)
let rec offset o = function
  | [] -> []
  | (lo, hi) :: rest ->
    let shifted = lo + o, hi + o in
    shifted :: offset o rest
;;
103103+104104+let empty : t = []
105105+let cany = [ 0, 255 ]
106106+let union_all ts = List.fold_left ~init:empty ~f:union ts
107107+let intersect_all ts = List.fold_left ~init:cany ~f:inter ts
(* [mem c s] tells whether [c] lies inside one of the intervals of [s]. The
   search stops at the first interval whose upper bound is not below [c]. *)
let rec mem (c : int) s =
  match s with
  | [] -> false
  | (lo, hi) :: rest -> if c > hi then mem c rest else lo <= c
;;
114114+115115+(****)
(* Hash of an interval list: a polynomial combination of all the bounds,
   masked down to 30 bits. *)
let hash l =
  let rec combine intervals =
    match intervals with
    | [] -> 0
    | (lo, hi) :: rest -> lo + (13 * hi) + (257 * combine rest)
  in
  combine l land 0x3FFFFFFF
;;
123123+124124+(****)
125125+126126+let print_one ch (c1, c2) =
127127+ if Int.equal c1 c2 then Format.fprintf ch "%d" c1 else Format.fprintf ch "%d-%d" c1 c2
128128+;;
129129+130130+let pp ts = Fmt.list ~pp_sep:(Fmt.lit ", ") print_one ts
131131+132132+let to_dyn t =
133133+ let open Dyn in
134134+ match t with
135135+ | [ (x, y) ] when Int.equal x y -> int x
136136+ | _ -> List.map t ~f:(fun (x, y) -> pair (int x) (int y)) |> list
137137+;;
(* [iter t ~f] calls [f lo hi] on every interval of [t], in list order. *)
let rec iter t ~f =
  match t with
  | (lo, hi) :: rest ->
    f lo hi;
    iter rest ~f
  | [] -> ()
;;
146146+147147+let one_char = function
148148+ | [ (i, j) ] when Int.equal i j -> Some i
149149+ | _ -> None
150150+;;
151151+152152+module CSetMap = Map.Make (struct
153153+ type t = int * (int * int) list
154154+155155+ let compare (i, u) (j, v) =
156156+ let c = Int.compare i j in
157157+ if c <> 0 then c else compare u v
158158+ ;;
159159+ end)
160160+161161+let fold_right t ~init ~f = List.fold_right ~f:(fun (x, y) acc -> f x y acc) t ~init
162162+163163+let is_empty = function
164164+ | [] -> true
165165+ | _ -> false
166166+;;
(* [prepend s x l] walks the character set [s] and the list [l] in parallel.
   Every element of [l] is expected to pair a single-interval set with a
   list; any other shape reaches the final [assert false]. The intervals of
   [l] are split at the boundaries of [s]: the parts covered by [s] get [x]
   prepended to their list ([x @ x']), the parts not covered keep their list
   unchanged. Once [s] is exhausted the rest of [l] is returned as is, and
   once [l] is exhausted the result ends. *)
let rec prepend s x l =
  match s, l with
  | [], _ -> l
  | _r, [] -> []
  (* The head interval of [s] ends before the head of [l] starts: skip it. *)
  | (_c, c') :: r, ([ (d, _d') ], _x') :: _r' when c' < d -> prepend r x l
  | (c, c') :: r, ([ (d, d') ], x') :: r' ->
    if c <= d
    then
      if c' < d'
      then
        (* [s] covers only the front [d, c'] of the head of [l]; the rest
           [c' + 1, d'] is checked against the next interval of [s]. *)
        ([ d, c' ], x @ x') :: prepend r x (([ c' + 1, d' ], x') :: r')
      else
        (* The whole head of [l] is covered. *)
        ([ d, d' ], x @ x') :: prepend s x r'
    else if c > d'
    then
      (* The head of [l] ends before [c, c'] starts: no overlap. *)
      ([ d, d' ], x') :: prepend s x r'
    else
      (* Only the back of the head of [l] can overlap: split off the
         uncovered front [d, c - 1] and retry with the remainder. *)
      ([ d, c - 1 ], x') :: prepend s x (([ c, d' ], x') :: r')
  | _ -> assert false
;;
(* [pick t] returns the lower bound of the first interval of [t].
   @raise Invalid_argument if [t] is empty. *)
let pick t =
  match t with
  | (first, _) :: _ -> first
  | [] -> invalid_arg "Re_cset.pick"
;;
189189+190190+let cseq c c' = seq (of_char c) (of_char c')
191191+let rg = cseq
192192+let char = csingle
193193+let upper = union_all [ cseq 'A' 'Z'; cseq '\192' '\214'; cseq '\216' '\222' ]
194194+let clower = offset 32 upper
195195+let cdigit = cseq '0' '9'
196196+let ascii = cseq '\000' '\127'
197197+let cadd c s = add (of_char c) s
198198+let space = add (of_char ' ') (cseq '\009' '\013')
199199+let xdigit = union_all [ cdigit; cseq 'a' 'f'; cseq 'A' 'F' ]
200200+201201+let calpha =
202202+ List.fold_right
203203+ ~f:cadd
204204+ [ '\170'; '\181'; '\186'; '\223'; '\255' ]
205205+ ~init:(union clower upper)
206206+;;
207207+208208+let calnum = union calpha cdigit
209209+210210+let case_insens s =
211211+ union_all [ s; offset 32 (inter s upper); offset (-32) (inter s clower) ]
212212+;;
213213+214214+let cword = cadd '_' calnum
215215+let notnl = diff cany (csingle '\n')
216216+let nl = csingle '\n'
(* [set str] is the set made of every character occurring in [str]; the
   characters are added one at a time, from the first to the last. *)
let set str =
  let len = String.length str in
  let rec collect i acc =
    if i >= len then acc else collect (i + 1) (union (csingle str.[i]) acc)
  in
  collect 0 empty
;;
225225+226226+let blank = set "\t "
227227+228228+(* CR-someday rgrinberg: this [lower] doesn't match [clower] *)
229229+let lower = union_all [ rg 'a' 'z'; char '\181'; rg '\223' '\246'; rg '\248' '\255' ]
230230+let alpha = union_all [ lower; upper; char '\170'; char '\186' ]
231231+let alnum = union_all [ alpha; cdigit ]
232232+let wordc = union_all [ alnum; char '_' ]
233233+let cntrl = union_all [ rg '\000' '\031'; rg '\127' '\159' ]
234234+let graph = union_all [ rg '\033' '\126'; rg '\160' '\255' ]
235235+let print = union_all [ rg '\032' '\126'; rg '\160' '\255' ]
236236+237237+let punct =
238238+ union_all
239239+ [ rg '\033' '\047'
240240+ ; rg '\058' '\064'
241241+ ; rg '\091' '\096'
242242+ ; rg '\123' '\126'
243243+ ; rg '\160' '\169'
244244+ ; rg '\171' '\180'
245245+ ; rg '\182' '\185'
246246+ ; rg '\187' '\191'
247247+ ; char '\215'
248248+ ; char '\247'
249249+ ]
250250+;;
+84
vendor/opam/re/lib/cset.mli
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(* Character sets, represented as sorted list of intervals *)
2424+2525+type c [@@immediate]
2626+2727+val equal_c : c -> c -> bool
2828+val to_int : c -> int
2929+val of_int : int -> c
3030+val to_char : c -> char
3131+val of_char : char -> c
3232+3333+type t
(** Special character which isn't present in any set (not even in [cany]) *)
3636+val null_char : c
3737+3838+val equal : t -> t -> bool
3939+val iter : t -> f:(c -> c -> unit) -> unit
4040+val union : t -> t -> t
4141+val union_all : t list -> t
4242+val intersect_all : t list -> t
4343+val inter : t -> t -> t
4444+val diff : t -> t -> t
4545+val empty : t
4646+val single : c -> t
4747+val add : c -> t -> t
4848+val mem : c -> t -> bool
4949+val case_insens : t -> t
5050+val cdigit : t
5151+val calpha : t
5252+val cword : t
5353+val notnl : t
5454+val ascii : t
5555+val nl : t
5656+val cseq : char -> char -> t
5757+val set : string -> t
5858+val blank : t
5959+val space : t
6060+val xdigit : t
6161+val lower : t
6262+val upper : t
6363+val alpha : t
6464+val alnum : t
6565+val wordc : t
6666+val cntrl : t
6767+val graph : t
6868+val print : t
6969+val punct : t
7070+val pp : t Fmt.t
7171+val one_char : t -> c option
7272+val fold_right : t -> init:'acc -> f:(c -> c -> 'acc -> 'acc) -> 'acc
7373+val hash : t -> int
7474+val compare : t -> t -> int
7575+7676+module CSetMap : Map.S with type key = int * t
7777+7878+val cany : t
7979+val csingle : char -> t
8080+val is_empty : t -> bool
8181+val prepend : t -> 'a list -> (t * 'a list) list -> (t * 'a list) list
8282+val pick : t -> c
8383+val offset : int -> t -> t
8484+val to_dyn : t -> Dyn.t
+4
vendor/opam/re/lib/dense_map.ml
(* [make ~size ~f] applies [f] to every index from [0] to [size - 1] right
   away, stores the results in an array, and returns a function that looks
   an index up in that array. *)
let make ~size ~f =
  let table = Array.init size f in
  fun index -> table.(index)
;;
+1
vendor/opam/re/lib/dense_map.mli
···11+val make : size:int -> f:(int -> 'a) -> int -> 'a
···11+type t =
22+ | Int of int
33+ | Tuple of t list
44+ | Enum of string
55+ | String of string
66+ | List of t list
77+ | Variant of string * t list
88+ | Record of (string * t) list
99+1010+let variant x y = Variant (x, y)
1111+let list x = List x
1212+let int x = Int x
1313+let pair x y = Tuple [ x; y ]
1414+let record fields = Record fields
1515+let enum x = Enum x
1616+let string s = String s
1717+1818+let result ok err = function
1919+ | Ok s -> variant "Ok" [ ok s ]
2020+ | Error e -> variant "Error" [ err e ]
2121+;;
2222+2323+let option f = function
2424+ | None -> enum "None"
2525+ | Some s -> variant "Some" [ f s ]
2626+;;
+146
vendor/opam/re/lib/emacs.ml
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+module Re = Core
2424+open Import
2525+2626+exception Parse_error
2727+exception Not_supported
(* [by_code f c c'] applies the integer function [f] to the character codes
   of [c] and [c'] and converts the result back to a character. *)
let by_code f c c' = Char.chr (f (Char.code c) (Char.code c'))
;;
(* [parse ~emacs_only s] turns the pattern [s] into a [Re.t] with a
   recursive-descent parser reading from a [Parse_buffer].

   Several escapes (the backquote and quote anchors, [\B], [\<], [\>]) are
   only recognised when [emacs_only] is [true]; in that mode an unknown
   escaped character is an error, otherwise it stands for itself.

   NOTE(review): the comments below assume that [accept]/[accept_s] consume
   the input when they return [true], that [test2] only looks ahead and that
   [get] consumes one character -- confirm against [Parse_buffer].

   @raise Parse_error on malformed input (see the individual cases below).
   @raise Not_supported on a backslash followed by a digit. *)
let parse ~emacs_only s =
  let buf = Parse_buffer.create s in
  let accept = Parse_buffer.accept buf in
  let eos () = Parse_buffer.eos buf in
  let test2 = Parse_buffer.test2 buf in
  let get () = Parse_buffer.get buf in
  (* regexp: branches separated by "\|". [left] accumulates the branches in
     reverse order. *)
  let rec regexp () = regexp' [ branch () ]
  and regexp' left =
    if Parse_buffer.accept_s buf {|\||}
    then regexp' (branch () :: left)
    else Re.alt (List.rev left)
  (* branch: a sequence of pieces, ended by the end of input, "\|" or "\)". *)
  and branch () = branch' []
  and branch' left =
    if eos () || test2 '\\' '|' || test2 '\\' ')'
    then Re.seq (List.rev left)
    else branch' (piece () :: left)
  (* piece: an atom optionally followed by one of the postfix operators
     '*', '+' or '?'. *)
  and piece () =
    let r = atom () in
    if accept '*'
    then Re.rep r
    else if accept '+'
    then Re.rep1 r
    else if accept '?'
    then Re.opt r
    else r
  and atom () =
    if accept '.'
    then Re.notnl
    else if accept '^'
    then Re.bol
    else if accept '$'
    then Re.eol
    else if accept '['
    then if accept '^' then Re.compl (bracket []) else Re.alt (bracket [])
    else if accept '\\'
    then
      (* Escape sequences. *)
      if accept '('
      then (
        (* "\(" regexp "\)" is a capture group; the closing "\)" is
           mandatory. *)
        let r = regexp () in
        if not (Parse_buffer.accept_s buf {|\)|}) then raise Parse_error;
        Re.group r)
      else if emacs_only && accept '`'
      then Re.bos
      else if emacs_only && accept '\''
      then Re.eos
      else if accept '='
      then Re.start
      else if accept 'b'
      then Re.alt [ Re.bow; Re.eow ]
      else if emacs_only && accept 'B'
      then Re.not_boundary
      else if emacs_only && accept '<'
      then Re.bow
      else if emacs_only && accept '>'
      then Re.eow
      else if accept 'w'
      then Re.alt [ Re.alnum; Re.char '_' ]
      else if accept 'W'
      then Re.compl [ Re.alnum; Re.char '_' ]
      else (
        (* A backslash at the very end of the pattern is an error. *)
        if eos () then raise Parse_error;
        match get () with
        (* An escaped special character stands for itself. *)
        | ('*' | '+' | '?' | '[' | ']' | '.' | '^' | '$' | '\\') as c -> Re.char c
        | '0' .. '9' -> raise Not_supported
        | c -> if emacs_only then raise Parse_error else Re.char c)
    else (
      if eos () then raise Parse_error;
      match get () with
      (* A postfix operator with no atom in front of it is an error. *)
      | '*' | '+' | '?' -> raise Parse_error
      | c -> Re.char c)
  (* bracket: the contents of "[...]", accumulated in reverse order in [s].
     ']' only closes the set once [s] is non-empty, so a leading ']' is read
     as an ordinary member. *)
  and bracket s =
    if s <> [] && accept ']'
    then s
    else (
      let c = char () in
      if accept '-'
      then
        if accept ']'
        then
          (* "c-]": the dash is a literal member and the set ends here. *)
          Re.char c :: Re.char '-' :: s
        else (
          let c' = char () in
          (* The upper bound is replaced by whichever of [c] and [c'] has the
             larger character code. *)
          let c' = by_code Int.max c c' in
          bracket (Re.rg c c' :: s))
      else bracket (Re.char c :: s))
  (* Next character of a bracket expression; the input must not be
     exhausted. *)
  and char () =
    if eos () then raise Parse_error;
    get ()
  in
  let res = regexp () in
  (* Anything left over (for instance an unmatched "\)") is an error. *)
  if not (eos ()) then raise Parse_error;
  res
;;
127127+128128+let re ?(case = true) s =
129129+ let r = parse s ~emacs_only:true in
130130+ if case then r else Re.no_case r
131131+;;
132132+133133+let re_no_emacs ~case s =
134134+ let r = parse s ~emacs_only:false in
135135+ if case then r else Re.no_case r
136136+;;
137137+138138+let re_result ?case s =
139139+ match re ?case s with
140140+ | s -> Ok s
141141+ | exception Not_supported -> Error `Not_supported
142142+ | exception Parse_error -> Error `Parse_error
143143+;;
144144+145145+let compile = Re.compile
146146+let compile_pat ?(case = true) s = compile (re ~case s)
+41
vendor/opam/re/lib/emacs.mli
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(** Emacs-style regular expressions *)
2424+2525+exception Parse_error
2626+2727+(** Errors that can be raised during the parsing of the regular expression *)
2828+exception Not_supported
2929+3030+(** Parsing of an Emacs-style regular expression *)
3131+val re : ?case:bool -> string -> Core.t
3232+3333+val re_result : ?case:bool -> string -> (Core.t, [ `Not_supported | `Parse_error ]) result
3434+3535+(** Regular expression compilation *)
3636+val compile : Core.t -> Core.re
(** [compile_pat ?case s] parses [s] as an Emacs-style regular expression and
    compiles it; same as [compile (re ?case s)]. *)
3939+val compile_pat : ?case:bool -> string -> Core.re
4040+4141+val re_no_emacs : case:bool -> string -> Core.t
+10
vendor/opam/re/lib/fake/atomic.ml
(* Stand-in for the [Atomic] interface built on a plain [ref]: every
   operation is an ordinary, unsynchronised read or write of the cell. *)
type 'a t = 'a ref

let make initial = { contents = initial }
let get cell = cell.contents
let set atomic v = atomic.contents <- v

(* Adds [n] to the cell and returns the value it held before the addition. *)
let fetch_and_add atomic n =
  let before = atomic.contents in
  atomic.contents <- before + n;
  before
+5
vendor/opam/re/lib/fake/domain.ml
(* Stand-in for [Domain.DLS] in which a key is a plain [ref]: [new_key] runs
   the initialiser once, immediately, and stores its result in the cell. *)
module DLS = struct
  let new_key init = { contents = init () }
  let set key value = key.contents <- value
  let get key = key.contents
end
+26
vendor/opam/re/lib/fake/mutex.ml
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2025 Jerome Vouillon
55+66+ This library is free software; you can redistribute it and/or
77+ modify it under the terms of the GNU Lesser General Public
88+ License as published by the Free Software Foundation, with
99+ linking exception; either version 2.1 of the License, or (at
1010+ your option) any later version.
1111+1212+ This library is distributed in the hope that it will be useful,
1313+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1414+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1515+ Lesser General Public License for more details.
1616+1717+ You should have received a copy of the GNU Lesser General Public
1818+ License along with this library; if not, write to the Free Software
1919+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2020+*)
2121+2222+type t = unit
2323+2424+let create _ = ()
2525+let lock _ = ()
2626+let unlock _ = ()
+51
vendor/opam/re/lib/fmt.ml
···11+(** Very small tooling for format printers. *)
22+33+include Format
44+55+type 'a t = Format.formatter -> 'a -> unit
66+77+let list = pp_print_list
88+let array ?pp_sep f fmt arr = list ?pp_sep f fmt (Array.to_list arr)
99+let str = pp_print_string
1010+let sexp fmt s pp x = fprintf fmt "@[<3>(%s@ %a)@]" s pp x
1111+let bytes fmt t = Format.fprintf fmt "%S" (Bytes.to_string t)
1212+1313+let pair pp1 pp2 fmt (v1, v2) =
1414+ pp1 fmt v1;
1515+ pp_print_space fmt ();
1616+ pp2 fmt v2
1717+;;
1818+1919+let triple pp1 pp2 pp3 fmt (v1, v2, v3) =
2020+ pp1 fmt v1;
2121+ pp_print_space fmt ();
2222+ pp2 fmt v2;
2323+ pp_print_space fmt ();
2424+ pp3 fmt v3
2525+;;
(* [opt f] prints [Some x] with [f] and [None] as the literal text
   "<None>". *)
let opt f fmt = function
  | Some x -> f fmt x
  | None -> Format.pp_print_string fmt "<None>"
;;
3232+3333+let int = pp_print_int
3434+3535+let optint fmt = function
3636+ | None -> ()
3737+ | Some i -> fprintf fmt "@ %d" i
3838+;;
3939+4040+let char fmt c = Format.fprintf fmt "%c" c
4141+let bool = Format.pp_print_bool
4242+let lit s fmt () = pp_print_string fmt s
(* [to_to_string pp x] renders [x] with the printer [pp] into a fresh buffer
   and returns the resulting string.

   The formatter is flushed before the buffer is read: [Format] keeps output
   queued while the size of an enclosing box is still being computed, so
   without the flush anything printed inside a box could be missing from
   the result. *)
let to_to_string pp x =
  let b = Buffer.create 16 in
  let fmt = Format.formatter_of_buffer b in
  pp fmt x;
  Format.pp_print_flush fmt ();
  Buffer.contents b
;;
5050+5151+let quoted_string fmt s = Format.fprintf fmt "%S" s
+18
vendor/opam/re/lib/fmt.mli
···11+type formatter := Format.formatter
22+type 'a t = formatter -> 'a -> unit
33+44+val sexp : formatter -> string -> 'a t -> 'a -> unit
55+val str : string t
66+val optint : int option t
77+val opt : 'a t -> 'a option t
88+val char : char t
99+val bool : bool t
1010+val int : int t
1111+val pair : 'a t -> 'b t -> ('a * 'b) t
1212+val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t
1313+val list : ?pp_sep:unit t -> 'a t -> 'a list t
1414+val bytes : Bytes.t t
1515+val array : ?pp_sep:unit t -> 'a t -> 'a array t
1616+val lit : string -> unit t
1717+val to_to_string : 'a t -> 'a -> string
1818+val quoted_string : string t
+337
vendor/opam/re/lib/glob.ml
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+module Re = Core
2424+2525+exception Parse_error = Parse_buffer.Parse_error
2626+2727+type enclosed =
2828+ | Char of char
2929+ | Range of char * char
3030+3131+type piece =
3232+ | Exactly of char
3333+ | Any_of of enclosed list
3434+ | Any_but of enclosed list
3535+ | One
3636+ | Many
3737+ | ManyMany
3838+3939+type t = piece list
(* [of_string ~double_asterisk s] parses the glob pattern [s] into a list of
   pieces, in pattern order. When [double_asterisk] is [true], "**" (and
   "/**") are recognised and produce [ManyMany].

   NOTE(review): the comments below assume that [Parse_buffer.accept] and
   [accept_s] consume the input when they return [true] and that
   [Parse_buffer.get] consumes one character -- confirm against
   [Parse_buffer].

   @raise Parse_error when a character is required but the input is
   exhausted (for instance after a trailing backslash or inside an unclosed
   bracket expression). *)
let of_string ~double_asterisk s : t =
  let buf = Parse_buffer.create s in
  let eos () = Parse_buffer.eos buf in
  let read c = Parse_buffer.accept buf c in
  (* Reads one literal character, skipping a single backslash in front of it
     if there is one. *)
  let char () =
    ignore (read '\\' : bool);
    if eos () then raise Parse_error;
    Parse_buffer.get buf
  in
  (* Parses the inside of a bracket expression, after the opening '[' (and
     the negation mark, if any) has been read. ']' only closes the
     expression once at least one member has been collected. *)
  let enclosed () : enclosed list =
    let rec loop s =
      (* This returns the list in reverse order, but order isn't important
         anyway *)
      if s <> [] && read ']'
      then s
      else (
        let c = char () in
        if not (read '-')
        then loop (Char c :: s)
        else if read ']'
        then
          (* "c-]": the dash is a literal member and the expression ends. *)
          Char c :: Char '-' :: s
        else (
          let c' = char () in
          loop (Range (c, c') :: s)))
    in
    loop []
  in
  (* Parses one piece and pushes it on [acc], which holds the pieces read so
     far in reverse order. *)
  let piece acc =
    if double_asterisk && Parse_buffer.accept_s buf "/**"
    then
      (* "/**" becomes [ManyMany]; when it ends the pattern an explicit
         [Exactly '/'] is placed in front of it. *)
      ManyMany :: (if eos () then Exactly '/' :: acc else acc)
    else if read '*'
    then (if double_asterisk && read '*' then ManyMany else Many) :: acc
    else if read '?'
    then One :: acc
    else if not (read '[')
    then Exactly (char ()) :: acc
    else if read '^' || read '!'
    then Any_but (enclosed ()) :: acc
    else Any_of (enclosed ()) :: acc
  in
  let rec loop pieces = if eos () then List.rev pieces else loop (piece pieces) in
  loop []
;;
(* [mul l l'] is every concatenation [s ^ s'] with [s] taken from [l] and
   [s'] from [l'], grouped by [s] and in list order. *)
let mul l l' =
  let with_prefix prefix = List.map (fun suffix -> prefix ^ suffix) l' in
  List.concat (List.map with_prefix l)
;;
(* [explode str] expands the brace alternatives of [str] and returns every
   resulting string: ["a{b,c}d"] gives [["abd"; "acd"]]. Braces may be
   nested. A backslash makes the scanner skip the character that follows it,
   so an escaped brace or comma is not treated as syntax; the backslash
   itself is kept in the output.

   [expl inner s i acc beg] scans from index [i]; [s] is where the current
   literal chunk starts, [beg] holds the expansions of what precedes that
   chunk in the current alternative, [acc] the finished alternatives of the
   current brace group (in reverse order), and [inner] tells whether the
   scan is inside a brace group. It returns the expansions together with
   the index at which scanning stopped.

   @raise Parse_error if a brace group is not closed or if the pattern ends
   with a backslash that has nothing left to escape. *)
let explode str =
  let l = String.length str in
  let rec expl inner s i acc beg =
    if i >= l
    then (
      if inner then raise Parse_error;
      mul beg [ String.sub str s (i - s) ], i)
    else (
      match str.[i] with
      | '\\' ->
        (* Skipping two characters from a trailing backslash would move past
           the end of the string and make the [String.sub] calls above fail
           with [Invalid_argument]; report a parse error instead. *)
        if i + 1 >= l then raise Parse_error;
        expl inner s (i + 2) acc beg
      | '{' ->
        let t, i' = expl true (i + 1) (i + 1) [] [ "" ] in
        expl inner i' i' acc (mul beg (mul [ String.sub str s (i - s) ] t))
      | ',' when inner ->
        expl inner (i + 1) (i + 1) (mul beg [ String.sub str s (i - s) ] @ acc) [ "" ]
      | '}' when inner -> mul beg [ String.sub str s (i - s) ] @ acc, i + 1
      | _ -> expl inner s (i + 1) acc beg)
  in
  List.rev (fst (expl false 0 0 [] [ "" ]))
;;
107107+108108+module State = struct
109109+ type t =
110110+ { re_pieces : Re.t list (* last piece at head of list. *)
111111+ ; remaining : piece list (* last piece at tail of list. *)
112112+ ; am_at_start_of_pattern : bool (* true at start of pattern *)
113113+ ; am_at_start_of_component : bool
114114+ (* true at start of pattern or immediately
115115+ after '/' *)
116116+ ; pathname : bool
117117+ ; match_backslashes : bool
118118+ ; period : bool
119119+ }
120120+121121+ let create ~period ~pathname ~match_backslashes remaining =
122122+ { re_pieces = []
123123+ ; am_at_start_of_pattern = true
124124+ ; am_at_start_of_component = true
125125+ ; pathname
126126+ ; match_backslashes
127127+ ; period
128128+ ; remaining
129129+ }
130130+ ;;
131131+132132+ let explicit_period t =
133133+ t.period && (t.am_at_start_of_pattern || (t.am_at_start_of_component && t.pathname))
134134+ ;;
135135+136136+ let explicit_slash t = t.pathname
137137+ let slashes t = if t.match_backslashes then [ '/'; '\\' ] else [ '/' ]
138138+139139+ let append ?(am_at_start_of_component = false) t piece =
140140+ { t with
141141+ re_pieces = piece :: t.re_pieces
142142+ ; am_at_start_of_pattern = false
143143+ ; am_at_start_of_component
144144+ }
145145+ ;;
146146+147147+ let to_re t = Re.seq (List.rev t.re_pieces)
148148+149149+ let next t =
150150+ match t.remaining with
151151+ | [] -> None
152152+ | piece :: remaining -> Some (piece, { t with remaining })
153153+ ;;
154154+end
155155+156156+let one ~explicit_slash ~slashes ~explicit_period =
157157+ Re.compl
158158+ (List.concat
159159+ [ (if explicit_slash then List.map Re.char slashes else [])
160160+ ; (if explicit_period then [ Re.char '.' ] else [])
161161+ ])
162162+;;
163163+164164+let enclosed enclosed =
165165+ match enclosed with
166166+ | Char c -> Re.char c
167167+ | Range (low, high) -> Re.rg low high
168168+;;
169169+170170+let enclosed_set ~explicit_slash ~slashes ~explicit_period kind set =
171171+ let set = List.map enclosed set in
172172+ let enclosure =
173173+ match kind with
174174+ | `Any_of -> Re.alt set
175175+ | `Any_but -> Re.compl set
176176+ in
177177+ Re.inter [ enclosure; one ~explicit_slash ~slashes ~explicit_period ]
178178+;;
179179+180180+let exactly state c =
181181+ let slashes = State.slashes state in
182182+ let am_at_start_of_component = List.mem c slashes in
183183+ let chars = if am_at_start_of_component then slashes else [ c ] in
184184+ State.append state (Re.alt (List.map Re.char chars)) ~am_at_start_of_component
185185+;;
(* Translate a double asterisk: an optional first component followed by any
   number of "slash, optional component" groups. Components are matched one
   at a time so that, when the [period] flag is set, a leading period of a
   component (as in ["foo/.bar"]) is never swallowed. *)
let many_many state =
  let slashes = State.slashes state in
  (* Period handling for the first component depends on where we currently
     are in the pattern; for the following ones it only depends on flags. *)
  let leading_explicit_period = State.explicit_period state in
  let inner_explicit_period = state.State.period && state.State.pathname in
  let non_slash ~explicit_period = one ~explicit_slash:true ~slashes ~explicit_period in
  let component ~explicit_period =
    Re.seq [ non_slash ~explicit_period; Re.rep (non_slash ~explicit_period:false) ]
  in
  let separator = Re.alt (List.map Re.char slashes) in
  let first = Re.opt (component ~explicit_period:leading_explicit_period) in
  let rest =
    Re.rep (Re.seq [ separator; Re.opt (component ~explicit_period:inner_explicit_period) ])
  in
  State.append state (Re.seq [ first; rest ])
;;
(* Translate a single asterisk. Slashes are easy to exclude; whether a period
   must be excluded depends on what follows the asterisk, so the hard case
   looks ahead at the next piece of the pattern and translates both pieces
   together. *)
let many (state : State.t) =
  let slashes = State.slashes state in
  let explicit_slash = State.explicit_slash state in
  let explicit_period = State.explicit_period state in
  match explicit_period, explicit_slash with
  | false, _ ->
    (* No leading period to protect: a plain repetition is enough. *)
    State.append state (Re.rep (one ~explicit_slash ~slashes ~explicit_period))
  | true, false ->
    (* Periods only need to be matched explicitly at the very beginning. *)
    let first = one ~explicit_slash:false ~slashes ~explicit_period in
    let others = Re.rep (one ~explicit_slash:false ~slashes ~explicit_period:false) in
    State.append state (Re.opt (Re.seq [ first; others ]))
  | true, true ->
    let non_slash ~explicit_period = one ~explicit_slash:true ~slashes ~explicit_period in
    let not_empty =
      Re.seq [ non_slash ~explicit_period:true; Re.rep (non_slash ~explicit_period:false) ]
    in
    (* [maybe_empty] is the default translation of Many, except in the
       special cases handled by [lookahead] below. *)
    let maybe_empty = Re.opt not_empty in
    let followed_by_set state kind set =
      let set_re ~explicit_period =
        enclosed_set kind set ~explicit_slash:true ~slashes ~explicit_period
      in
      State.append
        state
        (Re.alt
           [ set_re ~explicit_period:true
           ; (* Once [not_empty] matched, later dots are no longer leading. *)
             Re.seq [ not_empty; set_re ~explicit_period:false ]
           ])
    in
    let rec lookahead state =
      match State.next state with
      | None -> State.append state maybe_empty
      (* glob ** === glob * . *)
      | Some (Many, state) -> lookahead state
      | Some (Exactly c, state) ->
        let prefix = if c = '.' then not_empty else maybe_empty in
        exactly (State.append state prefix) c
      (* glob *? === glob ?* *)
      | Some (One, state) -> State.append state not_empty
      | Some (Any_of set, state) -> followed_by_set state `Any_of set
      | Some (Any_but set, state) -> followed_by_set state `Any_but set
      (* * then ** === ** *)
      | Some (ManyMany, state) -> many_many state
    in
    lookahead state
;;
(* Translate one parsed glob piece in the context of [state] and return the
   updated state. *)
let piece state piece =
  let slashes = State.slashes state in
  let explicit_period = State.explicit_period state in
  let explicit_slash = State.explicit_slash state in
  let append_set kind set =
    State.append state (enclosed_set kind ~explicit_slash ~slashes ~explicit_period set)
  in
  match piece with
  | Exactly c -> exactly state c
  | One -> State.append state (one ~explicit_slash ~slashes ~explicit_period)
  | Any_of set -> append_set `Any_of set
  | Any_but set -> append_set `Any_but set
  | Many -> many state
  | ManyMany -> many_many state
;;
(* Translate an already parsed glob into a regular expression by feeding its
   pieces, one after the other, through [piece]. *)
let glob ~pathname ~match_backslashes ~period glob =
  let rec translate state =
    match State.next state with
    | Some (p, state) -> translate (piece state p)
    | None -> State.to_re state
  in
  State.create ~pathname ~match_backslashes ~period glob |> translate
;;
(* Public entry point: parse the shell pattern [s] and translate it to a
   regular expression. With [expand_braces] the pattern is first expanded
   textually and the result is the alternation of every expansion. With
   [anchored] each translated pattern has to match the whole string. *)
let glob
      ?(anchored = false)
      ?(pathname = true)
      ?(match_backslashes = false)
      ?(period = true)
      ?(expand_braces = false)
      ?(double_asterisk = true)
      s
  =
  let translate pattern =
    let re =
      of_string ~double_asterisk pattern |> glob ~pathname ~match_backslashes ~period
    in
    if anchored then Re.whole_string re else re
  in
  match expand_braces with
  | false -> translate s
  | true -> Re.alt (List.map translate (explode s))
;;
(* Same as [glob], but a [Parse_error] is reported as [Error `Parse_error]
   instead of being raised. *)
let glob_result
      ?anchored
      ?pathname
      ?match_backslashes
      ?period
      ?expand_braces
      ?double_asterisk
      s
  =
  try
    Ok (glob ?anchored ?pathname ?match_backslashes ?period ?expand_braces ?double_asterisk s)
  with
  | Parse_error -> Error `Parse_error
;;
334334+335335+let glob' ?anchored period s = glob ?anchored ~period s
336336+let globx ?anchored s = glob ?anchored ~expand_braces:true s
337337+let globx' ?anchored period s = glob ?anchored ~expand_braces:true ~period s
+95
vendor/opam/re/lib/glob.mli
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(** Shell-style regular expressions *)
2424+2525+exception Parse_error
2626+2727+(** Implements the semantics of shell patterns. The returned regular
2828+ expression is unanchored by default.
2929+3030+ Character '*' matches any sequence of characters and character
3131+ '?' matches a single character.
3232+ A sequence '[...]' matches any one of the enclosed characters.
3333+ A sequence '[^...]' or '[!...]' matches any character *but* the enclosed characters.
3434+ A backslash escapes the following character. The last character of the string cannot
3535+ be a backslash.
3636+3737+ [anchored] controls whether the regular expression will only match entire
3838+ strings. Defaults to false.
3939+4040+ [pathname]: If this flag is set, match a slash in string only with a slash in pattern
4141+ and not by an asterisk ('*') or a question mark ('?') metacharacter, nor by a bracket
4242+ expression ('[]') containing a slash. Defaults to true.
4343+4444+ [match_backslashes]: If this flag is set, a forward slash will also match a
4545+ backslash (useful when globbing Windows paths). Note that a backslash in the
4646+ pattern will continue to escape the following character. Defaults to
4747+ [false].
4848+4949+ [period]: If this flag is set, a leading period in string has to be matched exactly by
5050+ a period in pattern. A period is considered to be leading if it is the first
5151+ character in string, or if both [pathname] is set and the period immediately follows a
5252+ slash. Defaults to true.
5353+5454+ If [expand_braces] is true, braced sets will expand into multiple globs,
5555+ e.g. a\{x,y\}b\{1,2\} matches axb1, axb2, ayb1, ayb2. As specified for bash, brace
5656+ expansion is purely textual and can be nested. Defaults to false.
5757+5858+ [double_asterisk]: If this flag is set, double asterisks ('**') will match slash
5959+ characters, even if [pathname] is set. The [period] flag still applies. Defaults to
6060+ true. *)
6161+val glob
6262+ : ?anchored:bool
6363+ -> ?pathname:bool
6464+ -> ?match_backslashes:bool
6565+ -> ?period:bool
6666+ -> ?expand_braces:bool
6767+ -> ?double_asterisk:bool
6868+ -> string
6969+ -> Core.t
7070+7171+val glob_result
7272+ : ?anchored:bool
7373+ -> ?pathname:bool
7474+ -> ?match_backslashes:bool
7575+ -> ?period:bool
7676+ -> ?expand_braces:bool
7777+ -> ?double_asterisk:bool
7878+ -> string
7979+ -> (Core.t, [ `Parse_error ]) result
8080+8181+(** Same, but allows to choose whether dots at the beginning of a
8282+ file name need to be explicitly matched (true) or not (false)
8383+8484+ @deprecated Use [glob ~period]. *)
8585+val glob' : ?anchored:bool -> bool -> string -> Core.t
8686+8787+(** This version of [glob] also recognizes the pattern \{..,..\}
8888+8989+ @deprecated Prefer [glob ~expand_braces:true]. *)
9090+val globx : ?anchored:bool -> string -> Core.t
9191+9292+(** This version of [glob'] also recognizes the pattern \{..,..\}
9393+9494+ @deprecated Prefer [glob ~expand_braces:true ~period]. *)
9595+val globx' : ?anchored:bool -> bool -> string -> Core.t
+103
vendor/opam/re/lib/group.ml
···11+(* Result of a successful match. *)
22+type t =
33+ { (* Input string. Matched strings are substrings of s *)
44+ s : string
55+ (* Mapping from group indices to positions in gpos. Group i uses entries
66+ 2*i and 2*i + 1 of marks, which hold the indices in gpos of its start and
77+ stop. If the group wasn't matched, both entries are -1 *)
88+ ; marks : Mark_infos.t
99+ ; (* Marks positions. i.e. those marks created with Re.marks *)
1010+ pmarks : Pmark.Set.t
1111+ ; (* Group positions. Adjacent elements are (start, stop) of group match.
1212+ indexed by the values in marks. So group i in an re would be the substring:
1313+1414+ start = t.gpos.(marks.(2*i))
1515+ stop = t.gpos.(marks.(2*i + 1)) *)
1616+ gpos : int array
1717+ ; (* Number of groups the regular expression contains. Matched or not *)
1818+ gcount : int
1919+ }
2020+2121+let create s ~gcount ~gpos marks pmarks = { s; gcount; gpos; marks; pmarks }
2222+2323+module Offset = struct
2424+ type t = int
2525+2626+ let absent = -1
2727+ let is_present t = t >= 0
2828+ let get_no_check t = t
2929+end
(* Start position of group [i] in the input, or [Offset.absent] when the
   group has no recorded start mark. *)
let start_offset t i =
  let mark = Mark_infos.start_offset t.marks i in
  match Mark_infos.Offset.is_present mark with
  | true -> t.gpos.(Mark_infos.Offset.get_no_check mark)
  | false -> Offset.absent
;;
(* Stop position of group [i] in the input, or [Offset.absent] when the
   group has no recorded stop mark. *)
let stop_offset t i =
  let mark = Mark_infos.stop_offset t.marks i in
  match Mark_infos.Offset.is_present mark with
  | true -> t.gpos.(Mark_infos.Offset.get_no_check mark)
  | false -> Offset.absent
;;
(* [(start, stop)] positions of group [i] in the input, or [None] if the
   group did not match. The marks give indices into [gpos], which holds the
   actual positions. *)
let offset_opt t i =
  match Mark_infos.offset t.marks i with
  | None -> None
  | Some (start, stop) -> Some (t.gpos.(start), t.gpos.(stop))
;;
(* Unwrap an option, raising [Not_found] on [None]. Used to derive the
   raising accessors from their [_opt] counterparts. *)
let or_not_found opt =
  match opt with
  | Some found -> found
  | None -> raise Not_found
;;
5454+5555+let offset t i = offset_opt t i |> or_not_found
5656+5757+let get_opt t i =
5858+ offset_opt t i |> Option.map (fun (p1, p2) -> String.sub t.s p1 (p2 - p1))
5959+;;
6060+6161+let pmarks t = t.pmarks
6262+let get t i = get_opt t i |> or_not_found
6363+let start_opt subs i = offset_opt subs i |> Option.map fst
6464+let start subs i = start_opt subs i |> or_not_found
6565+let stop_opt subs i = offset_opt subs i |> Option.map snd
6666+let stop subs i = stop_opt subs i |> or_not_found
6767+let test t i = Mark_infos.test t.marks i
6868+let get_opt t i = if test t i then Some (get t i) else None
6969+let dummy_offset = -1, -1
(* Offsets of every group, indexed by group number. Groups that did not
   match keep the placeholder [dummy_offset]. *)
let all_offset t =
  let offsets = Array.make t.gcount dummy_offset in
  let record group start stop =
    let first = t.gpos.(start) in
    let last = t.gpos.(stop) in
    offsets.(group) <- first, last
  in
  Mark_infos.iteri t.marks ~f:record;
  offsets
;;
7979+8080+let dummy_string = ""
(* Matched substring of every group, indexed by group number. Groups that
   did not match keep the placeholder [dummy_string]. *)
let all t =
  let substrings = Array.make t.gcount dummy_string in
  let record group start stop =
    let first = t.gpos.(start) in
    let last = t.gpos.(stop) in
    substrings.(group) <- String.sub t.s first (last - first)
  in
  Mark_infos.iteri t.marks ~f:record;
  substrings
;;
9090+9191+let pp fmt t =
9292+ let matches =
9393+ let offsets = all_offset t in
9494+ let strs = all t in
9595+ Array.to_list (Array.init (Array.length strs) (fun i -> strs.(i), offsets.(i)))
9696+ in
9797+ let open Format in
9898+ let open Fmt in
9999+ let pp_match fmt (str, (start, stop)) = fprintf fmt "@[(%s (%d %d))@]" str start stop in
100100+ sexp fmt "Group" (list pp_match) matches
101101+;;
102102+103103+let nb_groups t = t.gcount
+54
vendor/opam/re/lib/group.mli
···11+(** Information about groups in a match. *)
22+33+(** Result of a successful match. *)
44+type t
55+66+val create : string -> gcount:int -> gpos:int array -> Mark_infos.t -> Pmark.Set.t -> t
77+88+(** Raise [Not_found] if the group did not match *)
99+val get : t -> int -> string
1010+1111+(** Similar to {!get}, but returns an option instead of using an exception. *)
1212+val get_opt : t -> int -> string option
1313+1414+(** Raise [Not_found] if the group did not match *)
1515+val offset : t -> int -> int * int
1616+1717+val offset_opt : t -> int -> (int * int) option
1818+1919+(** Return the start of the match. Raise [Not_found] if the group did not match. *)
2020+val start : t -> int -> int
2121+2222+val start_opt : t -> int -> int option
2323+2424+(** Return the end of the match. Raise [Not_found] if the group did not match. *)
2525+val stop : t -> int -> int
2626+2727+val stop_opt : t -> int -> int option
2828+2929+(** Return the empty string for each group which did not match *)
3030+val all : t -> string array
3131+3232+(** Return [(-1,-1)] for each group which did not match *)
3333+val all_offset : t -> (int * int) array
3434+3535+(** Test whether a group matched *)
3636+val test : t -> int -> bool
3737+3838+val pmarks : t -> Pmark.Set.t
3939+4040+(** Returns the total number of groups defined - matched or not.
4141+ This function is experimental. *)
4242+val nb_groups : t -> int
4343+4444+val pp : t Fmt.t
4545+4646+module Offset : sig
4747+ type t
4848+4949+ val is_present : t -> bool
5050+ val get_no_check : t -> int
5151+end
5252+5353+val start_offset : t -> int -> Offset.t
5454+val stop_offset : t -> int -> Offset.t
+155
vendor/opam/re/lib/hash_set.ml
···11+open Import
(* An array of integers stored in a [Bytes.t], [words] bytes (one native
   endian 64-bit value) per element. A slot filled with ['\255'] bytes reads
   back as [-1]. *)
module Array = struct
  type nonrec t = Bytes.t

  let words = 8
  let[@inline] length t = Bytes.length t / words
  let[@inline] unsafe_get t i = Bytes.get_int64_ne t (i * words) |> Int64.to_int
  let[@inline] unsafe_set t i x = Int64.of_int x |> Bytes.set_int64_ne t (i * words)

  (* [make len x] is a fresh array of [len] slots, all set to [x]. *)
  let[@inline] make len x =
    let t = Bytes.create (len * words) in
    let last = length t - 1 in
    for slot = 0 to last do
      unsafe_set t slot x
    done;
    t
  ;;

  (* Every byte is ['\255'], i.e. every slot holds [-1]. *)
  let[@inline] make_absent len = Bytes.make (len * words) '\255'
  let clear t = Bytes.fill t 0 (Bytes.length t) '\255'

  (* Left fold over the slots, in increasing index order. *)
  let fold_left t ~init ~f =
    let slots = length t in
    let rec go acc i = if i >= slots then acc else go (f acc (unsafe_get t i)) (i + 1) in
    go init 0
  ;;
end
3030+3131+(* A specialized hash table that makes the following trade-offs:
3232+ - Open addresing. Bucketing is quite memory intensive and dune is already
3333+ a memory hog.
3434+ - No boxing for empty slots. We make use of the fact that id's are never
3535+ negative to achieve this.
3636+ - No saving of the hash. Recomputing the hash for id's is a no-op.
3737+*)
3838+3939+type nonrec table =
4040+ { mutable table : Array.t
4141+ ; mutable size : int
4242+ }
4343+4444+type t = table Option.t ref
4545+4646+let init t =
4747+ if Option.is_none !t then t := Option.some { size = 0; table = Array.make 0 (-1) };
4848+ Option.get !t
4949+;;
5050+5151+let[@inline] should_grow t =
5252+ let slots = Array.length t.table in
5353+ slots = 0 || (t.size > 0 && slots / t.size < 2)
5454+;;
5555+5656+let absent = -1
5757+5858+let () =
5959+ let x = Array.make_absent 1 in
6060+ assert (Array.unsafe_get x 0 = absent)
6161+;;
6262+6363+let create () = ref None
6464+6565+let[@inline] index_of_offset slots index i =
6666+ let i = index + !i in
6767+ if i >= slots then i - slots else i
6868+;;
6969+7070+let clear t =
7171+ match !t with
7272+ | None -> ()
7373+ | Some t ->
7474+ t.size <- 0;
7575+ Array.clear t.table
7676+;;
7777+7878+let add t x =
7979+ let hash = Int.hash x in
8080+ let slots = Array.length t.table in
8181+ let index = hash land (slots - 1) in
8282+ let inserting = ref true in
8383+ let i = ref 0 in
8484+ while !inserting do
8585+ let idx = index_of_offset slots index i in
8686+ let elem = Array.unsafe_get t.table idx in
8787+ if elem = absent
8888+ then (
8989+ Array.unsafe_set t.table idx x;
9090+ inserting := false)
9191+ else incr i
9292+ done;
9393+ t.size <- t.size + 1
9494+;;
(* Grow the table (to one slot when empty, otherwise doubling the number of
   slots) and re-insert every element of the old table into the new one. *)
let resize t =
  let old_table = t.table in
  let slots = Array.length old_table in
  t.table <- Array.make_absent (if slots = 0 then 1 else slots lsl 1);
  (* [add] increments [size] for every insertion. Restart the count from
     zero, otherwise each carried-over element would be counted twice and
     [size] would overstate the occupancy (causing premature growth). *)
  t.size <- 0;
  for i = 0 to slots - 1 do
    let elem = Array.unsafe_get old_table i in
    if elem <> absent then add t elem
  done
;;
106106+107107+let add t x =
108108+ let t = init t in
109109+ if should_grow t then resize t;
110110+ add t x
111111+;;
(* A set is empty when its table was never allocated or holds no element. *)
let[@inline] is_empty t =
  match !t with
  | None -> true
  | Some table -> table.size = 0
;;
121121+122122+let mem t x =
123123+ let t = !t in
124124+ if Option.is_none t || (Option.get t).size = 0
125125+ then false
126126+ else (
127127+ let t = Option.get t in
128128+ let hash = Int.hash x in
129129+ let slots = Array.length t.table in
130130+ let index = hash land (slots - 1) in
131131+ let i = ref 0 in
132132+ let found = ref false in
133133+ while (not !found) && !i < slots do
134134+ let idx = index_of_offset slots index i in
135135+ let elem = Array.unsafe_get t.table idx in
136136+ if Int.equal elem x
137137+ then found := true
138138+ else if Int.equal elem absent
139139+ then i := slots
140140+ else incr i
141141+ done;
142142+ !found)
143143+;;
144144+145145+let pp fmt t =
146146+ let { table; size } = init t in
147147+ let table =
148148+ Array.fold_left table ~init:[] ~f:(fun acc i -> if i = absent then acc else i :: acc)
149149+ |> List.rev
150150+ |> Stdlib.Array.of_list
151151+ in
152152+ let table fmt () = Fmt.sexp fmt "table" Fmt.(array int) table in
153153+ let size fmt () = Fmt.sexp fmt "size" Fmt.int size in
154154+ Format.fprintf fmt "%a@.%a@." table () size ()
155155+;;
+8
vendor/opam/re/lib/hash_set.mli
···11+type t
22+33+val create : unit -> t
44+val is_empty : t -> bool
55+val add : t -> int -> unit
66+val mem : t -> int -> bool
77+val clear : t -> unit
88+val pp : t Fmt.t
+40
vendor/opam/re/lib/import.ml
(* [ListLabels] plus [equal] and [compare]. The two functions are defined
   before the [include] so that the standard library versions, when they
   exist, take precedence (hence the unused-value warning being disabled). *)
module List = struct
  (* Element-wise equality; lists of different lengths are never equal. *)
  let[@warning "-32"] rec equal ~eq l1 l2 =
    match l1, l2 with
    | [], [] -> true
    | x :: xs, y :: ys -> eq x y && equal ~eq xs ys
    | [], _ :: _ | _ :: _, [] -> false

  (* Lexicographic comparison; a strict prefix is smaller. *)
  let[@warning "-32"] rec compare ~cmp l1 l2 =
    match l1, l2 with
    | [], [] -> 0
    | [], _ :: _ -> -1
    | _ :: _, [] -> 1
    | x :: xs, y :: ys ->
      (match cmp x y with
       | 0 -> compare ~cmp xs ys
       | r -> r)

  include Stdlib.ListLabels
end
1717+1818+module Poly = struct
1919+ let equal = ( = )
2020+ let compare = compare
2121+end
2222+2323+module Phys_equal = struct
2424+ let equal = ( == )
2525+end
2626+2727+let ( = ) = Int.equal
2828+let ( == ) = [ `Use_phys_equal ]
2929+let ( < ) (x : int) (y : int) = x < y
3030+let ( > ) (x : int) (y : int) = x > y
3131+let min (x : int) (y : int) = if x <= y then x else y
3232+let max (x : int) (y : int) = if x >= y then x else y
3333+let compare = Int.compare
3434+3535+module Int = struct
3636+ let[@warning "-32"] hash (x : int) = Hashtbl.hash x
3737+ let[@warning "-32"] max (x : int) (y : int) = if x >= y then x else y
3838+3939+ include Stdlib.Int
4040+end
+55
vendor/opam/re/lib/mark_infos.ml
···11+open Import
22+33+type t = int array
(* Build the mark table from an association list [(index, value)]. The
   table is just large enough for the highest index; entries that are not
   mentioned are [-1]. *)
let make marks =
  let highest = List.fold_left ~init:(-1) ~f:(fun acc (idx, _) -> max acc idx) marks in
  let t = Array.make (highest + 1) (-1) in
  List.iter ~f:(fun (idx, v) -> t.(idx) <- v) marks;
  t
;;
1212+1313+let test t i = if 2 * i >= Array.length t then false else t.(2 * i) <> -1
1414+1515+module Offset = struct
1616+ type t = int
1717+1818+ let is_present t = t >= 0
1919+ let get_no_check t = t
2020+end
2121+2222+let start_offset t i =
2323+ let start_i = 2 * i in
2424+ if start_i + 1 >= Array.length t then -1 else t.(start_i)
2525+;;
2626+2727+let stop_offset t i =
2828+ let stop_i = (2 * i) + 1 in
2929+ if stop_i >= Array.length t then -1 else t.(stop_i)
3030+;;
(* [(start, stop)] marks of group [i], stored at entries [2*i] and
   [2*i + 1]. [None] when the group lies outside the table or when its start
   entry is [-1]. *)
let offset t i =
  let start_i = 2 * i in
  let stop_i = start_i + 1 in
  if stop_i >= Array.length t
  then None
  else (
    match t.(start_i) with
    | -1 -> None
    | start -> Some (start, t.(stop_i)))
;;
(* Call [f group start stop] for every group whose start entry is not [-1],
   in increasing group order. A trailing unpaired entry is ignored. *)
let iteri t ~f =
  let groups = Array.length t / 2 in
  let rec visit group =
    if group < groups
    then (
      let idx = 2 * group in
      (match t.(idx) with
       | -1 -> ()
       | start -> f group start t.(idx + 1));
      visit (group + 1))
  in
  visit 0
;;
+17
vendor/opam/re/lib/mark_infos.mli
···11+(** store mark information for groups in an array *)
22+type t
33+44+val make : (int * int) list -> t
55+val offset : t -> int -> (int * int) option
66+val test : t -> int -> bool
77+val iteri : t -> f:(int -> int -> int -> unit) -> unit
88+99+module Offset : sig
1010+ type t
1111+1212+ val is_present : t -> bool
1313+ val get_no_check : t -> int
1414+end
1515+1616+val start_offset : t -> int -> Offset.t
1717+val stop_offset : t -> int -> Offset.t
+67
vendor/opam/re/lib/parse_buffer.ml
···11+type t =
22+ { str : string
33+ ; mutable pos : int
44+ }
55+66+exception Parse_error
77+88+let create str = { str; pos = 0 }
99+let unget t = t.pos <- t.pos - 1
1010+let junk t = t.pos <- t.pos + 1
1111+let eos t = t.pos = String.length t.str
1212+let test t c = (not (eos t)) && t.str.[t.pos] = c
1313+1414+let test2 t c c' =
1515+ t.pos + 1 < String.length t.str && t.str.[t.pos] = c && t.str.[t.pos + 1] = c'
1616+;;
1717+1818+let accept t c =
1919+ let r = test t c in
2020+ if r then t.pos <- t.pos + 1;
2121+ r
2222+;;
2323+2424+let get t =
2525+ let r = t.str.[t.pos] in
2626+ t.pos <- t.pos + 1;
2727+ r
2828+;;
(* [accept_s t s'] consumes [s'] if the input continues with exactly [s'] at
   the current position and returns [true]; otherwise the position is left
   untouched and the result is [false]. The empty string is always accepted.

   The bounds are checked explicitly rather than by catching the exception
   raised by an out-of-range string access, so the function neither swallows
   unrelated exceptions nor depends on bound checks being enabled. *)
let accept_s t s' =
  let len = String.length s' in
  let fits = len = 0 || (t.pos >= 0 && len <= String.length t.str - t.pos) in
  let rec same_from j =
    j >= len || (Char.equal s'.[j] t.str.[t.pos + j] && same_from (j + 1))
  in
  if fits && same_from 0
  then (
    t.pos <- t.pos + len;
    true)
  else false
;;
4343+4444+let rec integer' t i =
4545+ if eos t
4646+ then Some i
4747+ else (
4848+ match get t with
4949+ | '0' .. '9' as d ->
5050+ let i' = (10 * i) + (Char.code d - Char.code '0') in
5151+ if i' < i then raise Parse_error;
5252+ integer' t i'
5353+ | _ ->
5454+ unget t;
5555+ Some i)
5656+;;
5757+5858+let integer t =
5959+ if eos t
6060+ then None
6161+ else (
6262+ match get t with
6363+ | '0' .. '9' as d -> integer' t (Char.code d - Char.code '0')
6464+ | _ ->
6565+ unget t;
6666+ None)
6767+;;
+14
vendor/opam/re/lib/parse_buffer.mli
···11+type t
22+33+exception Parse_error
44+55+val create : string -> t
66+val junk : t -> unit
77+val unget : t -> unit
88+val eos : t -> bool
99+val test : t -> char -> bool
1010+val test2 : t -> char -> char -> bool
1111+val get : t -> char
1212+val accept : t -> char -> bool
1313+val accept_s : t -> string -> bool
1414+val integer : t -> int option
+179
vendor/opam/re/lib/pcre.ml
···11+module Re = Core
22+33+exception Parse_error = Perl.Parse_error
44+exception Not_supported = Perl.Not_supported
55+66+type regexp = Re.re
77+88+type flag =
99+ [ `CASELESS
1010+ | `MULTILINE
1111+ | `ANCHORED
1212+ | `DOTALL
1313+ ]
1414+1515+type split_result =
1616+ | Text of string
1717+ | Delim of string
1818+ | Group of int * string
1919+ | NoGroup
2020+2121+type groups = Core.Group.t
(* Parse [pat] with the Perl parser, translating each PCRE-style flag to the
   corresponding [Perl] option. *)
let re ?(flags = []) pat =
  let to_perl_opt = function
    | `ANCHORED -> `Anchored
    | `CASELESS -> `Caseless
    | `DOTALL -> `Dotall
    | `MULTILINE -> `Multiline
  in
  Perl.re ~opts:(List.map to_perl_opt flags) pat
;;
(* Non-raising version of [re]: [Not_supported] and [Parse_error] are turned
   into the corresponding [Error] value. *)
let re_result ?flags s =
  try Ok (re ?flags s) with
  | Not_supported -> Error `Not_supported
  | Parse_error -> Error `Parse_error
;;
4444+let extract ~rex s = Re.Group.all (Re.exec rex s)
4545+let exec ~rex ?pos s = Re.exec rex ?pos s
4646+let names rex = Re.group_names rex |> List.map fst |> Array.of_list
(* Walk the named groups of [rex] in order and return the substring of the
   first group called [name] that took part in the match [s]. Several groups
   may share a name, so the search continues past unmatched ones. *)
let get_named_substring_opt rex name s =
  let rec first_match = function
    | [] -> None
    | (n, i) :: rest ->
      if n = name
      then (
        match Re.Group.get_opt s i with
        | Some _ as found -> found
        | None -> first_match rest)
      else first_match rest
  in
  first_match (Re.group_names rex)
;;
6161+let pmatch ~rex s = Re.execp rex s
(* Replace every match of [rex] in [str] by [subst] applied to the matched
   text. Text between matches is copied unchanged. [on_match] records that
   the previous step ended on a non-empty match, in which case an empty
   match at the very same position is not substituted again. *)
let substitute ~rex ~subst str =
  let len = String.length str in
  let out = Buffer.create 1024 in
  (* After an empty match: copy the character at [at] (if there is one) and
     resume the search right after it, so that the loop always progresses. *)
  let rec step_over at =
    if at < len
    then (
      Buffer.add_char out str.[at];
      loop (at + 1) false)
  and loop pos on_match =
    if not (Re.execp ~pos rex str)
    then Buffer.add_substring out str pos (len - pos)
    else (
      let ss = Re.exec ~pos rex str in
      let start, fin = Re.Group.offset ss 0 in
      let is_empty = start = fin in
      if on_match && start = pos && is_empty
      then (* Empty match following a match *)
        step_over pos
      else (
        let pat = Re.Group.get ss 0 in
        Buffer.add_substring out str pos (start - pos);
        Buffer.add_string out (subst pat);
        if is_empty then step_over fin else loop fin true))
  in
  loop 0 false;
  Buffer.contents out
;;
(* Split [s] on the matches of [rex]. A delimiter matching at position 0
   does not produce a leading empty string, and no piece is produced for the
   end of the string when a delimiter ends there. When a match found at
   [start] would also stop at [start], the search is retried one character
   further. *)
let split ~rex s =
  let len = String.length s in
  let rec collect acc start =
    if start = len
    then acc
    else (
      match
        let g = Re.exec rex s ~pos:start in
        if Group.stop g 0 = start then Re.exec rex s ~pos:(start + 1) else g
      with
      | g ->
        let next = Group.stop g 0 in
        let piece = String.sub s start (Group.start g 0 - start) in
        collect (piece :: acc) next
      | exception Not_found -> String.sub s start (len - start) :: acc)
  in
  match Re.exec rex s ~pos:0 with
  | exception Not_found -> if s = "" then [] else [ s ]
  | g ->
    let first_start = Group.start g 0 in
    let seed = if first_start = 0 then [] else [ String.sub s 0 first_start ] in
    List.rev (collect seed (Group.stop g 0))
;;
117117+118118+(* From PCRE *)
(* Copy [len] bytes of [s] starting at [ofs] into a fresh string, without
   checking the range: callers must pass a valid one. *)
let string_unsafe_sub s ofs len =
  let r = Bytes.create len in
  Bytes.unsafe_blit s ofs r 0 len;
  Bytes.unsafe_to_string r
;;

(* Escape [s] by putting a backslash in front of each of the characters
   [\ ^ $ . [ | ( ) ? * + {]. The scratch buffer is twice as long as the
   input, which is enough even when every character gets escaped. *)
let quote s =
  let buf = Bytes.create (String.length s lsl 1) in
  let pos = ref 0 in
  let push c =
    Bytes.unsafe_set buf !pos c;
    incr pos
  in
  String.iter
    (fun c ->
       (match c with
        | '\\' | '^' | '$' | '.' | '[' | '|' | '(' | ')' | '?' | '*' | '+' | '{' -> push '\\'
        | _ -> ());
       push c)
    s;
  string_unsafe_sub buf 0 !pos
;;
(* Split [s] on [rex], keeping the delimiters. Each delimiter is followed by
   one entry per sub-group (numbered from 1): [Group] when it matched,
   [NoGroup] otherwise. The empty string yields [[]]. [max] is only looked
   at to short-circuit [max = 1], which returns the whole string as a single
   [Text]. *)
let full_split ?(max = 0) ~rex s =
  if String.length s = 0
  then []
  else if max = 1
  then [ Text s ]
  else (
    let sub_groups d offsets =
      let entry i =
        if offsets.(i) = (-1, -1) then NoGroup else Group (i, Re.Group.get d i)
      in
      (* Built from the last group down so the list comes out in order. *)
      let rec build i acc = if i < 1 then acc else build (i - 1) (entry i :: acc) in
      build (Array.length offsets - 1) []
    in
    let expand = function
      | `Text s -> [ Text s ]
      | `Delim d ->
        let offsets = Re.Group.all_offset d in
        let delim = Re.Group.get d 0 in
        Delim delim :: sub_groups d offsets
    in
    Re.split_full rex s |> List.map expand |> List.concat)
;;
170170+171171+type substrings = Group.t
172172+173173+let get_substring s i = Re.Group.get s i
174174+175175+let get_named_substring rex name s =
176176+ match get_named_substring_opt rex name s with
177177+ | None -> raise Not_found
178178+ | Some s -> s
179179+;;
+67
vendor/opam/re/lib/pcre.mli
···11+(** NOTE: Only a subset of the PCRE spec is supported *)
22+33+exception Parse_error
44+exception Not_supported
55+66+type regexp = Core.re
77+88+type flag =
99+ [ `CASELESS
1010+ | `MULTILINE
1111+ | `ANCHORED
1212+ | `DOTALL
1313+ ]
1414+1515+type groups = Core.Group.t
1616+1717+(** Result of a {!Pcre.full_split} *)
1818+type split_result =
1919+ | Text of string (** Text part of splitted string *)
2020+ | Delim of string (** Delimiter part of splitted string *)
2121+ | Group of int * string (** Subgroup of matched delimiter (subgroup_nr, subgroup_str) *)
2222+ | NoGroup (** Unmatched subgroup *)
2323+2424+(** [re ~flags s] creates the regexp [s] using the pcre syntax. *)
2525+val re : ?flags:flag list -> string -> Core.t
2626+2727+val re_result
2828+ : ?flags:flag list
2929+ -> string
3030+ -> (Core.t, [ `Not_supported | `Parse_error ]) result
3131+3232+(** [regexp ~flags s] compiles the regexp [s] using the pcre syntax. *)
3333+val regexp : ?flags:flag list -> string -> regexp
3434+3535+(** [extract ~rex s] executes [rex] on [s] and returns the matching groups. *)
3636+val extract : rex:regexp -> string -> string array
3737+3838+(** Equivalent to {!Core.exec}. *)
3939+val exec : rex:regexp -> ?pos:int -> string -> groups
4040+4141+(** Equivalent to {!Core.Group.get}. *)
4242+val get_substring : groups -> int -> string
4343+4444+(** Return the names of named groups. *)
4545+val names : regexp -> string array
4646+4747+(** Return the first matched named group, or raise [Not_found]. Prefer to use
4848+ the non-raising version [get_named_substring_opt] *)
4949+val get_named_substring : regexp -> string -> groups -> string
5050+5151+(** Return the first matched named group, or [None] if no group with that name matched. *)
5252+val get_named_substring_opt : regexp -> string -> groups -> string option
5353+5454+(** Equivalent to {!Core.Group.offset}. *)
5555+val get_substring_ofs : groups -> int -> int * int
5656+5757+(** Equivalent to {!Core.execp}. *)
5858+val pmatch : rex:regexp -> string -> bool
5959+6060+val substitute : rex:Core.re -> subst:(string -> string) -> string -> string
6161+val full_split : ?max:int -> rex:regexp -> string -> split_result list
6262+val split : rex:regexp -> string -> string list
6363+val quote : string -> string
6464+6565+(** {2 Deprecated} *)
6666+6767+type substrings = Group.t
+360
vendor/opam/re/lib/perl.ml
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+module Re = Core
2424+2525+exception Parse_error = Parse_buffer.Parse_error
2626+exception Not_supported
(** [acc_digits ~base ~digits] folds a list of digit values into the integer
    they denote in [base].

    [digits] is read least-significant digit first: the digit at index [k] is
    weighted by [base] raised to the power [k]. Returns [0] for the empty
    list. *)
let acc_digits =
  let rec loop base digits acc weight =
    match digits with
    | [] -> acc
    | d :: digits ->
      let acc = acc + (d * weight) in
      (* Move on to the next power of [base]. The weight starts at 1, so it
         must be multiplied by [base]; squaring it would keep it at 1. *)
      let weight = weight * base in
      loop base digits acc weight
  in
  fun ~base ~digits -> loop base digits 0 1
;;
(* Shadows the standard [char_of_int]: any failure of the conversion is
   reported as a [Parse_error] instead. *)
let char_of_int x =
  try char_of_int x with
  | _ -> raise Parse_error
;;
4545+4646+type elem =
4747+ | Char of char
4848+ | Set of Ast.t
4949+5050+let char_b = Char '\008'
5151+let char_newline = Char '\n'
5252+let char_cr = Char '\r'
5353+let char_tab = Char '\t'
5454+let word_char = [ Re.alnum; Re.char '_' ]
5555+let word = Set (Re.alt word_char)
(* Complement of [word]: any character that is neither alphanumeric nor '_'.
   Used for [\W] inside a bracket expression. *)
let not_word = Set (Re.compl word_char)
5757+let space = Set Re.space
5858+let not_space = Set (Re.compl [ Re.space ])
5959+let digit = Set Re.digit
6060+let not_digit = Set (Re.compl [ Re.digit ])
6161+6262+let parse ~multiline ~dollar_endonly ~dotall ~ungreedy s =
6363+ let buf = Parse_buffer.create s in
6464+ let accept = Parse_buffer.accept buf in
6565+ let eos () = Parse_buffer.eos buf in
6666+ let test c = Parse_buffer.test buf c in
6767+ let unget () = Parse_buffer.unget buf in
6868+ let get () = Parse_buffer.get buf in
6969+ let greedy_mod r =
7070+ let gr = accept '?' in
7171+ let gr = if ungreedy then not gr else gr in
7272+ if gr then Re.non_greedy r else Re.greedy r
7373+ in
7474+ let rec regexp () = regexp' [ branch () ]
7575+ and regexp' left =
7676+ if accept '|' then regexp' (branch () :: left) else Re.alt (List.rev left)
7777+ and branch () = branch' []
7878+ and branch' left =
7979+ if eos () || test '|' || test ')'
8080+ then Re.seq (List.rev left)
8181+ else branch' (piece () :: left)
8282+ and in_brace ~f ~init =
8383+ match accept '{' with
8484+ | false -> None
8585+ | true ->
8686+ let rec loop acc =
8787+ if accept '}'
8888+ then acc
8989+ else (
9090+ let acc = f acc in
9191+ loop acc)
9292+ in
9393+ Some (loop init)
9494+ and piece () =
9595+ let r = atom () in
9696+ if accept '*'
9797+ then greedy_mod (Re.rep r)
9898+ else if accept '+'
9999+ then greedy_mod (Re.rep1 r)
100100+ else if accept '?'
101101+ then greedy_mod (Re.opt r)
102102+ else if accept '{'
103103+ then (
104104+ match Parse_buffer.integer buf with
105105+ | Some i ->
106106+ let j = if accept ',' then Parse_buffer.integer buf else Some i in
107107+ if not (accept '}') then raise Parse_error;
108108+ (match j with
109109+ | Some j when j < i -> raise Parse_error
110110+ | _ -> ());
111111+ greedy_mod (Re.repn r i j)
112112+ | None ->
113113+ unget ();
114114+ r)
115115+ else r
116116+ and atom () =
117117+ if accept '.'
118118+ then if dotall then Re.any else Re.notnl
119119+ else if accept '('
120120+ then
121121+ if accept '?'
122122+ then
123123+ if accept ':'
124124+ then (
125125+ let r = regexp () in
126126+ if not (accept ')') then raise Parse_error;
127127+ r)
128128+ else if accept '#'
129129+ then comment ()
130130+ else if accept '<'
131131+ then (
132132+ let name = name () in
133133+ let r = regexp () in
134134+ if not (accept ')') then raise Parse_error;
135135+ Re.group ~name r)
136136+ else raise Parse_error
137137+ else (
138138+ let r = regexp () in
139139+ if not (accept ')') then raise Parse_error;
140140+ Re.group r)
141141+ else if accept '^'
142142+ then if multiline then Re.bol else Re.bos
143143+ else if accept '$'
144144+ then if multiline then Re.eol else if dollar_endonly then Re.leol else Re.eos
145145+ else if accept '['
146146+ then if accept '^' then Re.compl (bracket []) else Re.alt (bracket [])
147147+ else if accept '\\'
148148+ then (
149149+ (* XXX
150150+ - Back-references
151151+ - \cx (control-x), \ddd
152152+ *)
153153+ if eos () then raise Parse_error;
154154+ match get () with
155155+ | 'w' -> Re.alt [ Re.alnum; Re.char '_' ]
156156+ | 'W' -> Re.compl [ Re.alnum; Re.char '_' ]
157157+ | 's' -> Re.space
158158+ | 'S' -> Re.compl [ Re.space ]
159159+ | 'd' -> Re.digit
160160+ | 'D' -> Re.compl [ Re.digit ]
161161+ | 'b' -> Re.alt [ Re.bow; Re.eow ]
162162+ | 'B' -> Re.not_boundary
163163+ | 'A' -> Re.bos
164164+ | 'Z' -> Re.leol
165165+ | 'z' -> Re.eos
166166+ | 'G' -> Re.start
167167+ | 'e' -> Re.char '\x1b'
168168+ | 'f' -> Re.char '\x0c'
169169+ | 'n' -> Re.char '\n'
170170+ | 'r' -> Re.char '\r'
171171+ | 't' -> Re.char '\t'
172172+ | 'Q' -> quote (Buffer.create 12)
173173+ | 'E' -> raise Parse_error
174174+ | 'x' ->
175175+ let c1, c2 =
176176+ match in_brace ~init:[] ~f:(fun acc -> hexdigit () :: acc) with
177177+ | Some [ c1; c2 ] -> c1, c2
178178+ | Some [ c2 ] -> 0, c2
179179+ | Some _ -> raise Parse_error
180180+ | None ->
181181+ let c1 = hexdigit () in
182182+ let c2 = hexdigit () in
183183+ c1, c2
184184+ in
185185+ let code = (c1 * 16) + c2 in
186186+ Re.char (char_of_int code)
187187+ | 'o' ->
188188+ (match
189189+ in_brace ~init:[] ~f:(fun acc ->
190190+ match maybe_octaldigit () with
191191+ | None -> raise Parse_error
192192+ | Some p -> p :: acc)
193193+ with
194194+ | None -> raise Parse_error
195195+ | Some digits -> Re.char (char_of_int (acc_digits ~base:8 ~digits)))
196196+ | 'a' .. 'z' | 'A' .. 'Z' -> raise Parse_error
197197+ | '0' .. '7' as n1 ->
198198+ let n2 = maybe_octaldigit () in
199199+ let n3 = maybe_octaldigit () in
200200+ (match n2, n3 with
201201+ | Some n2, Some n3 ->
202202+ let n1 = Char.code n1 - Char.code '0' in
203203+ Re.char (char_of_int ((n1 * (8 * 8)) + (n2 * 8) + n3))
204204+ | _, _ -> raise Not_supported)
205205+ | '8' .. '9' -> raise Not_supported
206206+ | c -> Re.char c)
207207+ else (
208208+ if eos () then raise Parse_error;
209209+ match get () with
210210+ | '*' | '+' | '?' | '{' | '\\' -> raise Parse_error
211211+ | c -> Re.char c)
212212+ and quote buf =
213213+ if accept '\\'
214214+ then (
215215+ if eos () then raise Parse_error;
216216+ match get () with
217217+ | 'E' -> Re.str (Buffer.contents buf)
218218+ | c ->
219219+ Buffer.add_char buf '\\';
220220+ Buffer.add_char buf c;
221221+ quote buf)
222222+ else (
223223+ if eos () then raise Parse_error;
224224+ Buffer.add_char buf (get ());
225225+ quote buf)
226226+ and hexdigit () =
227227+ if eos () then raise Parse_error;
228228+ match get () with
229229+ | '0' .. '9' as d -> Char.code d - Char.code '0'
230230+ | 'a' .. 'f' as d -> Char.code d - Char.code 'a' + 10
231231+ | 'A' .. 'F' as d -> Char.code d - Char.code 'A' + 10
232232+ | _ -> raise Parse_error
233233+ and maybe_octaldigit () =
234234+ if eos ()
235235+ then None
236236+ else (
237237+ match get () with
238238+ | '0' .. '7' as d -> Some (Char.code d - Char.code '0')
239239+ | _ -> None)
240240+ and name () =
241241+ if eos ()
242242+ then raise Parse_error
243243+ else (
244244+ match get () with
245245+ | ('_' | 'a' .. 'z' | 'A' .. 'Z') as c ->
246246+ let b = Buffer.create 32 in
247247+ Buffer.add_char b c;
248248+ name' b
249249+ | _ -> raise Parse_error)
250250+ and name' b =
251251+ if eos ()
252252+ then raise Parse_error
253253+ else (
254254+ match get () with
255255+ | ('_' | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9') as c ->
256256+ Buffer.add_char b c;
257257+ name' b
258258+ | '>' -> Buffer.contents b
259259+ | _ -> raise Parse_error)
260260+ and bracket s =
261261+ if s <> [] && accept ']'
262262+ then s
263263+ else (
264264+ match char () with
265265+ | Set st -> bracket (st :: s)
266266+ | Char c ->
267267+ if accept '-'
268268+ then
269269+ if accept ']'
270270+ then Re.char c :: Re.char '-' :: s
271271+ else
272272+ bracket
273273+ (match char () with
274274+ | Char c' -> Re.rg c c' :: s
275275+ | Set st' -> Re.char c :: Re.char '-' :: st' :: s)
276276+ else bracket (Re.char c :: s))
277277+ and char () =
278278+ if eos () then raise Parse_error;
279279+ let c = get () in
280280+ if c = '['
281281+ then (
282282+ if accept '=' then raise Not_supported;
283283+ match Posix_class.parse buf with
284284+ | Some set -> Set set
285285+ | None ->
286286+ if accept '.'
287287+ then (
288288+ if eos () then raise Parse_error;
289289+ let c = get () in
290290+ if not (accept '.') then raise Not_supported;
291291+ if not (accept ']') then raise Parse_error;
292292+ Char c)
293293+ else Char c)
294294+ else if c = '\\'
295295+ then (
296296+ if eos () then raise Parse_error;
297297+ let c = get () in
298298+ (* XXX
299299+ \127, ...
300300+ *)
301301+ match c with
302302+ | 'b' -> char_b
303303+ | 'n' -> char_newline (*XXX*)
304304+ | 'r' -> char_cr (*XXX*)
305305+ | 't' -> char_tab (*XXX*)
306306+ | 'w' -> word
307307+ | 'W' -> not_word
308308+ | 's' -> space
309309+ | 'S' -> not_space
310310+ | 'd' -> digit
311311+ | 'D' -> not_digit
312312+ | 'a' .. 'z' | 'A' .. 'Z' -> raise Parse_error
313313+ | '0' .. '9' -> raise Not_supported
314314+ | _ -> Char c)
315315+ else Char c
316316+ and comment () =
317317+ if eos () then raise Parse_error;
318318+ if accept ')'
319319+ then Re.epsilon
320320+ else (
321321+ Parse_buffer.junk buf;
322322+ comment ())
323323+ in
324324+ let res = regexp () in
325325+ if not (eos ()) then raise Parse_error;
326326+ res
327327+;;
328328+329329+type opt =
330330+ [ `Ungreedy
331331+ | `Dotall
332332+ | `Dollar_endonly
333333+ | `Multiline
334334+ | `Anchored
335335+ | `Caseless
336336+ ]
(** [re ?opts s] parses the Perl-style pattern [s].

    [opts] (default [[]]) selects the parser flags passed to [parse].
    [`Anchored] additionally prefixes the result with [Re.start], and
    [`Caseless] wraps it in [Re.no_case]. Exceptions raised by [parse]
    propagate. *)
let re ?(opts = []) s =
  let enabled flag = List.memq flag opts in
  let parsed =
    parse
      ~multiline:(enabled `Multiline)
      ~dollar_endonly:(enabled `Dollar_endonly)
      ~dotall:(enabled `Dotall)
      ~ungreedy:(enabled `Ungreedy)
      s
  in
  let anchored = if enabled `Anchored then Re.seq [ Re.start; parsed ] else parsed in
  if enabled `Caseless then Re.no_case anchored else anchored
;;
351351+352352+let compile = Re.compile
353353+let compile_pat ?(opts = []) s = compile (re ~opts s)
(** Non-raising variant of [re]: returns [Ok r] on success and maps the
    [Not_supported] and [Parse_error] exceptions to the matching [Error]
    tag. Any other exception propagates. *)
let re_result ?opts s =
  try Ok (re ?opts s) with
  | Parse_error -> Error `Parse_error
  | Not_supported -> Error `Not_supported
;;
+51
vendor/opam/re/lib/perl.mli
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(** Perl-style regular expressions *)
2424+2525+exception Parse_error
2626+2727+(** Errors that can be raised during the parsing of the regular expression *)
2828+exception Not_supported
2929+3030+type opt =
3131+ [ `Ungreedy
3232+ | `Dotall
3333+ | `Dollar_endonly
3434+ | `Multiline
3535+ | `Anchored
3636+ | `Caseless
3737+ ]
3838+3939+(** Parsing of a Perl-style regular expression *)
4040+val re : ?opts:opt list -> string -> Core.t
4141+4242+val re_result
4343+ : ?opts:opt list
4444+ -> string
4545+ -> (Core.t, [ `Not_supported | `Parse_error ]) result
4646+4747+(** (Same as [Re.compile]) *)
4848+val compile : Core.t -> Core.re
4949+5050+(** Regular expression compilation *)
5151+val compile_pat : ?opts:opt list -> string -> Core.re
+24
vendor/opam/re/lib/pmark.ml
(* Marks are plain integers handed out from a shared atomic counter. *)
module Pmark = struct
  type t = int

  let equal = Int.equal
  let compare = Int.compare

  (* Next mark to hand out; the first call to [gen] returns 1. *)
  let r = Atomic.make 1

  (* Returns the current counter value and bumps the counter by one. *)
  let gen () = Atomic.fetch_and_add r 1
  let pp = Format.pp_print_int
end
1010+1111+include Pmark
(* Sets of marks: the standard functorial set, plus a [to_list] helper. *)
module Set = struct
  module Set = Set.Make (Pmark)

  (* Elements of [x] as a list, in the order produced by [Set.to_seq]. *)
  let[@warning "-32"] to_list x = List.of_seq (Set.to_seq x)

  include Set
end
2323+2424+let to_dyn = Dyn.int
+13
vendor/opam/re/lib/pmark.mli
···11+type t = private int
22+33+val equal : t -> t -> bool
44+val compare : t -> t -> int
55+val gen : unit -> t
66+val pp : t Fmt.t
77+val to_dyn : t -> Dyn.t
88+99+module Set : sig
1010+ include Set.S with type elt = t
1111+1212+ val to_list : t -> elt list
1313+end
+163
vendor/opam/re/lib/posix.ml
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(*
2424+ What we could (should?) do:
2525+ - a* ==> longest ((shortest (no_group a)* ), a | ()) (!!!)
2626+ - abc understood as (ab)c
2727+ - "((a?)|b)" against "ab" should not bind the first subpattern to anything
2828+2929+ Note that it should be possible to handle "(((ab)c)d)e" efficiently
3030+*)
3131+module Re = Core
3232+3333+exception Parse_error = Parse_buffer.Parse_error
3434+exception Not_supported
3535+3636+let parse newline s =
3737+ let buf = Parse_buffer.create s in
3838+ let accept = Parse_buffer.accept buf in
3939+ let eos () = Parse_buffer.eos buf in
4040+ let test c = Parse_buffer.test buf c in
4141+ let unget () = Parse_buffer.unget buf in
4242+ let get () = Parse_buffer.get buf in
4343+ let rec regexp () = regexp' [ branch () ]
4444+ and regexp' left =
4545+ if accept '|' then regexp' (branch () :: left) else Re.alt (List.rev left)
4646+ and branch () = branch' []
4747+ and branch' left =
4848+ if eos () || test '|' || test ')'
4949+ then Re.seq (List.rev left)
5050+ else branch' (piece () :: left)
5151+ and piece () =
5252+ let r = atom () in
5353+ if accept '*'
5454+ then Re.rep (Re.nest r)
5555+ else if accept '+'
5656+ then Re.rep1 (Re.nest r)
5757+ else if accept '?'
5858+ then Re.opt r
5959+ else if accept '{'
6060+ then (
6161+ match Parse_buffer.integer buf with
6262+ | Some i ->
6363+ let j = if accept ',' then Parse_buffer.integer buf else Some i in
6464+ if not (accept '}') then raise Parse_error;
6565+ (match j with
6666+ | Some j when j < i -> raise Parse_error
6767+ | _ -> ());
6868+ Re.repn (Re.nest r) i j
6969+ | None ->
7070+ unget ();
7171+ r)
7272+ else r
7373+ and atom () =
7474+ if accept '.'
7575+ then if newline then Re.notnl else Re.any
7676+ else if accept '('
7777+ then (
7878+ let r = regexp () in
7979+ if not (accept ')') then raise Parse_error;
8080+ Re.group r)
8181+ else if accept '^'
8282+ then if newline then Re.bol else Re.bos
8383+ else if accept '$'
8484+ then if newline then Re.eol else Re.eos
8585+ else if accept '['
8686+ then
8787+ if accept '^'
8888+ then Re.diff (Re.compl (bracket [])) (Re.char '\n')
8989+ else Re.alt (bracket [])
9090+ else if accept '\\'
9191+ then (
9292+ if eos () then raise Parse_error;
9393+ match get () with
9494+ | ('|' | '(' | ')' | '*' | '+' | '?' | '[' | '.' | '^' | '$' | '{' | '\\') as c ->
9595+ Re.char c
9696+ | _ -> raise Parse_error)
9797+ else (
9898+ if eos () then raise Parse_error;
9999+ match get () with
100100+ | '*' | '+' | '?' | '{' | '\\' -> raise Parse_error
101101+ | c -> Re.char c)
102102+ and bracket s =
103103+ if s <> [] && accept ']'
104104+ then s
105105+ else (
106106+ match char () with
107107+ | `Set st -> bracket (st :: s)
108108+ | `Char c ->
109109+ if accept '-'
110110+ then
111111+ if accept ']'
112112+ then Re.char c :: Re.char '-' :: s
113113+ else
114114+ bracket
115115+ (match char () with
116116+ | `Char c' -> Re.rg c c' :: s
117117+ | `Set st' -> Re.char c :: Re.char '-' :: st' :: s)
118118+ else bracket (Re.char c :: s))
119119+ and char () =
120120+ if eos () then raise Parse_error;
121121+ let c = get () in
122122+ if c = '['
123123+ then (
124124+ match Posix_class.parse buf with
125125+ | Some set -> `Set set
126126+ | None ->
127127+ if accept '.'
128128+ then (
129129+ if eos () then raise Parse_error;
130130+ let c = get () in
131131+ if not (accept '.') then raise Not_supported;
132132+ if not (accept ']') then raise Parse_error;
133133+ `Char c)
134134+ else `Char c)
135135+ else `Char c
136136+ in
137137+ let res = regexp () in
138138+ if not (eos ()) then raise Parse_error;
139139+ res
140140+;;
141141+142142+type opt =
143143+ [ `ICase
144144+ | `NoSub
145145+ | `Newline
146146+ ]
(** [re ?opts s] parses the POSIX extended pattern [s].

    [`Newline] is forwarded to [parse]; [`ICase] wraps the result in
    [Re.no_case] and [`NoSub] then wraps it in [Re.no_group]. Exceptions
    raised by [parse] propagate. *)
let re ?(opts = []) s =
  let enabled flag = List.memq flag opts in
  let parsed = parse (enabled `Newline) s in
  let cased = if enabled `ICase then Re.no_case parsed else parsed in
  if enabled `NoSub then Re.no_group cased else cased
;;
(** Non-raising variant of [re]: returns [Ok r] on success and maps the
    [Not_supported] and [Parse_error] exceptions to the matching [Error]
    tag. Any other exception propagates. *)
let re_result ?opts s =
  try Ok (re ?opts s) with
  | Parse_error -> Error `Parse_error
  | Not_supported -> Error `Not_supported
;;
(* [compile r] compiles [r] wrapped in [Re.longest]. *)
let compile re = re |> Re.longest |> Re.compile

(* [compile_pat ?opts s] parses [s] with [re] and compiles the result. *)
let compile_pat ?(opts = []) s = re ~opts s |> compile
+107
vendor/opam/re/lib/posix.mli
···11+(*
22+ RE - A regular expression library
33+44+ Copyright (C) 2001 Jerome Vouillon
55+ email: Jerome.Vouillon@pps.jussieu.fr
66+77+ This library is free software; you can redistribute it and/or
88+ modify it under the terms of the GNU Lesser General Public
99+ License as published by the Free Software Foundation, with
1010+ linking exception; either version 2.1 of the License, or (at
1111+ your option) any later version.
1212+1313+ This library is distributed in the hope that it will be useful,
1414+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1515+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1616+ Lesser General Public License for more details.
1717+1818+ You should have received a copy of the GNU Lesser General Public
1919+ License along with this library; if not, write to the Free Software
2020+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2121+*)
2222+2323+(** References:
2424+ - {{:http://www.opengroup.org/onlinepubs/007908799/xbd/re.html} re}
2525+ - {{:http://www.opengroup.org/onlinepubs/007908799/xsh/regcomp.html} regcomp}
2626+2727+ Example of how to use this module (to parse some IRC logs):
2828+2929+ {[
3030+ type msg =
3131+ { time : string
3232+ ; author : string
3333+ ; content : string
3434+ }
3535+3636+ let re = Core.compile (Re_posix.re "([^:].*:[^:]*:[^:]{2})<.([^>]+)> (.+)$")
3737+3838+ (* parse a line *)
3939+ let match_line line =
4040+ try
4141+ let substrings = Core.exec re line in
4242+ let groups = Core.get_all substrings in
4343+ (* groups can be obtained directly by index within [substrings] *)
4444+ Some { time = groups.(1); author = groups.(2); content = groups.(3) }
4545+ with
4646+ | Not_found -> None (* regex didn't match *)
4747+ ;;
4848+ ]} *)
4949+5050+(* XXX Character classes *)
5151+5252+exception Parse_error
5353+5454+(** Errors that can be raised during the parsing of the regular expression *)
5555+exception Not_supported
5656+5757+type opt =
5858+ [ `ICase
5959+ | `NoSub
6060+ | `Newline
6161+ ]
6262+6363+(** Parsing of a Posix extended regular expression *)
6464+val re : ?opts:opt list -> string -> Core.t
6565+6666+val re_result
6767+ : ?opts:opt list
6868+ -> string
6969+ -> (Core.t, [ `Not_supported | `Parse_error ]) result
7070+7171+(** [compile r] is defined as [Core.compile (Core.longest r)] *)
7272+val compile : Core.t -> Core.re
7373+7474+(** [compile_pat ?opts regex] compiles the Posix extended regular expression [regexp] *)
7575+val compile_pat : ?opts:opt list -> string -> Core.re
7676+7777+(*
7878+ Deviation from the standard / ambiguities in the standard
7979+ ---------------------------------------------------------
8080+ We tested the behavior of the Linux library (glibc) and the Solaris
8181+ library.
8282+8383+ (1) An expression [efg] should be parsed as [(ef)g].
8484+ All implementations parse it as [e(fg)].
8585+ (2) When matching the pattern "((a)|b)*" against the string "ab",
8686+ the sub-expression "((a)|b)" should match "b", and the
8787+ sub-expression "(a)" should not match anything.
8888+ In both implementations, the sub-expression "(a)" matches "a".
8989+ (3) When matching the pattern "(aa?)*" against the string "aaa", it is
9090+ not clear whether the final match of the sub-expression "(aa?)" is
9191+ the last "a" (all matches of the sub-expression are successively
9292+ maximized), or "aa" (the final match is maximized).
9393+ Both implementations implement the first case.
9494+ (4) When matching the pattern "((a?)|b)*" against the string "ab",
9595+ the sub-expression "((a?)|b)" should match the empty string at the
9696+ end of the string (it is better to match the empty string than to
9797+ match nothing).
9898+ In both implementations, this sub-expression matches "b".
9999+ (Strangely, in the Linux implementation, the sub-expression "(a?)"
100100+ correctly matches the empty string at the end of the string)
101101+102102+ This library behaves the same way as the other libraries for all
103103+ points, except for (2) and (4) where it follows the standard.
105105+ The behavior of this library in these four cases may change in future
106106+ releases.
107107+*)
(** [replace ?pos ?len ?all re ~f s] rebuilds the window of [s] that starts at
    [pos] (default [0]) and spans [len] characters (default: up to the end of
    [s]), substituting [f m] for each match [m] of [re]. With [all = false]
    only the first match is substituted and the rest of the window is copied
    unchanged.

    Raises [Invalid_argument "Re.replace"] when [pos] is negative or [len] is
    negative or runs past the end of [s]. *)
let replace ?(pos = 0) ?len ?(all = true) re ~f s =
  if pos < 0 then invalid_arg "Re.replace";
  (* First index that is outside the window. *)
  let limit =
    match len with
    | Some l when l < 0 || pos + l > String.length s -> invalid_arg "Re.replace"
    | Some l -> pos + l
    | None -> String.length s
  in
  let out = Buffer.create (String.length s) in
  (* Step over one character after an empty match: copy the character at [i]
     when there is one (an empty match such as [$] can sit at [limit]) and
     return the index to resume from. *)
  let skip_char i =
    if i < limit then Buffer.add_char out s.[i];
    i + 1
  in
  (* [from] is where the search resumes; [after_match] records whether the
     previous step ended on a non-empty match. *)
  let rec scan from after_match =
    if from > limit
    then ()
    else (
      match
        Compile.match_str ~groups:true ~partial:false re s ~pos:from ~len:(limit - from)
      with
      | Running _ -> ()
      | Failed -> Buffer.add_substring out s from (limit - from)
      | Match m ->
        let start = Group.Offset.get_no_check (Group.start_offset m 0) in
        let stop = Group.Offset.get_no_check (Group.stop_offset m 0) in
        if after_match && from = start && start = stop
        then
          (* Empty match immediately after a match: do not substitute. *)
          scan (skip_char stop) false
        else (
          (* Text between the previous match and this one. *)
          Buffer.add_substring out s from (start - from);
          Buffer.add_string out (f m);
          if not all
          then Buffer.add_substring out s stop (limit - stop)
          else if start = stop
          then scan (skip_char stop) false
          else scan stop true))
  in
  scan pos false;
  Buffer.contents out
;;
(* Same as [replace], with every match replaced by the constant string [by]. *)
let replace_string ?pos ?len ?all re ~by s =
  let f _ = by in
  replace ?pos ?len ?all re ~f s
+35
vendor/opam/re/lib/replace.mli
···11+(** [replace ~all re ~f s] iterates on [s], and replaces every occurrence
22+ of [re] with [f substring] where [substring] is the current match.
33+ If [all = false], then only the first occurrence of [re] is replaced. *)
44+val replace
55+ : ?pos:int (** Default: 0 *)
66+ -> ?len:int
77+ -> ?all:bool (** Default: true. Otherwise only replace first occurrence *)
88+ -> Compile.re (** matched groups *)
99+ -> f:(Group.t -> string) (** how to replace *)
1010+ -> string (** string to replace in *)
1111+ -> string
1212+1313+(** [replace_string ~all re ~by s] iterates on [s], and replaces every
1414+ occurrence of [re] with [by]. If [all = false], then only the first
1515+ occurrence of [re] is replaced.
1616+1717+ {5 Examples:}
1818+ {[
1919+ # let regex = Re.compile (Re.char ',');;
2020+ val regex : re = <abstr>
2121+2222+ # Re.replace_string regex ~by:";" "[1,2,3,4,5,6,7]";;
2323+ - : string = "[1;2;3;4;5;6;7]"
2424+2525+ # Re.replace_string regex ~all:false ~by:";" "[1,2,3,4,5,6,7]";;
2626+ - : string = "[1;2,3,4,5,6,7]"
2727+ ]} *)
2828+val replace_string
2929+ : ?pos:int (** Default: 0 *)
3030+ -> ?len:int
3131+ -> ?all:bool (** Default: true. Otherwise only replace first occurrence *)
3232+ -> Compile.re (** matched groups *)
3333+ -> by:string (** replacement string *)
3434+ -> string (** string to replace in *)
3535+ -> string
+114
vendor/opam/re/lib/search.ml
(** [all ?pos ?len re s] is the lazy sequence of successive matches of [re]
    in the window of [s] that starts at [pos] (default [0]) and spans [len]
    characters (default: up to the end of [s]).

    Raises [Invalid_argument "Re.all"] when [pos] is negative or [len] is
    negative or runs past the end of [s]. *)
let all ?(pos = 0) ?len re s : _ Seq.t =
  if pos < 0 then invalid_arg "Re.all";
  (* First index that is outside the window. *)
  let limit =
    match len with
    | Some l when l < 0 || pos + l > String.length s -> invalid_arg "Re.all"
    | Some l -> pos + l
    | None -> String.length s
  in
  (* [from] is where the search resumes; [after_match] records whether the
     previous element was a non-empty match. *)
  let rec next from after_match () =
    if from <= limit
    then (
      match
        Compile.match_str ~groups:true ~partial:false re s ~pos:from ~len:(limit - from)
      with
      | Running _ | Failed -> Seq.Nil
      | Match m ->
        let start = Group.Offset.get_no_check (Group.start_offset m 0) in
        let stop = Group.Offset.get_no_check (Group.stop_offset m 0) in
        let empty = start = stop in
        if empty && after_match && start = from
        then
          (* Empty match immediately after a match: skip it. *)
          next (from + 1) false ()
        else (
          (* After an empty match, step forward by one to make progress. *)
          let resume = if empty then stop + 1 else stop in
          Seq.Cons (m, next resume (not empty))))
    else Seq.Nil
  in
  next pos false
;;
(* Same as [all], keeping only group 0 of each match. *)
let matches ?pos ?len re s : _ Seq.t =
  let whole_match m = Group.get m 0 in
  Seq.map whole_match (all ?pos ?len re s)
;;
(** [split_full ?pos ?len re s] lazily splits the window of [s] that starts
    at [pos] (default [0]) and spans [len] characters (default: up to the end
    of [s]) into [`Text] pieces and the [`Delim] matches of [re] between
    them.

    Raises [Invalid_argument "Re.split"] when [pos] is negative or [len] is
    negative or runs past the end of [s]. *)
let split_full ?(pos = 0) ?len re s : _ Seq.t =
  if pos < 0 then invalid_arg "Re.split";
  (* First index that is outside the window. *)
  let limit =
    match len with
    | Some l when l < 0 || pos + l > String.length s -> invalid_arg "Re.split"
    | Some l -> pos + l
    | None -> String.length s
  in
  let origin = pos in
  (* [text_start]: start of the text piece being accumulated.
     [from]: where the next search begins.
     [`Yield item]: an element computed on the previous step that is still
     waiting to be emitted. *)
  let rec step state text_start from () =
    match state with
    | `Yield item -> Seq.Cons (item, step `Idle text_start from)
    | `Idle when from > limit ->
      (* We had an empty match at the end of the string. *)
      assert (text_start = limit);
      Seq.Nil
    | `Idle ->
      (match
         Compile.match_str ~groups:true ~partial:false re s ~pos:from ~len:(limit - from)
       with
       | Running _ -> Seq.Nil
       | Failed ->
         if text_start >= limit
         then Seq.Nil
         else (
           (* Emit the trailing text. *)
           let text = String.sub s text_start (limit - text_start) in
           Seq.Cons (`Text text, step state limit from))
       | Match m ->
         let start = Group.Offset.get_no_check (Group.start_offset m 0) in
         let stop = Group.Offset.get_no_check (Group.stop_offset m 0) in
         let empty = start = stop in
         (* After an empty match, step forward by one to make progress. *)
         let from = if empty then stop + 1 else stop in
         if empty && text_start = start && start > origin
         then
           (* Empty match immediately after a delimiter: skip it. *)
           step state stop from ()
         else if start > origin
         then (
           (* Text precedes the delimiter: emit the text now and the
              delimiter on the next step. *)
           let text = String.sub s text_start (start - text_start) in
           Seq.Cons (`Text text, step (`Yield (`Delim m)) stop from))
         else Seq.Cons (`Delim m, step state stop from))
  in
  step `Idle pos pos
;;
(* Text pieces of [split_full], with the delimiters dropped. *)
let split ?pos ?len re s : _ Seq.t =
  split_full ?pos ?len re s
  |> Seq.filter_map (function
    | `Text s -> Some s
    | `Delim _ -> None)
;;
(* Like [split], but an empty string is emitted wherever a delimiter is not
   preceded by text: at the start, between two adjacent delimiters, and at
   the end when the input ends on a delimiter (or yields nothing at all). *)
let split_delim ?pos ?len re s : _ Seq.t =
  (* [prev_was_delim] is true at the start and right after a delimiter. *)
  let rec go ~prev_was_delim seq () =
    match seq (), prev_was_delim with
    | Seq.Nil, true -> Seq.Cons ("", Seq.empty)
    | Seq.Nil, false -> Seq.Nil
    | Seq.Cons (`Text s, rest), _ -> Seq.Cons (s, go ~prev_was_delim:false rest)
    | Seq.Cons (`Delim _, rest), true -> Seq.Cons ("", go ~prev_was_delim:true rest)
    | Seq.Cons (`Delim _, rest), false -> go ~prev_was_delim:true rest ()
  in
  go ~prev_was_delim:true (split_full ?pos ?len re s)
;;
+70
vendor/opam/re/lib/slice.ml
···11+open Import
22+33+type t =
44+ { s : string
55+ ; pos : int
66+ ; len : int
77+ }
(* Operations on a list of slices, viewed as one concatenated string. *)
module L = struct
  type nonrec t = t list

  (* [get_substring slices ~start ~stop] copies the characters at offsets
     [start] (inclusive) to [stop] (exclusive) of the concatenation of
     [slices]. Returns [""] when [start = stop]. Hits [assert false] if the
     slices run out before the requested range is covered. *)
  let get_substring slices ~start ~stop =
    if stop = start
    then ""
    else (
      (* Discard the first [n] characters of the concatenation. *)
      let rec skip slices n =
        if n = 0
        then slices
        else (
          match slices with
          | [] -> assert false
          | ({ s = _; pos; len } as slice) :: rest ->
            if n >= len
            then skip rest (n - len)
            else { slice with pos = pos + n; len = len - n } :: rest)
      in
      let remaining = skip slices start in
      let buf = Buffer.create (stop - start) in
      (* Append the next [n] characters of the concatenation to [buf]. *)
      let rec copy slices n =
        if n > 0
        then (
          match slices with
          | [] -> assert false
          | { s; pos; len } :: rest ->
            if n > len
            then (
              Buffer.add_substring buf s pos len;
              copy rest (n - len))
            else Buffer.add_substring buf s pos n)
      in
      copy remaining (stop - start);
      Buffer.contents buf)
  ;;

  (* [drop t remains] removes whole slices from the front of [t] while they
     fit within [remains] characters. A slice that only partly fits is
     replaced by its last [remains] characters.
     NOTE(review): confirm that keeping the tail of that slice (rather than
     dropping [remains] characters from it) is the intended behaviour. *)
  let rec drop t remains =
    if remains = 0
    then t
    else (
      match t with
      | [] -> []
      | ({ s = _; pos; len } as slice) :: rest ->
        if remains >= len
        then drop rest (remains - len)
        else { slice with pos = pos + (len - remains); len = remains } :: rest)
  ;;

  (* [drop] applied from the other end of the list. *)
  let drop_rev t remains =
    (* TODO Use a proper functional queue *)
    if remains = 0
    then t
    else (
      let reversed = List.rev t in
      List.rev (drop reversed remains))
  ;;
end
+12
vendor/opam/re/lib/slice.mli
···11+type t =
22+ { s : string
33+ ; pos : int
44+ ; len : int
55+ }
66+77+module L : sig
88+ type nonrec t = t list
99+1010+ val get_substring : t -> start:int -> stop:int -> string
1111+ val drop_rev : t -> int -> t
1212+end
+299
vendor/opam/re/lib/str.ml
···11+(***********************************************************************)
22+(* *)
33+(* Objective Caml *)
44+(* *)
55+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
66+(* *)
77+(* Copyright 1996 Institut National de Recherche en Informatique et *)
88+(* en Automatique. All rights reserved. This file is distributed *)
99+(* under the terms of the GNU Library General Public License, with *)
1010+(* linking exception. *)
1111+(* *)
1212+(***********************************************************************)
1313+1414+(* Modified by Jerome.Vouillon@pps.jussieu.fr for integration in RE *)
1515+1616+(* $Id: re_str.ml,v 1.3 2002/07/03 15:47:54 vouillon Exp $ *)
1717+1818+module Ast = Ast.Export
1919+2020+include struct
2121+ open Core
2222+2323+ let exec = exec
2424+ let exec_partial = exec_partial
2525+end
(* A compiled [Str]-style regular expression. Both fields are built by
   [compile_regexp] from the same parsed pattern and are compiled lazily:
   [mtch] is the pattern preceded by [Ast.start] and is the one forced by
   [string_match], [string_partial_match] and [search_backward]; [srch] is
   the bare pattern and is forced by [search_forward]. *)
type regexp =
  { mtch : Compile.re Lazy.t
  ; srch : Compile.re Lazy.t
  }
(* [compile_regexp s c] parses [s] with [Emacs.re_no_emacs] and wraps it in a
   [regexp] record. [c] is the case-folding flag: it is negated to obtain the
   [~case] argument of the parser. Compilation of both variants is deferred
   until first use. *)
let compile_regexp s c =
  let parsed = Emacs.re_no_emacs ~case:(not c) s in
  let anchored = lazy (Compile.compile (Ast.seq [ Ast.start; parsed ])) in
  let unanchored = lazy (Compile.compile parsed) in
  { mtch = anchored; srch = unanchored }
;;
(* Domain-local slot holding the groups of the latest match attempt made by
   the matching/searching functions below; it is [None] initially and is
   reset to [None] whenever an attempt fails. *)
let state = Domain.DLS.new_key (fun () -> None)
(* [string_match re s p] runs the anchored automaton of [re] on [s] from
   position [p]. The outcome is recorded in [state] ([Some groups] on
   success, [None] on failure) and reported as a boolean. *)
let string_match re s p =
  let outcome =
    try Some (exec ~pos:p (Lazy.force re.mtch) s) with
    | Not_found -> None
  in
  Domain.DLS.set state outcome;
  Option.is_some outcome
;;
(* [string_partial_match re s p] is [true] when [exec_partial] reports a
   partial match from [p], [false] on a mismatch, and defers to
   [string_match] (which also updates [state]) when it reports a full one. *)
let string_partial_match re s p =
  let anchored = Lazy.force re.mtch in
  match exec_partial ~pos:p anchored s with
  | `Mismatch -> false
  | `Partial -> true
  | `Full -> string_match re s p
;;
(* [search_forward re s p] runs the unanchored automaton of [re] on [s] from
   position [p]. On success the groups are stored in [state] and the start
   offset of group 0 is returned; otherwise [state] is cleared and
   [Not_found] is raised. *)
let search_forward re s p =
  let found =
    try Some (exec ~pos:p (Lazy.force re.srch) s) with
    | Not_found -> None
  in
  Domain.DLS.set state found;
  match found with
  | None -> raise Not_found
  | Some groups ->
    let start, _ = Group.offset groups 0 in
    start
;;
(* [search_backward re s p] tries an anchored match at [p], then at [p - 1],
   and so on down to 0. It returns the first position that matches (with the
   groups stored in [state]) or raises [Not_found], leaving [state] cleared,
   once position 0 has failed. *)
let rec search_backward re s p =
  let attempt =
    try Some (exec ~pos:p (Lazy.force re.mtch) s) with
    | Not_found -> None
  in
  Domain.DLS.set state attempt;
  match attempt with
  | Some _ -> p
  | None -> if p = 0 then raise Not_found else search_backward re s (p - 1)
;;
(* [valid_group n] holds when [n] is in [0, 9] and a match is currently
   recorded in [state] whose group count exceeds [n]. The state is only
   consulted when [n] is in range. *)
let valid_group n =
  if n < 0 || n >= 10
  then false
  else (
    match Domain.DLS.get state with
    | Some m -> n < Group.nb_groups m
    | None -> false)
;;
(* [offset_group i] is [Group.offset m i] for the match [m] recorded in
   [state]; it raises [Not_found] when no match is recorded. *)
let offset_group i =
  match Domain.DLS.get state with
  | None -> raise Not_found
  | Some m -> Group.offset m i
;;
(* [group_len i] is the width of group [i] in the recorded match, or 0 when
   [offset_group] raises [Not_found]. *)
let group_len i =
  try
    let first, last = offset_group i in
    last - first
  with
  | Not_found -> 0
;;
(* [repl_length repl p q len] scans the replacement template [repl] from
   index [p] up to [len] and returns [q] plus the number of bytes its
   expansion needs: 1 per ordinary character and per [\\], the width of the
   referenced group for [\0]..[\9], and 2 for any other escaped character.
   Fails with [Failure] when the template ends right after a backslash. *)
let repl_length repl p q len =
  let src = ref p in
  let size = ref q in
  while !src < len do
    (match repl.[!src] with
     | '\\' ->
       incr src;
       if !src = len then failwith "Str.replace: illegal backslash sequence";
       (match repl.[!src] with
        | '\\' -> incr size
        | '0' .. '9' as digit ->
          size := !size + group_len (Char.code digit - Char.code '0')
        | _ -> size := !size + 2)
     | _ -> incr size);
    incr src
  done;
  !size
;;
(* [replace orig repl p res q len] expands the template [repl] from index
   [p] up to [len] into [res] starting at offset [q]. Ordinary characters are
   copied, [\\] writes one backslash, [\0]..[\9] copy the corresponding group
   of the recorded match out of [orig] (nothing when the group is
   unavailable), and any other escape is written verbatim with its
   backslash. The character after a backslash is read without a bounds
   check of its own. *)
let rec replace orig repl p res q len =
  if p >= len
  then ()
  else (
    match repl.[p] with
    | '\\' ->
      let written =
        match repl.[p + 1] with
        | '\\' ->
          Bytes.set res q '\\';
          1
        | '0' .. '9' as digit ->
          (match offset_group (Char.code digit - Char.code '0') with
           | exception Not_found -> 0
           | first, last ->
             let width = last - first in
             if width > 0 then String.blit orig first res q width;
             width)
        | other ->
          Bytes.set res q '\\';
          Bytes.set res (q + 1) other;
          2
      in
      replace orig repl (p + 2) res (q + written) len
    | plain ->
      Bytes.set res q plain;
      replace orig repl (p + 1) res (q + 1) len)
;;
(* [replacement_text repl orig] expands the template [repl] against the
   recorded match, taking group text from [orig]. The output buffer is sized
   with [repl_length] and then filled by [replace]. *)
let replacement_text repl orig =
  let repl_len = String.length repl in
  let out = Bytes.create (repl_length repl 0 0 repl_len) in
  replace orig repl 0 out 0 repl_len;
  Bytes.unsafe_to_string out
;;
(* [quote s] returns [s] with a backslash inserted in front of each of the
   characters [[ ] * . \ ? + ^ $]; all other characters are copied as is. *)
let quote s =
  let out = Buffer.create (2 * String.length s) in
  String.iter
    (fun ch ->
       (match ch with
        | '[' | ']' | '*' | '.' | '\\' | '?' | '+' | '^' | '$' ->
          Buffer.add_char out '\\'
        | _ -> ());
       Buffer.add_char out ch)
    s;
  Buffer.contents out
;;
(* Substring helpers; all of them delegate to [String.sub] and therefore
   raise [Invalid_argument] on out-of-range positions. *)

(* The [n] characters of [s] before position [n]. *)
let string_before s n = String.sub s 0 n

(* The characters of [s] from position [n] to the end. *)
let string_after s n =
  let total = String.length s in
  String.sub s n (total - n)
;;

(* The first [n] characters of [s]. *)
let first_chars s n = String.sub s 0 n

(* The last [n] characters of [s]. *)
let last_chars s n =
  let total = String.length s in
  String.sub s (total - n) n
;;
(* Public constructors: each calls [compile_regexp] with the case-folding
   flag set to [false] or [true]; the [_string] variants first escape the
   input with [quote]. *)
let regexp e = compile_regexp e false
let regexp_case_fold e = compile_regexp e true

let regexp_string s =
  let escaped = quote s in
  compile_regexp escaped false
;;

let regexp_string_case_fold s =
  let escaped = quote s in
  compile_regexp escaped true
;;
(* [group_beginning n] is the start offset of group [n] in the recorded
   match. Raises [Invalid_argument] when [valid_group n] is false and
   [Not_found] when the stored offset is [-1]. *)
let group_beginning n =
  if valid_group n
  then (
    match offset_group n with
    | -1, _ -> raise Not_found
    | start, _ -> start)
  else invalid_arg "Str.group_beginning"
;;
(* [group_end n] is the end offset of group [n] in the recorded match.
   Raises [Invalid_argument] when [valid_group n] is false and [Not_found]
   when the stored offset is [-1]. *)
let group_end n =
  if valid_group n
  then (
    match offset_group n with
    | _, -1 -> raise Not_found
    | _, stop -> stop)
  else invalid_arg "Str.group_end"
;;
(* [matched_group n txt] extracts from [txt] the span that [offset_group n]
   reports for the recorded match. *)
let matched_group n txt =
  let first, last = offset_group n in
  let width = last - first in
  String.sub txt first width
;;
(* [replace_matched repl matched] is [replacement_text repl matched]. *)
let replace_matched repl matched = replacement_text repl matched

(* Shorthands for group 0, i.e. the whole match. *)
let match_beginning () = group_beginning 0
let match_end () = group_end 0
let matched_string txt = matched_group 0 txt
(* [substitute_first expr repl_fun text] replaces the first match of [expr]
   in [text] by [repl_fun text] and returns the result; [text] is returned
   unchanged when nothing matches. As before, a [Not_found] escaping from
   [repl_fun] is also treated as "no match". *)
let substitute_first expr repl_fun text =
  try
    let pos = search_forward expr text 0 in
    (* Read the end of the match before calling [repl_fun]: the callback may
       run another match and overwrite the recorded state, and the order in
       which the elements of a list literal are evaluated is unspecified. *)
    let suffix = string_after text (match_end ()) in
    let replacement = repl_fun text in
    String.concat "" [ string_before text pos; replacement; suffix ]
  with
  | Not_found -> text
;;
(* [global_substitute expr repl_fun text] replaces every match of [expr] in
   [text] by [repl_fun text]. Pieces are accumulated in reverse and joined at
   the end. After an empty match the next search starts one character
   further so that the scan always advances. *)
let global_substitute expr repl_fun text =
  let text_len = String.length text in
  let rec loop pieces start last_was_empty =
    let from = if last_was_empty then start + 1 else start in
    if from > text_len
    then string_after text start :: pieces
    else (
      match search_forward expr text from with
      | exception Not_found -> string_after text start :: pieces
      | pos ->
        (* The match end must be read before [repl_fun] runs. *)
        let end_pos = match_end () in
        let repl_text = repl_fun text in
        let untouched = String.sub text start (pos - start) in
        loop (repl_text :: untouched :: pieces) end_pos (end_pos = pos))
  in
  loop [] 0 false |> List.rev |> String.concat ""
;;
(* Template-based variants of [global_substitute] and [substitute_first]:
   the replacement is the expansion of [repl] by [replacement_text]. *)
let global_replace expr repl text =
  let expand = replacement_text repl in
  global_substitute expr expand text
;;

let replace_first expr repl text =
  let expand = replacement_text repl in
  substitute_first expr expand text
;;
(* [search_forward_progress re s p] is [search_forward re s p] unless the
   match found ends at or before [p]; in that case the search is retried
   once from [p + 1], or [Not_found] is raised if [p] is already at the end
   of [s]. *)
let search_forward_progress re s p =
  let pos = search_forward re s p in
  if match_end () > p
  then pos
  else if p >= String.length s
  then raise Not_found
  else search_forward re s (p + 1)
;;
(* [bounded_split expr text num] splits [text] on matches of [expr]. A
   delimiter matched at position 0 is skipped first. Splitting stops when
   the end of [text] is reached, when no further delimiter is found, or when
   the countdown started at [num] reaches 1, in which case the remainder of
   [text] becomes the last piece. *)
let bounded_split expr text num =
  let text_len = String.length text in
  let first = if string_match expr text 0 then match_end () else 0 in
  let rec go pieces from budget =
    if from >= text_len
    then pieces
    else if budget = 1
    then string_after text from :: pieces
    else (
      match search_forward_progress expr text from with
      | exception Not_found -> string_after text from :: pieces
      | pos ->
        let piece = String.sub text from (pos - from) in
        go (piece :: pieces) (match_end ()) (budget - 1))
  in
  List.rev (go [] first num)
;;

(* [split] starts the countdown at 0, so the [budget = 1] cutoff is never
   reached. *)
let split expr text = bounded_split expr text 0
(* [bounded_split_delim expr text num] is like [bounded_split] but does not
   skip a leading delimiter and keeps scanning while the position equals the
   length of [text], so empty pieces at either end are kept. The empty
   string yields [[]]. *)
let bounded_split_delim expr text num =
  let text_len = String.length text in
  let rec go pieces from budget =
    if from > text_len
    then pieces
    else if budget = 1
    then string_after text from :: pieces
    else (
      match search_forward_progress expr text from with
      | exception Not_found -> string_after text from :: pieces
      | pos ->
        let piece = String.sub text from (pos - from) in
        go (piece :: pieces) (match_end ()) (budget - 1))
  in
  match text with
  | "" -> []
  | _ -> List.rev (go [] 0 num)
;;

(* [split_delim] starts the countdown at 0, so the [budget = 1] cutoff is
   never reached. *)
let split_delim expr text = bounded_split_delim expr text 0
(* Element of the list built by [bounded_full_split]: [Delim] carries the
   text of a match, [Text] the text found between matches. *)
type split_result =
  | Text of string
  | Delim of string
(* [bounded_full_split expr text num] splits [text] on matches of [expr] and
   returns both the delimiters ([Delim]) and the text between them ([Text]).
   A [Text] element is only emitted before a delimiter when it is non-empty.
   The countdown started at [num] works as in [bounded_split]. *)
let bounded_full_split expr text num =
  let text_len = String.length text in
  let rec go items from budget =
    if from >= text_len
    then items
    else if budget = 1
    then Text (string_after text from) :: items
    else (
      match search_forward_progress expr text from with
      | exception Not_found -> Text (string_after text from) :: items
      | pos ->
        let delim = Delim (matched_string text) in
        let items =
          if pos > start_of from
          then delim :: Text (String.sub text from (pos - from)) :: items
          else delim :: items
        in
        go items (match_end ()) (budget - 1))
  and start_of from = from in
  List.rev (go [] 0 num)
;;

(* [full_split] starts the countdown at 0, so the [budget = 1] cutoff is
   never reached. *)
let full_split expr text = bounded_full_split expr text 0
+220
vendor/opam/re/lib/str.mli
···11+(***********************************************************************)
22+(* *)
33+(* Objective Caml *)
44+(* *)
55+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
66+(* *)
77+(* Copyright 1996 Institut National de Recherche en Informatique et *)
88+(* en Automatique. All rights reserved. This file is distributed *)
99+(* under the terms of the GNU Library General Public License, with *)
1010+(* linking exception. *)
1111+(* *)
1212+(***********************************************************************)
1313+1414+(* $Id: re_str.mli,v 1.1 2002/01/16 14:16:04 vouillon Exp $ *)
1515+1616+(** Module [Str]: regular expressions and high-level string processing *)
1717+1818+(** {2 Regular expressions} *)
1919+2020+(** The type of compiled regular expressions. *)
2121+type regexp
2222+2323+(** Compile a regular expression. The syntax for regular expressions
2424+ is the same as in Gnu Emacs. The special characters are
2525+ [$^.*+?[]]. The following constructs are recognized:
2626+ - [. ] matches any character except newline
2727+ - [* ] (postfix) matches the previous expression zero, one or
2828+ several times
2929+ - [+ ] (postfix) matches the previous expression one or
3030+ several times
3131+ - [? ] (postfix) matches the previous expression once or
3232+ not at all
3333+ - [[..] ] character set; ranges are denoted with [-], as in [[a-z]];
3434+ an initial [^], as in [[^0-9]], complements the set
3535+ - [^ ] matches at beginning of line
3636+ - [$ ] matches at end of line
3737+ - [\| ] (infix) alternative between two expressions
3838+ - [\(..\)] grouping and naming of the enclosed expression
3939+ - [\1 ] the text matched by the first [\(...\)] expression
4040+ ([\2] for the second expression, etc)
4141+ - [\b ] matches word boundaries
4242+ - [\ ] quotes special characters. *)
4343+val regexp : string -> regexp
4444+4545+(** Same as [regexp], but the compiled expression will match text
4646+ in a case-insensitive way: uppercase and lowercase letters will
4747+ be considered equivalent. *)
4848+val regexp_case_fold : string -> regexp
4949+5050+(** [Str.quote s] returns a regexp string that matches exactly
5151+ [s] and nothing else. *)
5252+val quote : string -> string
5353+5454+(** [Str.regexp_string s] returns a regular expression
5555+ that matches exactly [s] and nothing else. *)
5656+val regexp_string : string -> regexp
5757+5858+(** [Str.regexp_string_case_fold] is similar to [Str.regexp_string], but the regexp
5959+ matches in a case-insensitive way. *)
6060+val regexp_string_case_fold : string -> regexp
6161+6262+(** {2 String matching and searching} *)
6363+6464+(** [string_match r s start] tests whether the characters in [s]
6565+ starting at position [start] match the regular expression [r].
6666+ The first character of a string has position [0], as usual. *)
6767+val string_match : regexp -> string -> int -> bool
6868+6969+(** [search_forward r s start] searches the string [s] for a substring
7070+ matching the regular expression [r]. The search starts at position
7171+ [start] and proceeds towards the end of the string.
7272+ Return the position of the first character of the matched
7373+ substring, or raise [Not_found] if no substring matches. *)
7474+val search_forward : regexp -> string -> int -> int
7575+7676+(** Same as [search_forward], but the search proceeds towards the
7777+ beginning of the string. *)
7878+val search_backward : regexp -> string -> int -> int
7979+8080+(** Similar to [string_match], but succeeds whenever the argument
8181+ string is a prefix of a string that matches. This includes
8282+ the case of a true complete match. *)
8383+val string_partial_match : regexp -> string -> int -> bool
8484+8585+(** [matched_string s] returns the substring of [s] that was matched
8686+ by the latest [string_match], [search_forward] or [search_backward].
8787+ The user must make sure that the parameter [s] is the same string
8888+ that was passed to the matching or searching function. *)
8989+val matched_string : string -> string
9090+9191+(** [match_beginning ()] returns the position of the first character
9292+ of the substring that was matched by [string_match],
9393+ [search_forward] or [search_backward]. *)
9494+val match_beginning : unit -> int
9595+9696+(** [match_end ()] returns the position of the character following the
9797+ last character of the substring that was matched by [string_match],
9898+ [search_forward] or [search_backward]. *)
9999+val match_end : unit -> int
100100+101101+(** [matched_group n s] returns the substring of [s] that was matched
102102+ by the [n]th group [\(...\)] of the regular expression during
103103+ the latest [string_match], [search_forward] or [search_backward].
104104+ The user must make sure that the parameter [s] is the same string
105105+ that was passed to the matching or searching function.
106106+ [matched_group n s] raises [Not_found] if the [n]th group
107107+ of the regular expression was not matched. This can happen
108108+ with groups inside alternatives [\|], options [?]
109109+ or repetitions [*]. For instance, the empty string will match
110110+ [\(a\)*], but [matched_group 1 ""] will raise [Not_found]
111111+ because the first group itself was not matched. *)
112112+val matched_group : int -> string -> string
113113+114114+(** [group_beginning n] returns the position of the first character
115115+ of the substring that was matched by the [n]th group of the regular expression.
116116+ Raises [Not_found] if the [n]th group of the regular expression was not matched. *)
117117+val group_beginning : int -> int

(** [group_end n] returns the position of the character following
    the last character of the substring that was matched by the [n]th group
    of the regular expression.
    Raises [Not_found] if the [n]th group of the regular expression was not matched. *)
122122+val group_end : int -> int
123123+124124+(** {2 Replacement} *)
125125+126126+(** [global_replace regexp templ s] returns a string identical to [s],
127127+ except that all substrings of [s] that match [regexp] have been
128128+ replaced by [templ]. The replacement template [templ] can contain
129129+ [\1], [\2], etc; these sequences will be replaced by the text
130130+ matched by the corresponding group in the regular expression.
131131+ [\0] stands for the text matched by the whole regular expression. *)
132132+val global_replace : regexp -> string -> string -> string
133133+134134+(** Same as [global_replace], except that only the first substring
135135+ matching the regular expression is replaced. *)
136136+val replace_first : regexp -> string -> string -> string
137137+138138+(** [global_substitute regexp subst s] returns a string identical
139139+ to [s], except that all substrings of [s] that match [regexp]
140140+ have been replaced by the result of function [subst]. The
141141+ function [subst] is called once for each matching substring,
142142+ and receives [s] (the whole text) as argument. *)
143143+val global_substitute : regexp -> (string -> string) -> string -> string
144144+145145+(** Same as [global_substitute], except that only the first substring
146146+ matching the regular expression is replaced. *)
147147+val substitute_first : regexp -> (string -> string) -> string -> string
148148+149149+(** [replace_matched repl s] returns the replacement text [repl]
150150+ in which [\1], [\2], etc. have been replaced by the text
151151+ matched by the corresponding groups in the most recent matching
152152+ operation. [s] must be the same string that was matched during
153153+ this matching operation. *)
154154+val replace_matched : string -> string -> string
155155+156156+(** {2 Splitting} *)
157157+158158+(** [split r s] splits [s] into substrings, taking as delimiters
159159+ the substrings that match [r], and returns the list of substrings.
160160+ For instance, [split (regexp "[ \t]+") s] splits [s] into
161161+ blank-separated words. An occurrence of the delimiter at the
162162+ beginning and at the end of the string is ignored. *)
163163+val split : regexp -> string -> string list
164164+165165+(** Same as [split], but splits into at most [n] substrings,
166166+ where [n] is the extra integer parameter. *)
167167+val bounded_split : regexp -> string -> int -> string list
168168+169169+(** Same as [split], but occurrences of the delimiter at the beginning
170170+ and at the end of the string are recognized and returned as empty strings
171171+ in the result.
172172+ For instance, [split_delim (regexp " ") " abc "] returns [[""; "abc"; ""]],
173173+ while [split] with the same arguments returns [["abc"]]. *)
174174+val split_delim : regexp -> string -> string list
175175+176176+(** Same as [bounded_split] and [split_delim], but occurrences of
177177+ the delimiter at the beginning and at the end of the string are recognized
178178+ and returned as empty strings in the result.
179179+ For instance, [split_delim (regexp " ") " abc "] returns [[""; "abc"; ""]],
180180+ while [split] with the same arguments returns [["abc"]]. *)
181181+val bounded_split_delim : regexp -> string -> int -> string list
182182+183183+type split_result =
184184+ | Text of string
185185+ | Delim of string
186186+187187+(** Same as [split_delim], but returns the delimiters
188188+ as well as the substrings contained between delimiters.
189189+ The former are tagged [Delim] in the result list;
190190+ the latter are tagged [Text].
191191+ For instance, [full_split (regexp "[{}]") "{ab}"] returns
192192+ [[Delim "{"; Text "ab"; Delim "}"]]. *)
193193+val full_split : regexp -> string -> split_result list
194194+195195+(** Same as [split_delim] and [bounded_split_delim], but returns
196196+ the delimiters as well as the substrings contained between delimiters.
197197+ The former are tagged [Delim] in the result list;
198198+ the latter are tagged [Text].
199199+ For instance, [full_split (regexp "[{}]") "{ab}"] returns
200200+ [[Delim "{"; Text "ab"; Delim "}"]]. *)
201201+val bounded_full_split : regexp -> string -> int -> split_result list
202202+203203+(** {2 Extracting substrings} *)
204204+205205+(** [string_before s n] returns the substring of all characters of [s]
206206+ that precede position [n] (excluding the character at
207207+ position [n]). *)
208208+val string_before : string -> int -> string
209209+210210+(** [string_after s n] returns the substring of all characters of [s]
211211+ that follow position [n] (including the character at
212212+ position [n]). *)
213213+val string_after : string -> int -> string
214214+215215+(** [first_chars s n] returns the first [n] characters of [s].
216216+ This is the same function as [string_before]. *)
217217+val first_chars : string -> int -> string
218218+219219+(** [last_chars s n] returns the last [n] characters of [s]. *)
220220+val last_chars : string -> int -> string
+90
vendor/opam/re/lib/view.ml
···11+open Import
(* Extends the library's [Cset] with a read-only listing of its ranges. *)
module Cset = struct
  include Cset

  (* A pair of bounding characters, as produced by [view] below. *)
  module Range = struct
    type t =
      { first : Char.t
      ; last : Char.t
      }

    let first t = t.first
    let last t = t.last
  end

  (* Lists the ranges of [t]: folds over the set with [fold_right],
     converting both bounds of every interval with [Cset.to_char] and
     consing the resulting [Range.t] onto the accumulator. *)
  let view t =
    fold_right t ~init:[] ~f:(fun first last acc ->
      let range = { Range.first = Cset.to_char first; last = Cset.to_char last } in
      range :: acc)
  ;;
end
2222+2323+module Sem = Automata.Sem
2424+module Rep_kind = Automata.Rep_kind
(* One level of a regular expression as returned by [view]: the head
   constructor is exposed while the sub-expressions stay as [Ast.t]. *)
type t =
  | Set of Cset.t
  | Sequence of Ast.t list
  | Alternative of Ast.t list
  | Repeat of Ast.t * int * int option
  | Beg_of_line
  | End_of_line
  | Beg_of_word
  | End_of_word
  | Not_bound
  | Beg_of_str
  | End_of_str
  | Last_end_of_line
  | Start
  | Stop
  | Sem of Automata.Sem.t * Ast.t
  | Sem_greedy of Automata.Rep_kind.t * Ast.t
  | Group of string option * Ast.t
  | No_group of Ast.t
  | Nest of Ast.t
  | Case of Ast.t
  | No_case of Ast.t
  | Intersection of Ast.t list
  | Complement of Ast.t list
  | Difference of Ast.t * Ast.t
  | Pmark of Pmark.t * Ast.t
(* [view_ast f t] converts one [Ast.ast] node into the corresponding view
   constructor, mapping every child through [f]. *)
let view_ast f (t : _ Ast.ast) : t =
  match t with
  | Case inner -> Case (f inner)
  | No_case inner -> No_case (f inner)
  | Alternative branches -> Alternative (List.map branches ~f)
;;
(* [view_set cset] converts a character-set expression into a view.
   Sub-sets are turned back into [Ast.t] with [Ast.t_of_cset]; a [Cast] node
   is handled by [view_ast]. *)
let view_set (cset : Ast.cset) : t =
  let lift sets = List.map sets ~f:Ast.t_of_cset in
  match cset with
  | Cast ast -> view_ast Ast.t_of_cset ast
  | Difference (x, y) -> Difference (Ast.t_of_cset x, Ast.t_of_cset y)
  | Complement sets -> Complement (lift sets)
  | Intersection sets -> Intersection (lift sets)
  | Cset set -> Set set
;;
(* [view t] exposes the top-level constructor of [t]. [Set] and [Ast] nodes
   are delegated to [view_set] and [view_ast] (the latter with the identity
   on children); every other constructor is mapped to the view constructor
   of the same name with its arguments unchanged. *)
let view : Ast.t -> t = function
  | Set s -> view_set s
  | Ast s -> view_ast (fun x -> x) s
  | Sem (sem, a) -> Sem (sem, a)
  | Sem_greedy (sem, a) -> Sem_greedy (sem, a)
  | Sequence s -> Sequence s
  | Repeat (t, x, y) -> Repeat (t, x, y)
  | Beg_of_line -> Beg_of_line
  | End_of_line -> End_of_line
  | Beg_of_word -> Beg_of_word
  | End_of_word -> End_of_word
  | Not_bound -> Not_bound
  | Beg_of_str -> Beg_of_str
  | End_of_str -> End_of_str
  | Last_end_of_line -> Last_end_of_line
  | Start -> Start
  | Stop -> Stop
  | No_group a -> No_group a
  | Group (name, t) -> Group (name, t)
  | Nest t -> Nest t
  | Pmark (pmark, t) -> Pmark (pmark, t)
;;
+58
vendor/opam/re/lib/view.mli
···11+(** A view of the top-level of a regex. This type is unstable and may change *)
22+33+module Cset : sig
44+ type t = Cset.t
55+66+ module Range : sig
77+ type t
88+99+ val first : t -> Char.t
1010+ val last : t -> Char.t
1111+ end
1212+1313+ val view : t -> Range.t list
1414+end
1515+1616+module Sem : sig
1717+ type t =
1818+ [ `Longest
1919+ | `Shortest
2020+ | `First
2121+ ]
2222+end
2323+2424+module Rep_kind : sig
2525+ type t =
2626+ [ `Greedy
2727+ | `Non_greedy
2828+ ]
2929+end
3030+3131+type t =
3232+ | Set of Cset.t
3333+ | Sequence of Ast.t list
3434+ | Alternative of Ast.t list
3535+ | Repeat of Ast.t * int * int option
3636+ | Beg_of_line
3737+ | End_of_line
3838+ | Beg_of_word
3939+ | End_of_word
4040+ | Not_bound
4141+ | Beg_of_str
4242+ | End_of_str
4343+ | Last_end_of_line
4444+ | Start
4545+ | Stop
4646+ | Sem of Sem.t * Ast.t
4747+ | Sem_greedy of Rep_kind.t * Ast.t
4848+ | Group of string option * Ast.t
4949+ | No_group of Ast.t
5050+ | Nest of Ast.t
5151+ | Case of Ast.t
5252+ | No_case of Ast.t
5353+ | Intersection of Ast.t list
5454+ | Complement of Ast.t list
5555+ | Difference of Ast.t * Ast.t
5656+ | Pmark of Pmark.t * Ast.t
5757+5858+val view : Ast.t -> t
···11+22+# Data race between Compile.State.follow_transition (inlined in Compile.next)
33+# and Compile.State.set_transition
44+race_top:^camlRe__Compile.next
55+66+# Data race within Compile.find_initial_state (read/write re.initial_states)
77+race_top:^camlRe__Compile.find_initial_state
88+99+# Spurious data race due to the two-step initialization in Compile.State.make
1010+# (between Compile.State.get_info and Compile.State.set_info, both inlined)
1111+race_top:^camlRe__Compile.loop
1212+1313+# Race within Automata.Desc.status and Automata.Desc.status_no_mutex
1414+# (read/write s.status)
1515+race_top:^camlRe__Automata.status
1616+1717+# Race within Compile.final
1818+race_top:^camlRe__Compile.final

# Spurious data race due to the two-step initialization in Mark_infos.make
# (between Mark_infos.make and other functions in module Mark_infos)
2222+race_top:^camlRe__Mark_infos.set
(* Spinning barrier for [size] participants built on two atomic counters.
   NOTE(review): the reset logic suggests it is meant to be reusable across
   rounds; only single-round use is visible in this file. *)
module Barrier = struct
  type t =
    { waiters : int Atomic.t (* initialised to [size] by [create] *)
    ; size : int (* number of participants *)
    ; passed : int Atomic.t (* arrivals counted since the last reset *)
    }

  let create n = { waiters = Atomic.make n; size = n; passed = Atomic.make 0 }

  let await { waiters; size; passed } =
    (* The participant whose arrival brings [passed] to [size] resets both
       counters to 0. *)
    if Atomic.fetch_and_add passed 1 = size - 1
    then (
      Atomic.set passed 0;
      Atomic.set waiters 0);
    (* Spin until that reset has been observed. *)
    while Atomic.get waiters = size do
      Domain.cpu_relax ()
    done;
    (* Register, then spin until all [size] participants have registered. *)
    Atomic.incr waiters;
    while Atomic.get waiters < size do
      Domain.cpu_relax ()
    done
  ;;
end
(* [shuffle_array a] returns a shuffled copy of [a], leaving [a] untouched.
   Walking from the last index down to 1, each element is swapped with one
   at a position drawn by [Random.int] among the indices not above it. *)
let shuffle_array a =
  let shuffled = Array.copy a in
  let swap i j =
    let held = shuffled.(i) in
    shuffled.(i) <- shuffled.(j);
    shuffled.(j) <- held
  in
  for i = Array.length a - 1 downto 1 do
    swap i (Random.int (i + 1))
  done;
  shuffled
;;
(* [inverse_permutation p] returns the array [inv] such that
   [inv.(p.(i)) = i] for every index [i] of [p]. *)
let inverse_permutation p =
  let inv = Array.make (Array.length p) 0 in
  Array.iteri (fun i target -> inv.(target) <- i) p;
  inv
;;
(* [apply_permutation p a] returns the array [b] of the same length as [p]
   with [b.(i) = a.(p.(i))]. Building the result with [Array.map] needs no
   filler element, so an empty permutation over an empty array yields [[||]]
   instead of failing on [a.(0)]. Raises [Invalid_argument] if an entry of
   [p] is not a valid index of [a]. *)
let apply_permutation p a = Array.map (fun i -> a.(i)) p
5454+5555+(****)
(* Test pattern, built by repeated doubling: [re1] is a captured ['a'] or a
   plain ['b']; [re2], [re3] and [re4] are sequences of 2, 4 and 8 copies of
   [re1]. *)
let re1 = Re.(alt [ group (char 'a'); char 'b' ])
let re2 = Re.(seq [ re1; re1 ])
let re3 = Re.(seq [ re2; re2 ])
let re4 = Re.(seq [ re3; re3 ])

(* Either 16 copies of [re1], or one of four captured runs of 1 to 4 ['b']s. *)
let re5 =
  Re.(
    alt
      [ seq [ re4; re4 ]
      ; group (str "b")
      ; group (str "bb")
      ; group (str "bbb")
      ; group (str "bbbb")
      ])
;;
(* Number of input strings. *)
let size = 300

(* [size] random strings of 30 characters, each character being ['a'] or
   ['b'] according to [Random.bool]. *)
let strings =
  Array.init size (fun _ -> String.init 30 (fun _ -> if Random.bool () then 'a' else 'b'))
;;
(* [execute ~short re a] runs [re] on [strings.(i)] for each index [i]
   listed in [a], in that order, and returns the group offsets ([None] when
   [Not_found] is raised) re-ordered through the inverse of [a]. With
   [~short:true] matching starts at position [30 - 7] instead of 0. *)
let execute ~short re a =
  let start = if short then 30 - 7 else 0 in
  let offsets_of i =
    try Some (Re.Group.all_offset (Re.exec ~pos:start re strings.(i))) with
    | Not_found -> None
  in
  let results = Array.map offsets_of a in
  apply_permutation (inverse_permutation a) results
;;
(* Structural equality of two group-offset values. *)
let compare_groups g g' = (g = g')
(* [concurrent f f'] runs [f] in the calling domain and [f'] in a freshly
   spawned one. Both sides wait on a two-party barrier before starting their
   function. Returns the pair of results. *)
let concurrent f f' =
  let barrier = Barrier.create 2 in
  let worker () =
    Barrier.await barrier;
    f' ()
  in
  let domain = Domain.spawn worker in
  Barrier.await barrier;
  let res = f () in
  let res' = Domain.join domain in
  res, res'
;;
(* [sequential f f'] runs [f] and then [f'] in the calling domain and
   returns the pair of results. The calls are sequenced explicitly because
   the evaluation order of tuple components is unspecified. *)
let sequential f f' =
  let res = f () in
  let res' = f' () in
  res, res'
;;
(* [test compose ~short n] repeats [n] times: compile [re5] afresh, run
   [execute] on two different shufflings of the string indices (combined
   through [compose]), and check that both runs report the same groups for
   every string. A mismatch, an [Invalid_argument] or a [Division_by_zero]
   is reported on stderr; the process exits with status 1 at the end if any
   iteration failed. *)
let test compose ~short n =
  let success = ref true in
  let fail msg =
    prerr_endline msg;
    success := false
  in
  for _ = 1 to n do
    let re = Re.compile re5 in
    let a = shuffle_array (Array.init size Fun.id) in
    let a' = shuffle_array a in
    try
      let groups, groups' =
        compose (fun () -> execute ~short re a) (fun () -> execute ~short re a')
      in
      if not (Array.for_all2 (Option.equal compare_groups) groups groups')
      then fail "Bad group"
    with
    | Invalid_argument msg -> fail ("Invalid_argument " ^ msg)
    | Division_by_zero -> fail "Division_by_zero"
  done;
  if not !success then exit 1
;;
(* Entry point: the sequential runs come first, then the same checks with
   the two executions running in separate domains. *)
let () =
  prerr_endline "Sequential";
  test sequential ~short:false 20;
  test sequential ~short:true 10;
  prerr_endline "Concurrent";
  test ~short:false concurrent 750;
  test ~short:true concurrent 250
;;
···11+(library
22+ (name re_tests)
33+ (libraries
44+ re_private
55+ ;; This is because of the (implicit_transitive_deps false)
66+ ;; in dune-project
77+ ppx_expect.config
88+ ppx_expect.config_types
99+ ppx_expect
1010+ ppx_expect_common
1111+ base
1212+ str
1313+ ppx_inline_test.config)
1414+ (inline_tests
1515+ (modes native js))
1616+ (preprocess
1717+ (pps ppx_expect)))
1818+1919+;; ppx_expect v16 depends on ppx_expect.common
2020+(subdir
2121+ ppx_expect_common
2222+ (library
2323+ (name ppx_expect_common)
2424+ (enabled_if
2525+ (< %{ocaml_version} 5.0))
2626+ (libraries (re_export ppx_expect.common)))
2727+ (library
2828+ (name ppx_expect_common)
2929+ (enabled_if
3030+ (>= %{ocaml_version} 5.0))))
3131+3232+;; this hackery is needed because ppx_expect itself uses re, therefore we need to mangle
3333+;; the library name
3434+3535+(subdir
3636+ private_re
3737+ (library
3838+ (name re_private))
3939+ (copy_files %{project_root}/lib/*.{ml,mli}))
+85
vendor/opam/re/lib_test/expect/import.ml
···11+module Re = Re_private.Re
22+include Re_private.Import
33+module Fmt = Re_private.Fmt
44+module Dyn = Re_private.Dyn
55+66+let printf = Printf.printf
(* [t re s] compiles [re], matches it against [s] with [Re.exec_opt] and
   prints the optional group with [Re.Group.pp]. *)
let t re s =
  let compiled = Re.compile re in
  let group = Re.exec_opt compiled s in
  Format.printf "%a@." (Fmt.opt Re.Group.pp) group
;;
(* Pre-built regular expressions shared by the expect tests. *)
let re_whitespace = Re.Pcre.regexp "[\t ]+"
let re_eol = Re.compile Re.eol
let re_bow = Re.compile Re.bow
let re_eow = Re.compile Re.eow

(* Prints a list of strings as [[...]], elements quoted and separated by
   ["; "]. *)
let strings = Format.printf "[%a]@." Fmt.(list ~pp_sep:(Fmt.lit "; ") Fmt.quoted_string)
let re_empty = Re.Posix.compile_pat ""
(* [invalid_argument f] runs [f ()] and discards its result. If the call
   raises [Invalid_argument msg], the message is printed instead; any other
   exception propagates. *)
let invalid_argument f =
  try ignore (f ()) with
  | Invalid_argument msg -> Format.printf "Invalid_argument %S@." msg
;;
(* [exec_partial_detailed ?pos re s] compiles [re], calls
   [Re.exec_partial_detailed] on [s] and prints the outcome: [`Mismatch],
   [`Partial] with its position, or [`Full] followed by one
   "start,stop,text" entry per group, separated by [;]. *)
let exec_partial_detailed ?pos re s =
  let re = Re.compile re in
  let res = Re.exec_partial_detailed ?pos re s in
  match res with
  | `Mismatch -> Format.printf "`Mismatch@."
  | `Partial position -> Format.printf "`Partial %d@." position
  | `Full groups ->
    Re.Group.all_offset groups
    |> Array.to_list
    |> List.map ~f:(fun (a, b) ->
      Printf.sprintf
        "%d,%d,%s"
        a
        b
        (* Offsets that [String.sub] rejects are shown as "<No match>". *)
        (match String.sub s a (b - a) with
         | exception Invalid_argument _ -> "<No match>"
         | s -> Printf.sprintf "%S" s))
    |> String.concat ";"
    |> Format.printf "`Full [|%s|]@."
;;
(* [or_not_found f fmt v] forces the thunk [v]. Its result is printed with
   [f]; if forcing raises [Not_found], the text "Not_found" is printed
   instead. *)
let or_not_found f fmt v =
  let outcome =
    try Some (v ()) with
    | Not_found -> None
  in
  match outcome with
  | Some value -> f fmt value
  | None -> Format.fprintf fmt "Not_found"
;;
(* [array f fmt v] prints the array [v] as [[| ... |]], each element printed
   with [f] and elements separated by ["; "]. *)
let array f fmt v =
  let elements = Array.to_list v in
  Format.fprintf fmt "[| %a |]" (Fmt.list ~pp_sep:(Fmt.lit "; ") f) elements
;;
(* Prints a pair of integers as "(x, y)". *)
let offset fmt pair =
  let first, second = pair in
  Format.fprintf fmt "(%d, %d)" first second
;;
(* [test_re ?pos ?len r s] compiles [r], matches it on [s] and prints all
   group offsets, or "Not_found" when the match raises [Not_found]. *)
let test_re ?pos ?len r s =
  let offsets () =
    let groups = Re.exec ?pos ?len (Re.compile r) s in
    Re.Group.all_offset groups
  in
  Format.printf "%a@." (or_not_found (array offset)) offsets
;;
(* [sexp_of_dyn t] converts a [Dyn.t] value to an S-expression.
   Scalars and enums become atoms; tuples and lists become lists; a variant
   without arguments becomes an atom and one with arguments a list headed by
   its name; a record becomes a list of [(name value)] pairs in which fields
   whose value converts to the empty list are omitted. *)
let rec sexp_of_dyn (t : Re_private.Dyn.t) : Base.Sexp.t =
  match t with
  | Int i -> Atom (Int.to_string i)
  | String s | Enum s -> Atom s
  | Tuple xs | List xs -> List (List.map xs ~f:sexp_of_dyn)
  | Variant (name, []) -> Atom name
  | Variant (name, xs) ->
    (* [xs] is non-empty here (the empty case is matched just above), so no
       separate handling of an empty argument list is needed. *)
    List (Atom name :: List.map xs ~f:sexp_of_dyn)
  | Record fields ->
    List
      (List.filter_map fields ~f:(fun (name, v) ->
         match sexp_of_dyn v with
         | List [] -> None
         | sexp -> Some (Base.Sexp.List [ Atom name; sexp ])))
;;
(* Prints a [Dyn.t] value as a human-readable S-expression on stdout. *)
let print_dyn dyn =
  let sexp = sexp_of_dyn dyn in
  print_endline (Base.Sexp.to_string_hum sexp)
;;
+76
vendor/opam/re/lib_test/expect/test_186.ml
···11+open Import
(* [print re result] prints the pattern [re] followed by a description of
   the parse outcome: any [Ok _] is reported as "backward range parsed", the
   two error tags by their own message. *)
let print re result =
  let outcome =
    match result with
    | Error `Not_supported -> "not supported"
    | Error `Parse_error -> "parse error"
    | Ok _ -> "backward range parsed"
  in
  Printf.printf "%s: %s\n" re outcome
;;
1212+1313+let cases = [ "[1-0]"; "[5-1]"; "[6-6]"; "[z-a]"; "[b-b]" ]
(* [test f] applies the parser [f] to every pattern in [cases] and prints
   each outcome with [print]. *)
let test f = List.iter cases ~f:(fun re -> print re (f re))
(* Runs every pattern in [cases] through the Perl parser. Per the recorded
   output, all five patterns are reported as parsed. *)
2020+2121+let%expect_test "perl" =
2222+ test Re.Perl.re_result;
2323+ [%expect
2424+ {|
2525+ [1-0]: backward range parsed
2626+ [5-1]: backward range parsed
2727+ [6-6]: backward range parsed
2828+ [z-a]: backward range parsed
2929+ [b-b]: backward range parsed
3030+ |}]
3131+;;
(* Same inputs through the Pcre parser; the recorded output is identical. *)
3232+3333+let%expect_test "pcre" =
3434+ test Re.Pcre.re_result;
3535+ [%expect
3636+ {|
3737+ [1-0]: backward range parsed
3838+ [5-1]: backward range parsed
3939+ [6-6]: backward range parsed
4040+ [z-a]: backward range parsed
4141+ [b-b]: backward range parsed
4242+ |}]
4343+;;
(* Same inputs through the Posix parser; the recorded output is identical. *)
4444+4545+let%expect_test "posix" =
4646+ test Re.Posix.re_result;
4747+ [%expect
4848+ {|
4949+ [1-0]: backward range parsed
5050+ [5-1]: backward range parsed
5151+ [6-6]: backward range parsed
5252+ [z-a]: backward range parsed
5353+ [b-b]: backward range parsed
5454+ |}]
5555+;;
5656+5757+(* CR-someday rgrinberg: is this correct? *)
5858+let%expect_test "emacs" =
5959+ test Re.Emacs.re_result;
6060+ [%expect
6161+ {|
6262+ [1-0]: backward range parsed
6363+ [5-1]: backward range parsed
6464+ [6-6]: backward range parsed
6565+ [z-a]: backward range parsed
6666+ [b-b]: backward range parsed
6767+ |}]
6868+;;
6969+7070+(* We allow backward ranges in re. We could forbid them? *)
(* Per the recorded output, [Re.rg '5' '0'] and [Re.rg '0' '5'] both print
   as the same set, 48-53. *)
7171+let%expect_test "re" =
7272+ Format.printf "%a@." Re.pp (Re.rg '5' '0');
7373+ [%expect {| (Set 48-53) |}];
7474+ Format.printf "%a@." Re.pp (Re.rg '0' '5');
7575+ [%expect {| (Set 48-53) |}]
7676+;;
···11+module Category = Re_private.Category
22+module Cset = Re_private.Cset
(* For every byte value 0..255: if the character is a member of [Cset.cword],
   its category must intersect [Category.letter]. Nothing is printed; the
   check is done with [assert]. *)
33+44+let%expect_test "Category.from_char" =
55+ for i = 0 to 255 do
66+ let char = Char.chr i in
77+ let cat = Category.from_char char in
88+ if Cset.(mem (of_char char) cword) then assert (Category.(intersect letter cat))
99+ done
1010+;;
(* The category of '\n' must intersect both [Category.newline] and
   [Category.not_letter]. *)
1111+1212+let%expect_test "newline" =
1313+ let cat = Category.from_char '\n' in
1414+ assert (Category.(intersect cat newline));
1515+ assert (Category.(intersect cat not_letter))
1616+;;
+13
vendor/opam/re/lib_test/expect/test_color.ml
···11+open Import
(* A 256-character string containing every byte value once, in order. *)
22+33+let all_chars = String.init 256 Char.chr
(* Builds a whole-string regex from the set of all 256 characters and asserts
   that each single-character string matches it. *)
44+55+let%expect_test "match an re that distinguishes every single char" =
66+ let re =
77+ let open Re in
88+ set all_chars |> whole_string |> compile
99+ in
1010+ for i = 0 to String.length all_chars - 1 do
1111+ assert (Re.execp re (String.make 1 all_chars.[i]))
1212+ done
1313+;;
···11+open Import
22+open Re
(* NOTE(review): [t] is not defined in this chunk; judging by the recorded
   outputs it prints either "<None>" or the matched groups with their
   offsets — confirm against Import. *)
(* A group wrapping [Re.empty]: no match is recorded for either input. *)
33+44+let%expect_test "empty group" =
55+ let empty = group empty in
66+ t empty "";
77+ [%expect {| <None> |}];
88+ t empty "x";
99+ [%expect {| <None> |}]
1010+;;
(* A group around the zero-width [bos] assertion: the recorded output has
   both the whole match and the group at offsets (0 0). *)
1111+1212+let%expect_test "zero length group" =
1313+ let empty = group bos in
1414+ t empty "";
1515+ [%expect {| (Group ( (0 0))( (0 0))) |}];
1616+ t empty "x";
1717+ [%expect {| (Group ( (0 0))( (0 0))) |}]
1818+;;
(* A regex without explicit groups: only the whole match is reported. *)
1919+2020+let%expect_test "no group" =
2121+ let re = any in
2222+ t re "";
2323+ [%expect {| <None> |}];
2424+ t re ".";
2525+ [%expect {| (Group (. (0 1))) |}]
2626+;;
(* Two consecutive single-character groups. *)
2727+2828+let%expect_test "two groups" =
2929+ let re = seq [ group any; group any ] in
3030+ t re "a";
3131+ [%expect {| <None> |}];
3232+ t re "ab";
3333+ [%expect {| (Group (ab (0 2))(a (0 1))(b (1 2))) |}];
3434+ t re "abc";
3535+ [%expect {| (Group (ab (0 2))(a (0 1))(b (1 2))) |}]
3636+;;
(* Alternation where only the right branch is grouped: per the recorded
   output the group is left unmatched, with offsets (-1 -1). *)
3737+3838+let%expect_test "maybe group" =
3939+ let twoany = seq [ any; any ] in
4040+ let re = alt [ twoany; group twoany ] in
4141+ t re "aa";
4242+ [%expect {| (Group (aa (0 2))( (-1 -1))) |}];
4343+ t re "a";
4444+ [%expect {| <None> |}]
4545+;;
(* A group nested inside another group. *)
4646+4747+let%expect_test "nesting of groups" =
4848+ let re = group (seq [ group (char 'a'); char 'b' ]) in
4949+ t re "ab";
5050+ [%expect {| (Group (ab (0 2))(ab (0 2))(a (0 1))) |}]
5151+;;
(* Groups inside alternations, and the effect of [no_group] on them. Here [t]
   is rebound to [Import.exec_partial_detailed]. *)
5252+5353+let%expect_test "group choice" =
5454+ let t = Import.exec_partial_detailed in
5555+ (* Alternation of character sets isn't flattened *)
5656+ let lhs_group =
5757+ let open Re in
5858+ alt [ group (char 'a'); char 'b' ]
5959+ in
6060+ t lhs_group "a";
6161+ [%expect {| `Full [|0,1,"a";0,1,"a"|] |}];
6262+ t lhs_group "b";
6363+ [%expect {| `Full [|0,1,"b";-1,-1,<No match>|] |}];
6464+ t
6565+ (let open Re in
6666+ alt [ group (char 'a'); group (char 'b') ])
6767+ "b";
6868+ [%expect {| `Full [|0,1,"b";-1,-1,<No match>;0,1,"b"|] |}];
6969+ (* No_group inside char set: *)
7070+ let no_group_charset =
7171+ let a = Re.group (Re.char 'a') in
7272+ let b = Re.char 'b' in
7373+ Re.no_group (Re.alt [ a; b ])
7474+ in
7575+ t no_group_charset "a";
7676+ [%expect {| `Full [|0,1,"a"|] |}];
7777+ t no_group_charset "b";
7878+ [%expect {| `Full [|0,1,"b"|] |}];
7979+ (* No_group outside char set *)
8080+ let no_group_string =
8181+ let aa = Re.group (Re.str "aa") in
8282+ let bb = Re.str "bb" in
8383+ Re.no_group (Re.alt [ aa; bb ])
8484+ in
8585+ t no_group_string "aa";
8686+ [%expect {| `Full [|0,2,"aa"|] |}];
8787+ t no_group_string "bb";
8888+ [%expect {| `Full [|0,2,"bb"|] |}]
8989+;;
(* Exercises the four accessors on each group index of one match. Index 2 is
   the optional group that did not take part in the match: [get_opt] prints
   <None>, [get] and [offset] raise Not_found, and [test] is false. *)
9090+9191+let%expect_test "Group.{get,get_opt,offset,test}" =
9292+ let r = seq [ group (char 'a'); opt (group (char 'a')); group (char 'b') ] in
9393+ let m = exec (compile r) "ab" in
9494+ let test idx =
9595+ Format.printf "get_opt = %a@." (Fmt.opt Fmt.str) (Group.get_opt m idx);
9696+ Format.printf "get = %a@." (or_not_found Fmt.str) (fun () -> Group.get m idx);
9797+ Format.printf "test = %b@." (Group.test m idx);
9898+ Format.printf "offset = %a@." (or_not_found offset) (fun () -> Group.offset m idx)
9999+ in
100100+ test 0;
101101+ [%expect {|
102102+ get_opt = ab
103103+ get = ab
104104+ test = true
105105+ offset = (0, 2) |}];
106106+ test 1;
107107+ [%expect {|
108108+ get_opt = a
109109+ get = a
110110+ test = true
111111+ offset = (0, 1) |}];
112112+ test 2;
113113+ [%expect
114114+ {|
115115+ get_opt = <None>
116116+ get = Not_found
117117+ test = false
118118+ offset = Not_found |}];
119119+ test 3;
120120+ [%expect {|
121121+ get_opt = b
122122+ get = b
123123+ test = true
124124+ offset = (1, 2) |}];
125125+ Format.printf "%a@." (array offset) (Group.all_offset m);
126126+ [%expect {| [| (0, 2); (0, 1); (-1, -1); (1, 2) |] |}]
127127+;;
(* [nest] around a repeated alternation: per the recorded output the group
   is reported only when the last iteration matched it ("ba"), and is
   (-1, -1) when the last iteration took the ungrouped branch ("ab"). *)
128128+129129+let%expect_test "nest" =
130130+ let r = rep (nest (alt [ group (char 'a'); char 'b' ])) in
131131+ test_re r "ab";
132132+ [%expect {| [| (0, 2); (-1, -1) |] |}];
133133+ test_re r "ba";
134134+ [%expect {| [| (0, 2); (1, 2) |] |}]
135135+;;
(* [no_group] removes all groups: only the whole-match offsets remain. *)
136136+137137+let%expect_test "group/no_group" =
138138+ let r = seq [ group (char 'a'); opt (group (char 'a')); group (char 'b') ] in
139139+ test_re r "ab";
140140+ [%expect {| [| (0, 2); (0, 1); (-1, -1); (1, 2) |] |}];
141141+ test_re (no_group r) "ab";
142142+ [%expect {| [| (0, 2) |] |}]
143143+;;
+49
vendor/opam/re/lib_test/expect/test_hashset.ml
···11+open Import
(* Turn on backtrace recording for the tests in this file. *)
22+33+let () = Printexc.record_backtrace true
44+55+module Hash_set = Re_private.Hash_set
(* Three distinct integer keys used as set elements by the tests below. *)
66+77+let id1 = 1
88+let id2 = 2
99+let id3 = 3
(* Evaluates the predicate [f] on [table] and prints "[PASS]" when it holds.
   Otherwise prints "[FAIL]" followed by a dump of the table, to help diagnose
   the failure. *)
let test table f =
  match f table with
  | true -> print_endline "[PASS]"
  | false ->
    print_endline "[FAIL]";
    Format.printf "%a@." Hash_set.pp table
;;
(* A freshly created set is empty and does not contain [id1]. *)
1818+1919+let%expect_test "basic set" =
2020+ let set = Hash_set.create () in
2121+ test set Hash_set.is_empty;
2222+ [%expect {| [PASS] |}];
2323+ test set (fun set -> not (Hash_set.mem set id1));
2424+ [%expect {|
2525+ [PASS] |}]
2626+;;
(* Adds [id1] (twice), then [id2] and [id3], checking membership after each
   insertion and finally that all three are present together. *)
2727+2828+let%expect_test "add 1 element" =
2929+ let set = Hash_set.create () in
3030+ Hash_set.add set id1;
3131+ test set (fun set -> not (Hash_set.is_empty set));
3232+ [%expect {|
3333+ [PASS] |}];
3434+ test set (fun set -> Hash_set.mem set id1);
3535+ [%expect {|
3636+ [PASS] |}];
3737+ Hash_set.add set id1;
3838+ test set (fun set -> Hash_set.mem set id1);
3939+ [%expect {| [PASS] |}];
4040+ Hash_set.add set id2;
4141+ test set (fun set -> Hash_set.mem set id2);
4242+ [%expect {| [PASS] |}];
4343+ Hash_set.add set id3;
4444+ test set (fun set -> Hash_set.mem set id3);
4545+ [%expect {|
4646+ [PASS] |}];
4747+ test set (fun set -> List.for_all [ id1; id2; id3 ] ~f:(fun id -> Hash_set.mem set id));
4848+ [%expect {| [PASS] |}]
4949+;;
···11+open Import
22+open Re
(* Matches [r] against [s] and prints [true] when every mark in [il1] is set
   in the result and every mark in [il2] is not; prints [false] otherwise. *)
let test_mark ?pos ?len r s il1 il2 =
  let groups = exec ?pos ?len (compile r) s in
  let is_set mark = Mark.test groups mark in
  let all_expected_set () = List.for_all il1 ~f:is_set in
  let none_unexpected_set () = List.for_all il2 ~f:(fun mark -> not (is_set mark)) in
  Format.printf "%b@." (all_expected_set () && none_unexpected_set ())
;;
(* A single marked digit is reported as set after matching "0". *)
1111+1212+let%expect_test "mark" =
1313+ let i, r = mark digit in
1414+ test_mark r "0" [ i ] [];
1515+ [%expect {| true |}]
1616+;;
(* The same marked regex used twice in a sequence. *)
1717+1818+let%expect_test "mark seq" =
1919+ let i, r = mark digit in
2020+ let r = seq [ r; r ] in
2121+ test_mark r "02" [ i ] [];
2222+ [%expect {| true |}]
2323+;;
(* The marked regex under repetition. *)
2424+2525+let%expect_test "mark rep" =
2626+ let i, r = mark digit in
2727+ let r = rep r in
2828+ test_mark r "02" [ i ] [];
2929+ [%expect {| true |}]
3030+;;
(* In an alternation only the mark of the branch that matched is set; once
   the alternation is repeated over "ab", both marks are set. *)
3131+3232+let%expect_test "mark alt" =
3333+ let ia, ra = mark (char 'a') in
3434+ let ib, rb = mark (char 'b') in
3535+ let r = alt [ ra; rb ] in
3636+ test_mark r "a" [ ia ] [ ib ];
3737+ test_mark r "b" [ ib ] [ ia ];
3838+ [%expect {|
3939+ true
4040+ true |}];
4141+ let r = rep r in
4242+ test_mark r "ab" [ ia; ib ] [];
4343+ [%expect {| true |}]
4444+;;
(* When both branches of an alternation can match the same input, only the
   left-hand mark is set. *)
4545+4646+let%expect_test "mark prefers lhs" =
4747+ let two_chars = seq [ any; any ] in
4848+ let lhs, x = mark two_chars in
4949+ let rhs, x' = mark two_chars in
5050+ let r = alt [ x; x' ] in
5151+ test_mark r "aa" [ lhs ] [ rhs ];
5252+ [%expect {| true |}]
5353+;;
+69
vendor/opam/re/lib_test/expect/test_partial.ml
···11+open Import
(* Compiles [re], runs [Re.exec_partial] on [s] and prints the result as one
   of `Partial, `Full or `Mismatch, followed by a newline. *)
let t re s =
  let label =
    match Re.exec_partial (Re.compile re) s with
    | `Full -> "Full"
    | `Mismatch -> "Mismatch"
    | `Partial -> "Partial"
  in
  Format.printf "`%s@." label
;;
(* Records what [Re.exec_partial] currently reports for prefix, unrelated,
   complete and empty inputs. The inline comment below marks a case where the
   recorded result (`Partial) is not the one the authors consider right. *)
1313+1414+let%expect_test "partial matches" =
1515+ let open Re in
1616+ t (str "hello") "he";
1717+ [%expect {| `Partial |}];
1818+ t (str "hello") "goodbye";
1919+ [%expect {| `Partial |}];
2020+ (* exec_partial 3 should be `Full *)
2121+ t (str "hello") "hello";
2222+ [%expect {| `Partial |}];
2323+ t (whole_string (str "hello")) "hello";
2424+ [%expect {| `Partial |}];
2525+ t (whole_string (str "hello")) "goodbye";
2626+ [%expect {| `Mismatch |}];
2727+ t (str "hello") "";
2828+ [%expect {| `Partial |}];
2929+ t (str "") "hello";
3030+ [%expect {| `Full |}];
3131+ t (whole_string (str "hello")) "";
3232+ [%expect {| `Partial |}]
3333+;;
(* From here on [t] refers to [exec_partial_detailed], shadowing the printer
   defined earlier in this file. *)
3434+3535+let t = exec_partial_detailed
(* Same scenarios as "partial matches", with the detailed result: `Partial
   carries a position and `Full carries the group offsets and substrings. *)
3636+3737+let%expect_test "partial detailed" =
3838+ let open Re in
3939+ t (str "hello") "he";
4040+ [%expect {| `Partial 0 |}];
4141+ (* Because of how the matching engine currently works, situations where
4242+ the entirety of the input string cannot be a match like the test below
4343+ actually return the last character as a potential start instead of just
4444+ returning `Partial (String.length input). This is still fine however as
4545+ it still respects the mli contract, as no match could start before
4646+ the given position, and is fine in practice as testing an extra
4747+ character on extra input doesn't add much more in terms of workload.
4848+ *)
4949+ t (str "hello") "goodbye";
5050+ [%expect {| `Partial 6 |}];
5151+ t (str "hello") "hello";
5252+ [%expect {| `Full [|0,5,"hello"|] |}];
5353+ t (whole_string (str "hello")) "hello";
5454+ [%expect {| `Full [|0,5,"hello"|] |}];
5555+ t (whole_string (str "hello")) "goodbye";
5656+ [%expect {| `Mismatch |}];
5757+ t (str "hello") "";
5858+ [%expect {| `Partial 0 |}];
5959+ t (str "") "hello";
6060+ [%expect {| `Full [|0,0,""|] |}];
6161+ t (whole_string (str "hello")) "";
6262+ [%expect {| `Partial 0 |}];
6363+ t (str "abc") ".ab.ab";
6464+ [%expect {| `Partial 4 |}];
6565+ t ~pos:1 (seq [ not_boundary; str "b" ]) "ab";
6666+ [%expect {| `Full [|1,2,"b"|] |}];
6767+ t (seq [ group (str "a"); rep any; group (str "b") ]) ".acb.";
6868+ [%expect {| `Full [|1,4,"acb";1,2,"a";3,4,"b"|] |}]
6969+;;
+105
vendor/opam/re/lib_test/expect/test_pcre.ml
···11+open Import
22+module Pcre = Re_private.Pcre
(* Parses [re] with [Pcre.re] and, on success, hands the result to [t]
   together with [s]. Any exception raised while parsing is reported as
   "failed to parse"; exceptions raised by [t] are not intercepted. *)
let test re s =
  let parsed =
    try Some (Pcre.re re) with
    | _ -> None
  in
  match parsed with
  | None -> Format.printf "failed to parse@."
  | Some parsed -> t parsed s
;;
(* \Q...\E quoting: a terminated quote matches literally; per the recorded
   output an unterminated quote fails to parse. *)
99+1010+let%expect_test "quoted strings" =
1111+ test {|\Qfoo\E|} "foo";
1212+ [%expect {| (Group (foo (0 3))) |}];
1313+ test {|\Qbar|} "";
1414+ [%expect {| failed to parse |}];
1515+ test {|\Qbaz\|} "";
1616+ [%expect {| failed to parse |}];
1717+ test {|\Qba\Xz\E|} {|ba\Xz|};
1818+ [%expect {| (Group (ba\Xz (0 5))) |}]
1919+;;
(* Octal escapes of the form \0dd and \ddd; \999 is rejected. *)
2020+2121+let%expect_test "octal" =
2222+ test {|\025|} (String.make 1 '\o025');
2323+ [%expect {| (Group ( (0 1))) |}];
2424+ test {|\999|} "";
2525+ [%expect {| failed to parse |}];
2626+ test {|\111|} (String.make 1 '\o111');
2727+ [%expect {| (Group (I (0 1))) |}]
2828+;;
(* Braced \o{...} and \x{...} escapes; an unclosed brace fails to parse.
   NOTE(review): the recorded output for \o{111} against '\o111' is <None>,
   whereas plain \111 matches in the "octal" test above — confirm this is
   intended. *)
2929+3030+let%expect_test "\\x and \\o form" =
3131+ test {|\o{111}|} (String.make 1 '\o111');
3232+ [%expect {| <None> |}];
3333+ test {|\o{111|} "";
3434+ [%expect {| failed to parse |}];
3535+ test {|\x{ff}|} (String.make 1 '\xff');
3636+ [%expect {| (Group (� (0 1))) |}];
3737+ test {|\x{ff|} "";
3838+ [%expect {| failed to parse |}]
3939+;;
(* [Pcre.substitute] with a callback, including empty matches. The local
   [substitute] prints the result. *)
4040+4141+let%expect_test "substitute" =
4242+ let open Pcre in
4343+ let substitute ~rex ~subst s = substitute ~rex ~subst s |> print_endline in
4444+ let rex = regexp "[a-zA-Z]+" in
4545+ let subst = String.capitalize_ascii in
4646+ substitute ~rex ~subst " hello world; I love chips!";
4747+ [%expect {| Hello World; I Love Chips! |}];
4848+ substitute ~rex:re_empty ~subst:(fun _ -> "a") "";
4949+ [%expect {| a |}];
5050+ substitute ~rex:(regexp "a*") ~subst:(fun _ -> "*") "cat";
5151+ [%expect {| *c*t* |}];
5252+ let rex = regexp "^ *" in
5353+ substitute ~rex ~subst:(fun _ -> "A ") "test";
5454+ [%expect {| A test |}]
5555+;;
(* The [[:blank:]] class in a Perl pattern: prints, for each sample string,
   whether the pattern matches. *)
5656+5757+let%expect_test "test_blank_class" =
5858+ let re = Re.Perl.compile_pat "\\d[[:blank:]]\\d[[:blank:]]+[a-z]" in
5959+ let successes = [ "1 2 a"; "2\t3 z"; "9\t0 \t a" ] in
6060+ let failures = [ ""; "123"; " "; "1 3z" ] in
6161+ List.iter successes ~f:(fun s -> printf "String %S should match %b\n" s (Re.execp re s));
6262+ [%expect
6363+ {|
6464+ String "1 2 a" should match true
6565+ String "2\t3 z" should match true
6666+ String "9\t0 \t a" should match true |}];
6767+ List.iter failures ~f:(fun s ->
6868+ printf "String %S should not match %b\n" s (Re.execp re s));
6969+ [%expect
7070+ {|
7171+ String "" should not match false
7272+ String "123" should not match false
7373+ String " " should not match false
7474+ String "1 3z" should not match false |}]
7575+;;
(* Retrieves a named group's substring via [get_named_substring]. *)
7676+7777+let%expect_test "named groups" =
7878+ let open Pcre in
7979+ let rex = regexp "(?<many_x>x+)" in
8080+ let s = exec ~rex "testxxxyyy" in
8181+ print_endline (get_named_substring rex "many_x" s);
8282+ [%expect {| xxx |}]
8383+;;
(* [Re.Pcre.quote] on sample strings, followed by a sweep over all 256 byte
   values that collects every character whose quoted form is longer than one
   character. *)
8484+8585+let%expect_test "quote" =
8686+ let test s = Printf.printf "%S\n" (Re.Pcre.quote s) in
8787+ test "";
8888+ [%expect {| "" |}];
8989+ test "\000";
9090+ [%expect {| "\000" |}];
9191+ test "";
9292+ [%expect {| "" |}];
9393+ test (String.init (126 - 32) (fun x -> Char.chr (x + 32)));
9494+ [%expect
9595+ {xxx| " !\"#\\$%&'\\(\\)\\*\\+,-\\./0123456789:;<=>\\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\[\\\\]\\^_`abcdefghijklmnopqrstuvwxyz\\{\\|}" |xxx}];
9696+ let b = Buffer.create 100 in
9797+ for i = 0 to 255 do
9898+ let c = Char.chr i in
9999+ let s = Pcre.quote (String.make 1 c) in
100100+ if String.length s > 1 then Buffer.add_char b c
101101+ done;
102102+ let b = Buffer.contents b in
103103+ Printf.printf "%S\n" b;
104104+ [%expect {xxx| "$()*+.?[\\^{|" |xxx}]
105105+;;
···11+open Import
(* [Re.replace] with a callback: all matches, first match only ([~all:false]),
   and patterns that can match the empty string. *)
22+33+let%expect_test "test_replace" =
44+ let re = Re.Posix.compile_pat "[a-zA-Z]+" in
55+ let f sub = String.capitalize_ascii (Re.Group.get sub 0) in
66+ print_endline (Re.replace re ~f " hello world; I love chips!");
77+ [%expect {| Hello World; I Love Chips! |}];
88+ print_endline (Re.replace ~all:false re ~f " allo maman, bobo");
99+ [%expect {| Allo maman, bobo |}];
1010+ print_endline (Re.replace re_empty ~f:(fun _ -> "a") "");
1111+ [%expect {| a |}];
1212+ print_endline (Re.replace (Re.compile (Re.rep (Re.char 'a'))) ~f:(fun _ -> "*") "cat");
1313+ [%expect {| *c*t* |}]
1414+;;
(* [Re.replace_string] with a fixed replacement, for all matches and for the
   first match only. *)
1515+1616+let%expect_test "test_replace_string" =
1717+ let re = Re.Posix.compile_pat "_[a-zA-Z]+_" in
1818+ print_endline (Re.replace_string re ~by:"goodbye" "_hello_ world");
1919+ [%expect {| goodbye world |}];
2020+ print_endline (Re.replace_string ~all:false re ~by:"brown" "The quick _XXX_ fox");
2121+ [%expect {| The quick brown fox |}]
2222+;;
(* Replacement at zero-width assertions: beginning of line and end of word. *)
2323+2424+let%expect_test "test_bug_55" =
2525+ let re = Re.(compile bol) in
2626+ let res = Re.replace_string re ~by:"z" "abc" in
2727+ print_endline res;
2828+ [%expect {| zabc |}];
2929+ let re = Re.(compile eow) in
3030+ let res = Re.replace_string re ~by:"X" "one two three" in
3131+ print_endline res;
3232+ [%expect {| oneX twoX threeX |}]
3333+;;
+78
vendor/opam/re/lib_test/expect/test_repn.ml
···11+open Import
22+open Re
(* Exactly three repetitions of 'a', searched unanchored with [Re.execp]:
   "aaaa" is reported as matching too. *)
33+44+let%expect_test "fixed repetition" =
55+ let re = Re.compile @@ Re.(repn (char 'a') 3 (Some 3)) in
66+ let test s = printf "%b\n" (Re.execp re s) in
77+ test "";
88+ [%expect {| false |}];
99+ test "aa";
1010+ [%expect {| false |}];
1111+ test "aaa";
1212+ [%expect {| true |}];
1313+ test "aaaa";
1414+ [%expect {| true |}]
1515+;;
(* [repn] with various lower and upper bounds, followed by the bound
   combinations for which the recorded output is
   Invalid_argument "Re.repn". *)
1616+1717+let%expect_test "repn" =
1818+ let a = char 'a' in
1919+ test_re (repn a 0 None) "";
2020+ [%expect {| [| (0, 0) |] |}];
2121+ test_re (repn a 2 None) "a";
2222+ [%expect {| Not_found |}];
2323+ test_re (repn a 2 None) "aa";
2424+ [%expect {| [| (0, 2) |] |}];
2525+ test_re (repn a 0 (Some 0)) "";
2626+ [%expect {| [| (0, 0) |] |}];
2727+ test_re (repn a 1 (Some 2)) "a";
2828+ [%expect {| [| (0, 1) |] |}];
2929+ test_re (repn a 1 (Some 2)) "aa";
3030+ [%expect {| [| (0, 2) |] |}];
3131+ test_re (repn a 1 (Some 2)) "";
3232+ [%expect {| Not_found |}];
3333+ test_re (repn a 1 (Some 2)) "aaa";
3434+ [%expect {| [| (0, 2) |] |}];
3535+ invalid_argument (fun () -> repn empty (-1) None);
3636+ [%expect {| Invalid_argument "Re.repn" |}];
3737+ invalid_argument (fun () -> repn empty 1 (Some 0));
3838+ [%expect {| Invalid_argument "Re.repn" |}];
3939+ invalid_argument (fun () -> repn empty 4 (Some 3));
4040+ [%expect {| Invalid_argument "Re.repn" |}]
4141+;;
(* [rep1] requires at least one occurrence. *)
4242+4343+let%expect_test "rep1" =
4444+ test_re (rep1 (char 'a')) "a";
4545+ [%expect {| [| (0, 1) |] |}];
4646+ test_re (rep1 (char 'a')) "aa";
4747+ [%expect {| [| (0, 2) |] |}];
4848+ test_re (rep1 (char 'a')) "";
4949+ [%expect {| Not_found |}];
5050+ test_re (rep1 (char 'a')) "b";
5151+ [%expect {| Not_found |}]
5252+;;
(* [opt] matches zero or one occurrence. *)
5353+5454+let%expect_test "opt" =
5555+ test_re (opt (char 'a')) "";
5656+ [%expect {| [| (0, 0) |] |}];
5757+ test_re (opt (char 'a')) "a";
5858+ [%expect {| [| (0, 1) |] |}]
5959+;;
(* [copy s n] returns [s] concatenated with itself [n] times; [copy s 0] is
   the empty string. *)
let copy s n =
  let len = String.length s in
  let buf = Bytes.make (len * n) '\000' in
  (* Write the i-th copy of [s] at offset [i * len], for i = 0 .. n - 1. *)
  let rec fill i =
    if i < n
    then (
      Bytes.blit_string s 0 buf (i * len) len;
      fill (i + 1))
  in
  fill 0;
  Bytes.to_string buf
;;
(* A whole-string repetition of "abcde" must match 0, 1, 2 and 3
   concatenated copies of that string. Nothing is printed. *)
6969+7070+let%expect_test "repeat sequence" =
7171+ let s = "abcde" in
7272+ let re = str s |> rep |> whole_string |> compile in
7373+ for i = 0 to 3 do
7474+ let r = copy s i in
7575+ assert (Re.execp re r)
7676+ done;
7777+ [%expect {||}]
7878+;;
+76
vendor/opam/re/lib_test/expect/test_split.ml
···11+open Import
(* Compiled regexes shared by the split tests: runs of tabs or spaces, end of
   line, beginning of word and end of word. *)
22+33+let re_whitespace = Re.Posix.compile_pat "[\t ]+"
44+let re_eol = Re.compile Re.eol
55+let re_bow = Re.compile Re.bow
66+let re_eow = Re.compile Re.eow
(* [Re.split]: per the recorded outputs, no empty strings appear at either
   end of the result, and an empty input yields an empty list. *)
77+88+let%expect_test "split" =
99+ let split ?pos ?len re s = strings (Re.split ?pos ?len re s) in
1010+ split re_whitespace "aa bb c d ";
1111+ [%expect {| ["aa"; "bb"; "c"; "d"] |}];
1212+ split ~pos:1 ~len:4 re_whitespace "aa b c d";
1313+ [%expect {| ["a"; "b"] |}];
1414+ split re_whitespace " a full_word bc ";
1515+ [%expect {| ["a"; "full_word"; "bc"] |}];
1616+ split re_empty "abcd";
1717+ [%expect {| ["a"; "b"; "c"; "d"] |}];
1818+ split re_eol "a\nb";
1919+ [%expect {|
2020+ ["a"; "\nb"] |}];
2121+ split re_bow "a b";
2222+ [%expect {| ["a "; "b"] |}];
2323+ split re_eow "a b";
2424+ [%expect {| ["a"; " b"] |}];
2525+ split re_whitespace "";
2626+ [%expect {| [] |}];
2727+ split re_empty "";
2828+ [%expect {| [] |}]
2929+;;
(* [Re.split_delim]: same inputs as above; here the recorded outputs keep
   the empty strings next to leading and trailing delimiters. *)
3030+3131+let%expect_test "split_delim" =
3232+ let split_delim ?pos ?len re s = strings (Re.split_delim ?pos ?len re s) in
3333+ split_delim re_whitespace "aa bb c d ";
3434+ [%expect {| ["aa"; "bb"; "c"; "d"; ""] |}];
3535+ split_delim ~pos:1 ~len:4 re_whitespace "aa b c d";
3636+ [%expect {| ["a"; "b"; ""] |}];
3737+ split_delim re_whitespace " a full_word bc ";
3838+ [%expect {| [""; "a"; "full_word"; "bc"; ""] |}];
3939+ split_delim re_empty "abcd";
4040+ [%expect {| [""; "a"; "b"; "c"; "d"; ""] |}];
4141+ split_delim re_eol "a\nb";
4242+ [%expect {| ["a"; "\nb"; ""] |}];
4343+ split_delim re_bow "a b";
4444+ [%expect {| [""; "a "; "b"] |}];
4545+ split_delim re_eow "a b";
4646+ [%expect {| ["a"; " b"; ""] |}];
4747+ split_delim re_whitespace "";
4848+ [%expect {| [""] |}];
4949+ split_delim re_empty "";
5050+ [%expect {| [""; ""] |}]
5151+;;
(* [Re.split_full]: prints text pieces as `T and delimiters as `D, showing
   the full match (group 0) for each delimiter. *)
5252+5353+let%expect_test "split_full" =
5454+ let split_full ?pos ?len re s =
5555+ let res = Re.split_full ?pos ?len re s in
5656+ Format.printf
5757+ "[%a]@."
5858+ Fmt.(
5959+ list ~pp_sep:(Fmt.lit "; ") (fun fmt what ->
6060+ match what with
6161+ | `Text s -> Format.fprintf fmt "`T %S" s
6262+ | `Delim s -> Format.fprintf fmt "`D %S" (Re.Group.get s 0)))
6363+ res
6464+ in
6565+ split_full re_whitespace "aa bb c d ";
6666+ [%expect {| [`T "aa"; `D " "; `T "bb"; `D " "; `T "c"; `D " "; `T "d"; `D " "] |}];
6767+ split_full ~pos:1 ~len:5 re_whitespace "aa \tb c d";
6868+ [%expect {| [`T "a"; `D " \t"; `T "b"; `D " "] |}];
6969+ split_full re_whitespace " a full_word bc ";
7070+ [%expect {| [`D " "; `T "a"; `D " "; `T "full_word"; `D " "; `T "bc"; `D " "] |}];
7171+ split_full re_empty "ab";
7272+ [%expect {| [`D ""; `T "a"; `D ""; `T "b"; `D ""] |}];
7373+ split_full Re.(compile (rep (char 'a'))) "cat";
7474+ [%expect {| [`D ""; `T "c"; `D "a"; `T "t"; `D ""] |}];
7575+ ()
7676+;;
+330
vendor/opam/re/lib_test/expect/test_str.ml
···11+open Import
(* The signature of the standard [Str] module. The functor below is
   instantiated with both [Str] and [Re.Str] under this signature, so the two
   implementations can be driven through identical code. *)
22+33+module type Str_intf = module type of Str
(* Matching helpers parameterised over a Str-compatible implementation [R]. *)
44+55+module Test_matches (R : Str_intf) = struct
(* Collects the (start, end) offsets of groups 0, 1, 2, ... of the most recent
   match. A group whose lookup raises [Not_found] is recorded as (-1, -1);
   the scan stops at the first index for which [R.group_beginning] raises
   [Invalid_argument]. *)
66+ let groups () =
77+ let group i =
88+ try `Found (R.group_beginning i) with
99+ | Not_found -> `Not_found
1010+ | Invalid_argument _ -> `Not_exists
1111+ in
1212+ let rec loop acc i =
1313+ match group i with
1414+ | `Found p -> loop ((p, R.group_end i) :: acc) (i + 1)
1515+ | `Not_found -> loop ((-1, -1) :: acc) (i + 1)
1616+ | `Not_exists -> List.rev acc
1717+ in
1818+ loop [] 0
1919+ ;;
(* Searches forward for [r] in [s] starting at [pos] (default 0), using the
   case-insensitive compiler when [case] is false. Returns the group offsets,
   or [None] when [Not_found] is raised. *)
2020+2121+ let eq_match ?(pos = 0) ?(case = true) r s =
2222+ let pat = if case then R.regexp r else R.regexp_case_fold r in
2323+ try
2424+ ignore (R.search_forward pat s pos);
2525+ Some (groups ())
2626+ with
2727+ | Not_found -> None
2828+ ;;
(* Like [eq_match] but uses [R.string_match] at [pos] instead of searching.
   NOTE(review): the result of [R.string_match] is discarded, so [groups ()]
   is collected whatever it returns — presumably acceptable because only
   parity between the two implementations is compared; confirm. *)
2929+3030+ let eq_match' ?(pos = 0) ?(case = true) r s =
3131+ let pat = if case then R.regexp r else R.regexp_case_fold r in
3232+ try
3333+ ignore (R.string_match pat s pos);
3434+ Some (groups ())
3535+ with
3636+ | Not_found -> None
3737+ ;;
3838+end
(* The helpers instantiated for the reference [Str] and for [Re.Str]. *)
3939+4040+module T_str = Test_matches (Str)
4141+module T_re = Test_matches (Re.Str)
(* [test dyn_of_ok str re args] applies both [str] and [re] to [args],
   capturing either the returned value or the raised exception. Nothing is
   printed when the two outcomes are equal; otherwise both are printed, the
   first labelled "str:" and the second "re:", with successful values rendered
   through [dyn_of_ok] and exceptions through [Printexc.to_string]. *)
let test dyn_of_ok str re args =
  let capture f =
    try Ok (f args) with
    | exn -> Error exn
  in
  let str_outcome = capture str in
  let re_outcome = capture re in
  if not (Poly.equal str_outcome re_outcome)
  then (
    let render outcome =
      Dyn.result dyn_of_ok (fun exn -> Dyn.string (Printexc.to_string exn)) outcome
      |> sexp_of_dyn
      |> Base.Sexp.to_string_hum
    in
    Printf.printf "str: %s\n" (render str_outcome);
    Printf.printf "re: %s\n" (render re_outcome))
;;
(* Converts an optional list of (start, stop) integer pairs into a [Dyn.t]:
   an option wrapping a list of pairs of ints. *)
let dyn_of_pairs x =
  let dyn_of_pair (start, stop) = Dyn.pair (Dyn.int start) (Dyn.int stop) in
  let dyn_of_list pairs = Dyn.list (List.map pairs ~f:dyn_of_pair) in
  Dyn.option dyn_of_list x
;;
(* Translates a list of [Str] split results into the equivalent [Re.Str]
   split results, constructor by constructor, so the two can be compared. *)
let split_result_conv results =
  let convert = function
    | Str.Text text -> Re.Str.Text text
    | Str.Delim delim -> Re.Str.Delim delim
  in
  List.map results ~f:convert
;;
(* Converts a list of [Re.Str] split results into a [Dyn.t] list whose
   elements are "Delim" or "Text" variants carrying the string. *)
let dyn_split_result_list list =
  let dyn_of_item = function
    | Re.Str.Delim delim -> Dyn.variant "Delim" [ Dyn.string delim ]
    | Re.Str.Text text -> Dyn.variant "Text" [ Dyn.string text ]
  in
  Dyn.list (List.map list ~f:dyn_of_item)
;;
(* Describes one operation that exists in both [Re.Str] and [Str]:
   - [name]: label of the operation;
   - [dyn_of_ok]: converts a successful result for printing;
   - [re_str] / [str]: the [Re.Str] and [Str] implementations, each taking a
     regexp of its own library plus the operation's arguments ['a]. *)
9090+9191+type ('a, 'b) test =
9292+ { name : string
9393+ ; dyn_of_ok : 'b -> Dyn.t
9494+ ; re_str : Re.Str.regexp -> 'a -> 'b
9595+ ; str : Str.regexp -> 'a -> 'b
9696+ }
(* [bounded_split]; the argument is a (string, limit) pair. *)
9797+9898+let bounded_split_t =
9999+ { name = "bounded_split"
100100+ ; dyn_of_ok = (fun x -> Dyn.list (List.map x ~f:Dyn.string))
101101+ ; re_str = (fun re (s, n) -> Re.Str.bounded_split re s n)
102102+ ; str = (fun re (s, n) -> Str.bounded_split re s n)
103103+ }
104104+;;
(* [bounded_full_split]; the [Str] result is converted to the [Re.Str] result
   type so both sides have the same type. *)
105105+106106+let bounded_full_split_t =
107107+ { name = "bounded_full_split"
108108+ ; dyn_of_ok = dyn_split_result_list
109109+ ; re_str = (fun re (s, n) -> Re.Str.bounded_full_split re s n)
110110+ ; str = (fun re (s, n) -> split_result_conv (Str.bounded_full_split re s n))
111111+ }
112112+;;
(* [full_split]; reuses the printer of [bounded_full_split_t] but takes only
   the string as argument. *)
113113+114114+let full_split_t =
115115+ { bounded_full_split_t with
116116+ name = "full_split"
117117+ ; re_str = (fun re s -> Re.Str.full_split re s)
118118+ ; str = (fun re s -> split_result_conv (Str.full_split re s))
119119+ }
120120+;;
(* [split_delim]. *)
121121+122122+let split_delim_t =
123123+ { name = "split_delim"
124124+ ; dyn_of_ok = (fun x -> Dyn.list (List.map x ~f:Dyn.string))
125125+ ; re_str = Re.Str.split_delim
126126+ ; str = Str.split_delim
127127+ }
128128+;;
(* [split]. *)
129129+130130+let split_t =
131131+ { name = "split"
132132+ ; dyn_of_ok = (fun x -> Dyn.list (List.map x ~f:Dyn.string))
133133+ ; re_str = Re.Str.split
134134+ ; str = Str.split
135135+ }
136136+;;
(* [global_replace]; the argument is a (replacement template, subject) pair. *)
137137+138138+let global_replace_t =
139139+ { name = "global_replace"
140140+ ; dyn_of_ok = Dyn.string
141141+ ; re_str = (fun re (r, s) -> Re.Str.global_replace re r s)
142142+ ; str = (fun re (r, s) -> Str.global_replace re r s)
143143+ }
144144+;;
(* Runs the search-based matcher of [T_str] and [T_re] on the same pattern
   and hands both to [test], with the [Str] one first and the [Re.Str] one
   second. *)
145145+146146+let eq_match ?pos ?case re =
147147+ test dyn_of_pairs (T_str.eq_match ?pos ?case re) (T_re.eq_match ?pos ?case re)
148148+;;
(* Same as [eq_match], using the [eq_match'] variants of [T_str] and
   [T_re]. *)
149149+150150+let eq_match' ?pos ?case re =
151151+ test dyn_of_pairs (T_str.eq_match' ?pos ?case re) (T_re.eq_match' ?pos ?case re)
152152+;;
(* [test t re args] compiles the pattern [re] with both [Str] and [Re.Str] and
   compares the two implementations of the operation described by [t] on
   [args], printing only when they disagree.

   The underlying comparison labels its first function "str:" and its second
   "re:". The [Str] implementation is therefore passed first and the [Re.Str]
   one second; they used to be passed the other way round, which would have
   mislabelled the two outputs whenever a mismatch was printed. *)
let test t re args =
  test t.dyn_of_ok (t.str (Str.regexp re)) (t.re_str (Re.Str.regexp re)) args
;;
(* Convenience wrappers binding [test] to each operation record. Operations
   with two arguments pack them into the pair their record expects. *)
157157+158158+let split_delim re s = test split_delim_t re s
159159+let split re s = test split_t re s
160160+let full_split re s = test full_split_t re s
161161+let bounded_split re s n = test bounded_split_t re (s, n)
162162+let bounded_full_split re s n = test bounded_full_split_t re (s, n)
163163+let global_replace re r s = test global_replace_t re (r, s)
(* The comparison helpers used below print only when the two implementations
   give different outcomes, so an empty expectation means the compared calls
   agreed. *)
164164+165165+let%expect_test "literal match" =
166166+ eq_match "a" "a";
167167+ eq_match "a" "b";
168168+ [%expect {||}]
169169+;;
170170+171171+let%expect_test "alt" =
172172+ eq_match "a\\|b" "a";
173173+ eq_match "a\\|b" "b";
174174+ eq_match "a\\|b" "c";
175175+ [%expect {||}]
176176+;;
177177+178178+let%expect_test "seq" =
179179+ eq_match "ab" "ab";
180180+ eq_match "ab" "ac";
181181+ [%expect {||}]
182182+;;
183183+184184+let%expect_test "epsilon" =
185185+ eq_match "" "";
186186+ eq_match "" "a";
187187+ [%expect {||}]
188188+;;
189189+190190+let%expect_test "rep" =
191191+ eq_match "a*" "";
192192+ eq_match "a*" "a";
193193+ eq_match "a*" "aa";
194194+ eq_match "a*" "b";
195195+ [%expect {||}]
196196+;;
197197+198198+let%expect_test "rep1" =
199199+ eq_match "a+" "a";
200200+ eq_match "a+" "aa";
201201+ eq_match "a+" "";
202202+ eq_match "a+" "b";
203203+ [%expect {| |}]
204204+;;
205205+206206+let%expect_test "opt" =
207207+ eq_match "a?" "";
208208+ eq_match "a?" "a";
209209+ [%expect {| |}]
210210+;;
211211+212212+let%expect_test "bol" =
213213+ eq_match "^a" "ab";
214214+ eq_match "^a" "b\na";
215215+ eq_match "^a" "ba";
216216+ [%expect {| |}]
217217+;;
218218+219219+let%expect_test "eol" =
220220+ eq_match "a$" "ba";
221221+ eq_match "a$" "a\nb";
222222+ eq_match "a$" "ba\n";
223223+ eq_match "a$" "ab";
224224+ [%expect {| |}]
225225+;;
(* Searches starting at position 1 rather than 0. *)
226226+227227+let%expect_test "start" =
228228+ eq_match ~pos:1 "Za" "xab";
229229+ eq_match ~pos:1 "Za" "xb\na";
230230+ eq_match ~pos:1 "Za" "xba";
231231+ [%expect {||}]
232232+;;
233233+234234+let%expect_test "match semantics" =
235235+ eq_match "\\(a\\|b\\)*b" "aabaab";
236236+ eq_match "aa\\|aaa" "aaaa";
237237+ eq_match "aaa\\|aa" "aaaa";
238238+ [%expect {||}]
239239+;;
(* The unclosed group is rejected by both implementations, but with different
   exceptions, which is why output is recorded for it. *)
240240+241241+let%expect_test "Group (or submatch)" =
242242+ eq_match "\\(a\\)\\(a\\)?\\(b\\)" "ab";
243243+ [%expect {| |}];
244244+ eq_match "\\(foo" "foo";
245245+ [%expect {|
246246+ str: (Error "Failure(\"\\\\( group not closed by \\\\)\")")
247247+ re: (Error Re_private.Emacs.Parse_error)
248248+ |}]
249249+;;
250250+251251+let%expect_test "Character set" =
252252+ eq_match "[0-9]+" "0123456789";
253253+ eq_match "[0-9]+" "a";
254254+ eq_match "[9-0]+" "2";
255255+ eq_match "[5-5]" "5";
256256+ eq_match "[5-4]" "1";
257257+ eq_match' "[]]" "]";
258258+ eq_match' "[a-]" "-";
259259+ eq_match' "[-a]" "-";
260260+ eq_match' "]" "]";
261261+ eq_match' "[^b-f]" "z";
262262+ eq_match' "[^b-f]" "a";
263263+ [%expect {||}];
264264+ (* These errors aren't correct *)
265265+ eq_match' "[]" "x";
266266+ eq_match' "[" "[";
267267+ [%expect
268268+ {|
269269+ str: (Error "Failure(\"[ class not closed by ]\")")
270270+ re: (Error Re_private.Emacs.Parse_error)
271271+ str: (Error "Failure(\"[ class not closed by ]\")")
272272+ re: (Error Re_private.Emacs.Parse_error)
273273+ |}]
274274+;;
275275+276276+let%expect_test "compl" =
277277+ eq_match "[^0-9a-z]+" "A:Z+";
278278+ eq_match "[^0-9a-z]+" "0";
279279+ eq_match "[^0-9a-z]+" "a";
280280+ [%expect {||}]
281281+;;
282282+283283+let%expect_test "Word modifiers" =
284284+ eq_match' "\\bfoo" "foo";
285285+ eq_match' "\\<foo" "foo";
286286+ eq_match' "foo\\>" "foo";
287287+ eq_match' "z\\Bfoo" "zfoo";
288288+ eq_match' "\\`foo" "foo";
289289+ eq_match' "foo\\'" "foo";
290290+ [%expect {||}]
291291+;;
(* Uses the case-insensitive compiler ([~case:false]). *)
292292+293293+let%expect_test "Case modifiers" =
294294+ eq_match ~case:false "abc" "abc";
295295+ eq_match ~case:false "abc" "ABC";
296296+ [%expect {| |}]
297297+;;
298298+299299+let%expect_test "global_replace" =
300300+ global_replace "needle" "test" "needlehaystack";
301301+ global_replace "needle" "" "";
302302+ global_replace "needle" "" "needle";
303303+ global_replace "xxx" "yyy" "zzz";
304304+ global_replace "test\\([0-9]*\\)" "\\1-foo-\\1" "test100 test200 test";
305305+ global_replace "test\\([0-9]*\\)" "'\\-0'" "test100 test200 test";
306306+ (* Regression test for #129 *)
307307+ global_replace "\\(X+\\)" "A\\1YY" "XXXXXXZZZZ";
308308+ [%expect {||}]
309309+;;
(* Each triple is (pattern, subject, limit). *)
310310+311311+let%expect_test "bounded_split, bounded_full_split" =
312312+ [ ",", "foo,bar,baz", 5
313313+ ; ",", "foo,bar,baz", 1
314314+ ; ",", "foo,bar,baz", 0
315315+ ; ",\\|", "foo,bar|baz", 4
316316+ ]
317317+ |> List.iter ~f:(fun (re, s, n) ->
318318+ bounded_full_split re s n;
319319+ bounded_split re s n);
320320+ [%expect {||}]
321321+;;
(* Each pair is (pattern, subject). Note that "\b" here is the OCaml string
   escape for a backspace character, not the two-character regex \b. *)
322322+323323+let%expect_test "split, full_split, split_delim" =
324324+ [ "re", ""; " ", "foo bar"; "\b", "one-two three"; "[0-9]", "One3TwoFive" ]
325325+ |> List.iter ~f:(fun (re, s) ->
326326+ split re s;
327327+ full_split re s;
328328+ split_delim re s);
329329+ [%expect {||}]
330330+;;
+203
vendor/opam/re/lib_test/expect/test_stream.ml
···11+open Import
22+module Stream = Re.Stream
(* Feeds the whole of [str] into the stream [t], prints a one-line report and
   returns the feed result unchanged.

   On [No_match] the report says the string did not match. On [Ok next] the
   resulting stream is additionally finalized with an empty string and the
   report states whether that finalization matched. *)
let feed t str =
  let outcome = Stream.feed t str ~pos:0 ~len:(String.length str) in
  (match outcome with
   | No_match -> Printf.printf "%S did not match\n" str
   | Ok next ->
     let status =
       if Stream.finalize next "" ~pos:0 ~len:0 then "matched" else "unmatched"
     in
     Printf.printf "%S not matched (status = %s)\n" str status);
  outcome
;;
(* Checks argument validation of the four streaming entry points:
   [Stream.feed], [Stream.finalize], [Stream.Group.feed] and
   [Stream.Group.finalize]. Each is called on a three-character string with
   [~pos:2 ~len:3], i.e. a range ending at offset 5, and each call is
   expected to print the same [Invalid_argument] message shown in the
   expectation that follows it.
   NOTE(review): [invalid_argument] is a helper defined outside this chunk;
   presumably it runs the thunk and prints the exception -- confirm.
   NOTE(review): the test name contains the word out twice; it looks like a
   typo but is left unchanged here because it is a string literal. *)
1919+2020+let%expect_test "out out of bounds" =
2121+ let stream = Re.any |> Re.compile |> Stream.create in
2222+ invalid_argument (fun () -> ignore (Stream.feed stream "foo" ~pos:2 ~len:3));
2323+ [%expect {| Invalid_argument "index out of bounds" |}];
2424+ invalid_argument (fun () -> ignore (Stream.finalize stream "foo" ~pos:2 ~len:3));
2525+ [%expect {| Invalid_argument "index out of bounds" |}];
2626+ let stream = Stream.Group.create stream in
2727+ invalid_argument (fun () -> ignore (Stream.Group.feed stream "foo" ~pos:2 ~len:3));
2828+ [%expect {| Invalid_argument "index out of bounds" |}];
2929+ invalid_argument (fun () -> ignore (Stream.Group.finalize stream "foo" ~pos:2 ~len:3));
3030+ [%expect {| Invalid_argument "index out of bounds" |}]
3131+;;
(* Incremental matching of the literal abab anchored with [Re.bos].

   Steps, each followed by the output the local [feed] helper is expected
   to print:
   1. feeding x to the fresh stream is reported as a non-match;
   2. feeding ab to the fresh stream (the result of step 1 was ignored)
      yields [Ok] with status unmatched, and [s] is rebound to that state;
   3. feeding ab to the new state yields [Ok] with status matched; the
      returned state is discarded;
   4. feeding xy to the same state as in step 3 is reported as a non-match.

   Steps 3 and 4 both start from the state produced in step 2. *)
3232+3333+let%expect_test "basic" =
3434+ let s = [ Re.bos; Re.str "abab" ] |> Re.seq |> Re.compile |> Stream.create in
3535+ ignore (feed s "x");
3636+ [%expect {| "x" did not match |}];
3737+ let suffix = "ab" in
3838+ let s =
3939+ match feed s suffix with
4040+ | Ok s -> s
4141+ | No_match -> assert false
4242+ in
4343+ [%expect {|
4444+ "ab" not matched (status = unmatched) |}];
4545+ (let (_ : _ Stream.feed) = feed s "ab" in
4646+ [%expect {|
4747+ "ab" not matched (status = matched) |}]);
4848+ let (_ : _ Stream.feed) = feed s "xy" in
4949+ [%expect {|
5050+ "xy" did not match |}]
5151+;;
(* Streaming match of the literal zzz followed by [Re.eos].

   - Feeding zzz in one go to the fresh stream is expected to report status
     matched (the result is ignored, so the stream is reused below).
   - Feeding a single z to the fresh stream is expected to report status
     unmatched; [s] is rebound to the resulting state.
   - Finalizing that state with the remaining zz must return [true];
     otherwise the test aborts via [assert false]. This step prints
     nothing, hence the empty final expectation. *)
5252+5353+let%expect_test "eos" =
5454+ let s = [ Re.str "zzz"; Re.eos ] |> Re.seq |> Re.compile |> Stream.create in
5555+ ignore (feed s "zzz");
5656+ [%expect {| "zzz" not matched (status = matched) |}];
5757+ let s =
5858+ match feed s "z" with
5959+ | Ok s -> s
6060+ | No_match -> assert false
6161+ in
6262+ [%expect {| "z" not matched (status = unmatched) |}];
6363+ (let str = "zz" in
6464+ match Stream.finalize s str ~pos:0 ~len:(String.length str) with
6565+ | true -> ()
6666+ | false -> assert false);
6767+ [%expect {||}]
6868+;;
(* Feeds the complete subject to a stream built from [Re.whole_string] of
   that same literal, then asserts that finalizing with an empty input
   (pos 0, len 0) returns [true]. The only output comes from the local
   [feed] helper, which is expected to report status matched. Inside the
   inner [match], the name [s] is rebound from the subject string to the
   stream state returned by [feed]. *)
6969+7070+let%expect_test "finalize empty" =
7171+ let s = "abde" in
7272+ let stream =
7373+ let stream = Re.str s |> Re.whole_string |> Re.compile |> Stream.create in
7474+ match feed stream s with
7575+ | Ok s -> s
7676+ | No_match -> assert false
7777+ in
7878+ assert (Stream.finalize stream "" ~pos:0 ~len:0);
7979+ [%expect {| "abde" not matched (status = matched) |}]
8080+;;
(* Smoke test for the group-tracking stream API. The regex is the literal
   foo wrapped in [whole_string] and [group]. The input is split across two
   calls: [Stream.Group.feed] receives the first character and must return
   [Ok]; [Stream.Group.finalize] receives the remaining two characters and
   must also return [Ok]. Any [No_match] aborts via [assert false]. Nothing
   is printed, so the expectation block is blank. *)
8181+8282+let%expect_test "group - basic" =
8383+ let s =
8484+ let open Re in
8585+ str "foo" |> whole_string |> group |> compile |> Stream.create
8686+ in
8787+ let g = Stream.Group.create s in
8888+ let g =
8989+ match Stream.Group.feed g "f" ~pos:0 ~len:1 with
9090+ | No_match -> assert false
9191+ | Ok s -> s
9292+ in
9393+ (match Stream.Group.finalize g "oo" ~pos:0 ~len:2 with
9494+ | Ok _ -> ()
9595+ | No_match -> assert false);
9696+ [%expect {| |}]
9797+;;
(* [pmarks set m] prints one line stating whether
   [Re.Stream.Group.Match.test_mark set m] holds, formatted with [%b]
   (i.e. true or false). In the tests below [set] is the value carried by
   the [Ok] result of [Stream.Group.finalize] and [m] is a mark returned by
   [Re.mark]. *)
9898+9999+let pmarks set m =
100100+ Printf.printf "mark present %b\n" (Re.Stream.Group.Match.test_mark set m)
101101+;;
(* Two marked sub-expressions in sequence: [m1] marks the character f and
   [m2] marks the literal oo. The input is delivered in two pieces -- the
   first character through [Stream.Group.feed], the remaining two through
   [Stream.Group.finalize] -- and both calls must return [Ok]. After
   finalizing, both marks are expected to be reported as present by the
   local [pmarks] helper. *)
102102+103103+let%expect_test "group - mark entire string must match" =
104104+ let m1, f = Re.(mark (char 'f')) in
105105+ let m2, oo = Re.(mark (str "oo")) in
106106+ let re =
107107+ let open Re in
108108+ [ f; oo ] |> seq |> compile
109109+ in
110110+ let s = Stream.create re in
111111+ let g = Stream.Group.create s in
112112+ let g =
113113+ match Stream.Group.feed g "f" ~pos:0 ~len:1 with
114114+ | No_match -> assert false
115115+ | Ok s -> s
116116+ in
117117+ let g =
118118+ match Stream.Group.finalize g "oo" ~pos:0 ~len:2 with
119119+ | Ok g -> g
120120+ | No_match -> assert false
121121+ in
122122+ pmarks g m1;
123123+ [%expect {| mark present true |}];
124124+ pmarks g m2;
125125+ [%expect {| mark present true |}]
126126+;;
(* A single marked literal foo, compiled without any anchoring combinator.
   The stream receives three chunks in order: xx (via [feed]), foo (via
   [feed]) and garb (via [finalize]). Every call must return [Ok]; the
   test then expects the mark to be reported as present by the local
   [pmarks] helper, i.e. the marked literal is found even though it is
   surrounded by other input. *)
127127+128128+let%expect_test "group - partial mark match" =
129129+ let m, foo = Re.(mark (str "foo")) in
130130+ let re = Re.compile foo in
131131+ let s = Stream.create re in
132132+ let g = Stream.Group.create s in
133133+ let g =
134134+ match Stream.Group.feed g "xx" ~pos:0 ~len:2 with
135135+ | No_match -> assert false
136136+ | Ok g -> g
137137+ in
138138+ let g =
139139+ match Stream.Group.feed g "foo" ~pos:0 ~len:3 with
140140+ | Ok g -> g
141141+ | No_match -> assert false
142142+ in
143143+ let g =
144144+ match Stream.Group.finalize g "garb" ~pos:0 ~len:4 with
145145+ | Ok g -> g
146146+ | No_match -> assert false
147147+ in
148148+ pmarks g m;
149149+ [%expect {| mark present true |}]
150150+;;
(* [print_match match_ n] looks up group [n] with [Stream.Group.Match.get]
   and prints one line: the group index followed by the captured text when
   the lookup returns [Some], or by a not-found placeholder when it returns
   [None]. *)
151151+152152+let print_match match_ n =
153153+ match Stream.Group.Match.get match_ n with
154154+ | None -> Printf.printf "match %d: <not found>\n" n
155155+ | Some s -> Printf.printf "match %d: %s\n" n s
156156+;;
(* Group extraction when the whole subject is supplied in a single
   [Stream.Group.finalize] call. The regex comes from [Re.Pcre.re] with one
   capture group between underscores and is wrapped in [Re.whole_string].
   Groups 0 and 1 are printed with the local [print_match] helper: group 0
   is expected to be the entire subject and group 1 the text between the
   underscores. *)
157157+158158+let%expect_test "group - match group" =
159159+ let stream =
160160+ let re = Re.Pcre.re "_([a-z]+)_" |> Re.whole_string |> Re.compile in
161161+ Stream.Group.create (Stream.create re)
162162+ in
163163+ let s = "_abc_" in
164164+ let () =
165165+ match Stream.Group.finalize stream s ~pos:0 ~len:(String.length s) with
166166+ | No_match -> assert false
167167+ | Ok m ->
168168+ for i = 0 to 1 do
169169+ print_match m i
170170+ done
171171+ in
172172+ [%expect {|
173173+ match 0: _abc_
174174+ match 1: abc
175175+ |}]
176176+;;
(* Group extraction when the subject arrives in two chunks. The regex has
   two capture groups, each between underscores, and is wrapped in
   [Re.whole_string]. The first chunk goes through [Stream.Group.feed], the
   second through [Stream.Group.finalize]; both must return [Ok]. Groups 0
   to 2 are then printed with the local [print_match] helper: group 0 is
   expected to be the concatenation of both chunks, and groups 1 and 2 the
   text captured from the first and second chunk respectively.
   NOTE(review): this test has the same name as the test defined just
   before it in this file; a distinct name would make failures easier to
   attribute. Left unchanged here because the name is a string literal. *)
177177+178178+let%expect_test "group - match group" =
179179+ let stream =
180180+ let re = Re.Pcre.re "_([a-z]+)__([a-z]+)_" |> Re.whole_string |> Re.compile in
181181+ Stream.Group.create (Stream.create re)
182182+ in
183183+ let s = "_abc_" in
184184+ let stream =
185185+ match Stream.Group.feed stream s ~pos:0 ~len:(String.length s) with
186186+ | No_match -> assert false
187187+ | Ok m -> m
188188+ in
189189+ let s = "_de_" in
190190+ let () =
191191+ match Stream.Group.finalize stream s ~pos:0 ~len:(String.length s) with
192192+ | No_match -> assert false
193193+ | Ok m ->
194194+ for i = 0 to 2 do
195195+ print_match m i
196196+ done
197197+ in
198198+ [%expect {|
199199+ match 0: _abc__de_
200200+ match 1: abc
201201+ match 2: de
202202+ |}]
203203+;;
+12
vendor/opam/re/lib_test/expect/test_validation.ml
···11+open Import
(* Module-level setup for the validation tests.
   Backtrace recording is switched off -- presumably so that the uncaught
   exception text captured by the test below stays stable across builds
   (confirm).
   [any] is a compiled regex: a repetition of [Re.any]. Inside the local
   open, the inner [any] refers to [Re.any], not to the binding being
   defined. *)
22+33+let () = Printexc.record_backtrace false
44+let any = Re.(compile (rep any))
(* Exercises bounds checking of [Re.execp] on a three-character subject.
   - The first call passes only [~pos:4]; the blank expectation after it
     records that execution continues past this call without output.
   - The second call passes [~pos:1 ~len:3]; the expectation after it is
     marked unreachable, and the test as a whole is expected to end with
     the uncaught [Invalid_argument] shown in the attribute below. Note
     that the recorded message names Re.exec although the function called
     here is [Re.execp]. *)
55+66+let%expect_test "bound errors" =
77+ let (_ : bool) = Re.execp any ~pos:4 "foo" in
88+ [%expect {| |}];
99+ let (_ : bool) = Re.execp any ~pos:1 ~len:3 "foo" in
1010+ [%expect.unreachable]
1111+[@@expect.uncaught_exn {| (Invalid_argument "Re.exec: out of bounds") |}]
1212+;;
+6
vendor/opam/re/lib_test/expect/test_view.ml
···11+open Import
(* Minimal smoke test: builds a view of a literal regex with
   [Re.View.view] and discards the result. There is no expectation block,
   so the test only checks that the call completes without raising or
   printing. *)
22+33+let%expect_test "view" =
44+ let view = Re.View.view (Re.str "foo") in
55+ ignore view
66+;;