···11+v1.7.0 05-11-2025 Paris (France)
22+--------------------------------
33+44+* Fix categories on RSS2 (@yawaramin, @dinosaure, #87)
55+66+v1.6.1 05-20-2019 Mons (Belgium)
77+--------------------------------
88+99+* `Syndic.Date.of_rfc822` accepts dates such as “May 15th, 2019”.
1010+1111+v1.6.0 01-21-2019 Paris (France)
1212+--------------------------------
1313+1414+* Lost support of OCaml 4.03.0
1515+* Support of OCaml 4.07.0
1616+* Fix tests
1717+ * Move to `ocurl`
1818+ * Add materials to test in distribution
1919+* Accept entries with empty author (@thomas-huet)
2020+* Dunify project
2121+* Added Z timezone to comply with RFC 822 and reality (@wictory)
2222+* Accept empty CDATA in copyright field (@sgrove, @dinosaure)
+20
stack/syndic/LICENSE
···11+The MIT License (MIT)
22+33+Copyright (c) 2014 Cumulus
44+55+Permission is hereby granted, free of charge, to any person obtaining a copy of
66+this software and associated documentation files (the "Software"), to deal in
77+the Software without restriction, including without limitation the rights to
88+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
99+the Software, and to permit persons to whom the Software is furnished to do so,
1010+subject to the following conditions:
1111+1212+The above copyright notice and this permission notice shall be included in all
1313+copies or substantial portions of the Software.
1414+1515+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1616+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
1717+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
1818+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1919+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
2020+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+19
stack/syndic/README.md
···11+Syndic
22+======
33+44+RSS and Atom feed parsing
55+66+Documentation
77+=============
88+99+You can find it [here](http://cumulus.github.io/Syndic/).
1010+1111+Build Requirements
1212+==================
1313+1414+ * OCaml >= 4.01.0
1515+ * Ptime >= 0.8.0
1616+ * Xmlm >= 1.2.0
1717+ * Uri >= 1.3.1
1818+1919+[](https://travis-ci.org/Cumulus/Syndic)
···11+module Rss1 = Syndic_rss1
22+module Rss2 = Syndic_rss2
33+module Atom = Syndic_atom
44+module Opml1 = Syndic_opml1
55+module W3C = Syndic_w3c
66+module Date = Syndic_date
77+module XML = Syndic_xml
+1605
stack/syndic/lib/syndic_atom.ml
···11+open Syndic_common.XML
22+open Syndic_common.Util
33+module XML = Syndic_xml
44+module Error = Syndic_error
55+module Date = Syndic_date
66+77+let atom_ns = "http://www.w3.org/2005/Atom"
88+let xhtml_ns = "http://www.w3.org/1999/xhtml"
99+let namespaces = [atom_ns]
1010+1111+type rel = Alternate | Related | Self | Enclosure | Via | Link of Uri.t
1212+1313+type link =
1414+ { href: Uri.t
1515+ ; rel: rel
1616+ ; type_media: string option
1717+ ; hreflang: string option
1818+ ; title: string
1919+ ; length: int option }
2020+2121+let link ?type_media ?hreflang ?(title = "") ?length ?(rel = Alternate) href =
2222+ {href; rel; type_media; hreflang; title; length}
2323+2424+type link' =
2525+ [ `HREF of Uri.t
2626+ | `Rel of string
2727+ | `Type of string
2828+ | `HREFLang of string
2929+ | `Title of string
3030+ | `Length of string ]
3131+3232+(* The actual XML content is supposed to be inside a <div> which is NOT part of
3333+ the content. *)
3434+let rec get_xml_content xml0 = function
3535+ | XML.Data (_, s) :: tl ->
3636+ if only_whitespace s then get_xml_content xml0 tl
3737+ else xml0 (* unexpected *)
3838+ | XML.Node (_pos, tag, data) :: tl when tag_is tag "div" ->
3939+ let is_space =
4040+ List.for_all
4141+ (function XML.Data (_, s) -> only_whitespace s | _ -> false)
4242+ tl
4343+ in
4444+ if is_space then data else xml0
4545+ | _ -> xml0
4646+4747+let no_namespace = Some ""
4848+let rm_namespace _ = no_namespace
4949+5050+(* For HTML, the spec says the whole content needs to be escaped
5151+ http://tools.ietf.org/html/rfc4287#section-3.1.1.2 (some feeds use <![CDATA[
5252+ ]]>) so a single data item should be present. If not, assume the HTML was
5353+ properly parsed and convert it back to a string as it should. *)
5454+let get_html_content html =
5555+ match html with
5656+ | [XML.Data (_, d)] -> d
5757+ | h ->
5858+ (* It is likely that, when the HTML was parsed, the Atom namespace was
5959+ applied. Remove it. *)
6060+ String.concat "" (List.map (XML.to_string ~ns_prefix:rm_namespace) h)
6161+6262+type text_construct =
6363+ | Text of string
6464+ | Html of Uri.t option * string
6565+ | Xhtml of Uri.t option * XML.t list
6666+6767+let text_construct_of_xml ~xmlbase
6868+ ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) =
6969+ let xmlbase = xmlbase_of_attr ~xmlbase attr in
7070+ match find (fun a -> attr_is a "type") attr with
7171+ | Some (_, "html") -> Html (xmlbase, get_html_content data)
7272+ | Some (_, "application/xhtml+xml") | Some (_, "xhtml") ->
7373+ Xhtml (xmlbase, get_xml_content data data)
7474+ | _ -> Text (get_leaf data)
7575+7676+type author = {name: string; uri: Uri.t option; email: string option}
7777+7878+let empty_author = {name= ""; uri= None; email= None}
7979+let not_empty_author a = a.name <> "" || a.uri <> None || a.email <> None
8080+let author ?uri ?email name = {uri; email; name}
8181+8282+type person' = [`Name of string | `URI of Uri.t | `Email of string]
8383+8484+let make_person datas ~pos:_ (l : [< person'] list) =
8585+ (* element atom:name { text } *)
8686+ let name =
8787+ match find (function `Name _ -> true | _ -> false) l with
8888+ | Some (`Name s) -> s
8989+ | _ ->
9090+ (* The spec mandates that <author><name>name</name></author> but
9191+ several feeds just do <author>name</author> *)
9292+ get_leaf datas
9393+ in
9494+ (* element atom:uri { atomUri }? *)
9595+ let uri =
9696+ match find (function `URI _ -> true | _ -> false) l with
9797+ | Some (`URI u) -> Some u
9898+ | _ -> None
9999+ in
100100+ (* element atom:email { atomEmailAddress }? *)
101101+ let email =
102102+ match find (function `Email _ -> true | _ -> false) l with
103103+ | Some (`Email e) -> Some e
104104+ | _ -> None
105105+ in
106106+ ({name; uri; email} : author)
107107+108108+let make_author datas ~pos a = `Author (make_person datas ~pos a)
109109+110110+let person_name_of_xml ~xmlbase:_ (_pos, _tag, datas) =
111111+ `Name (try get_leaf datas with Not_found -> "")
112112+113113+(* mandatory ? *)
114114+115115+let person_uri_of_xml ~xmlbase (pos, _tag, datas) =
116116+ try `URI (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
117117+ with Not_found ->
118118+ raise
119119+ (Error.Error (pos, "The content of <uri> MUST be a non-empty string"))
120120+121121+let person_email_of_xml ~xmlbase:_ (_pos, _tag, datas) =
122122+ `Email (try get_leaf datas with Not_found -> "")
123123+124124+(* mandatory ? *)
125125+126126+(* {[ atomAuthor = element atom:author { atomPersonConstruct } ]} where
127127+128128+ atomPersonConstruct = atomCommonAttributes, (element atom:name { text } &
129129+ element atom:uri { atomUri }? & element atom:email { atomEmailAddress }? &
130130+ extensionElement * )
131131+132132+ This specification assigns no significance to the order of appearance of the
133133+ child elements in a Person construct. *)
134134+let person_data_producer =
135135+ [ ("name", person_name_of_xml)
136136+ ; ("uri", person_uri_of_xml)
137137+ ; ("email", person_email_of_xml) ]
138138+139139+let author_of_xml ~xmlbase ((_, _, datas) as xml) =
140140+ generate_catcher ~namespaces ~data_producer:person_data_producer
141141+ (make_author datas) ~xmlbase xml
142142+143143+type uri = Uri.t option * string
144144+type person = [`Email of string | `Name of string | `URI of uri] list
145145+146146+let person_data_producer' =
147147+ [ ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a))
148148+ ; ("uri", dummy_of_xml ~ctor:(fun ~xmlbase a -> `URI (xmlbase, a)))
149149+ ; ("email", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Email a)) ]
150150+151151+let author_of_xml' =
152152+ generate_catcher ~namespaces ~data_producer:person_data_producer'
153153+ (fun ~pos:_ x -> `Author x )
154154+155155+type category = {term: string; scheme: Uri.t option; label: string option}
156156+157157+let category ?scheme ?label term = {scheme; label; term}
158158+159159+type category' = [`Term of string | `Scheme of Uri.t | `Label of string]
160160+161161+let make_category ~pos (l : [< category'] list) =
162162+ (* attribute term { text } *)
163163+ let term =
164164+ match find (function `Term _ -> true | _ -> false) l with
165165+ | Some (`Term t) -> t
166166+ | _ ->
167167+ raise
168168+ (Error.Error (pos, "Category elements MUST have a 'term' attribute"))
169169+ in
170170+ (* attribute scheme { atomUri }? *)
171171+ let scheme =
172172+ match find (function `Scheme _ -> true | _ -> false) l with
173173+ | Some (`Scheme u) -> Some u
174174+ | _ -> None
175175+ in
176176+ (* attribute label { text }? *)
177177+ let label =
178178+ match find (function `Label _ -> true | _ -> false) l with
179179+ | Some (`Label l) -> Some l
180180+ | _ -> None
181181+ in
182182+ `Category ({term; scheme; label} : category)
183183+184184+let scheme_of_xml ~xmlbase a = `Scheme (XML.resolve ~xmlbase (Uri.of_string a))
185185+186186+(* atomCategory = element atom:category { atomCommonAttributes, attribute term
187187+ { text }, attribute scheme { atomUri }?, attribute label { text }?,
188188+ undefinedContent } *)
189189+let category_attr_producer =
190190+ [ ("term", fun ~xmlbase:_ a -> `Term a)
191191+ ; ("label", fun ~xmlbase:_ a -> `Label a) ]
192192+193193+let category_of_xml =
194194+ let attr_producer = ("scheme", scheme_of_xml) :: category_attr_producer in
195195+ generate_catcher ~attr_producer make_category
196196+197197+let category_of_xml' =
198198+ let attr_producer =
199199+ ("scheme", fun ~xmlbase:_ a -> `Scheme a) :: category_attr_producer
200200+ in
201201+ generate_catcher ~attr_producer (fun ~pos:_ x -> `Category x)
202202+203203+let make_contributor datas ~pos a = `Contributor (make_person datas ~pos a)
204204+205205+let contributor_of_xml ~xmlbase ((_, _, datas) as xml) =
206206+ generate_catcher ~namespaces ~data_producer:person_data_producer
207207+ (make_contributor datas) ~xmlbase xml
208208+209209+let contributor_of_xml' =
210210+ generate_catcher ~namespaces ~data_producer:person_data_producer'
211211+ (fun ~pos:_ x -> `Contributor x )
212212+213213+type generator = {version: string option; uri: Uri.t option; content: string}
214214+215215+let generator ?uri ?version content = {uri; version; content}
216216+217217+type generator' = [`URI of Uri.t | `Version of string | `Content of string]
218218+219219+let make_generator ~pos (l : [< generator'] list) =
220220+ (* text *)
221221+ let content =
222222+ match find (function `Content _ -> true | _ -> false) l with
223223+ | Some (`Content c) -> c
224224+ | _ ->
225225+ raise
226226+ (Error.Error
227227+ (pos, "The content of <generator> MUST be a non-empty string"))
228228+ in
229229+ (* attribute version { text }? *)
230230+ let version =
231231+ match find (function `Version _ -> true | _ -> false) l with
232232+ | Some (`Version v) -> Some v
233233+ | _ -> None
234234+ in
235235+ (* attribute uri { atomUri }? *)
236236+ let uri =
237237+ match find (function `URI _ -> true | _ -> false) l with
238238+ | Some (`URI u) -> Some u
239239+ | _ -> None
240240+ in
241241+ `Generator ({version; uri; content} : generator)
242242+243243+(* URI, if present, MUST be an IRI reference [RFC3987]. The definition of "IRI"
244244+ excludes relative references but we resolve it anyway in case this is not
245245+ respected by the generator. *)
246246+let generator_uri_of_xml ~xmlbase a =
247247+ `URI (XML.resolve ~xmlbase (Uri.of_string a))
248248+249249+(* atomGenerator = element atom:generator { atomCommonAttributes, attribute uri
250250+ { atomUri }?, attribute version { text }?, text } *)
251251+let generator_of_xml =
252252+ let attr_producer =
253253+ [("version", fun ~xmlbase:_ a -> `Version a); ("uri", generator_uri_of_xml)]
254254+ in
255255+ let leaf_producer ~xmlbase:_ _pos data = `Content data in
256256+ generate_catcher ~attr_producer ~leaf_producer make_generator
257257+258258+let generator_of_xml' =
259259+ let attr_producer =
260260+ [ ("version", fun ~xmlbase:_ a -> `Version a)
261261+ ; ("uri", fun ~xmlbase a -> `URI (xmlbase, a)) ]
262262+ in
263263+ let leaf_producer ~xmlbase:_ _pos data = `Content data in
264264+ generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Generator x)
265265+266266+type icon = Uri.t
267267+268268+let make_icon ~pos (l : Uri.t list) =
269269+ (* (atomUri) *)
270270+ let uri =
271271+ match l with
272272+ | u :: _ -> u
273273+ | [] ->
274274+ raise
275275+ (Error.Error (pos, "The content of <icon> MUST be a non-empty string"))
276276+ in
277277+ `Icon uri
278278+279279+(* atomIcon = element atom:icon { atomCommonAttributes, } *)
280280+let icon_of_xml =
281281+ let leaf_producer ~xmlbase _pos data =
282282+ XML.resolve ~xmlbase (Uri.of_string data)
283283+ in
284284+ generate_catcher ~leaf_producer make_icon
285285+286286+let icon_of_xml' =
287287+ let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in
288288+ generate_catcher ~leaf_producer (fun ~pos:_ x -> `Icon x)
289289+290290+type id = Uri.t
291291+292292+let make_id ~pos (l : string list) =
293293+ (* (atomUri) *)
294294+ let id =
295295+ match l with
296296+ | u :: _ -> Uri.of_string u
297297+ | [] ->
298298+ raise
299299+ (Error.Error (pos, "The content of <id> MUST be a non-empty string"))
300300+ in
301301+ `ID id
302302+303303+(* atomId = element atom:id { atomCommonAttributes, (atomUri) } *)
304304+let id_of_xml, id_of_xml' =
305305+ let leaf_producer ~xmlbase:_ _pos data = data in
306306+ ( generate_catcher ~leaf_producer make_id
307307+ , generate_catcher ~leaf_producer (fun ~pos:_ x -> `ID x) )
308308+309309+let rel_of_string s =
310310+ match String.lowercase_ascii (String.trim s) with
311311+ | "alternate" -> Alternate
312312+ | "related" -> Related
313313+ | "self" -> Self
314314+ | "enclosure" -> Enclosure
315315+ | "via" -> Via
316316+ | uri ->
317317+ (* RFC 4287 § 4.2.7.2: the use of a relative reference other than a
318318+ simple name is not allowed. Thus no need to resolve against xml:base. *)
319319+ Link (Uri.of_string uri)
320320+321321+let make_link ~pos (l : [< link'] list) =
322322+ (* attribute href { atomUri } *)
323323+ let href =
324324+ match find (function `HREF _ -> true | _ -> false) l with
325325+ | Some (`HREF u) -> u
326326+ | _ ->
327327+ raise (Error.Error (pos, "Link elements MUST have a 'href' attribute"))
328328+ in
329329+ (* attribute rel { atomNCName | atomUri }? *)
330330+ let rel =
331331+ match find (function `Rel _ -> true | _ -> false) l with
332332+ | Some (`Rel r) -> rel_of_string r
333333+ | _ -> Alternate
334334+ (* cf. RFC 4287 § 4.2.7.2 *)
335335+ in
336336+ (* attribute type { atomMediaType }? *)
337337+ let type_media =
338338+ match find (function `Type _ -> true | _ -> false) l with
339339+ | Some (`Type t) -> Some t
340340+ | _ -> None
341341+ in
342342+ (* attribute hreflang { atomLanguageTag }? *)
343343+ let hreflang =
344344+ match find (function `HREFLang _ -> true | _ -> false) l with
345345+ | Some (`HREFLang l) -> Some l
346346+ | _ -> None
347347+ in
348348+ (* attribute title { text }? *)
349349+ let title =
350350+ match find (function `Title _ -> true | _ -> false) l with
351351+ | Some (`Title s) -> s
352352+ | _ -> ""
353353+ in
354354+ (* attribute length { text }? *)
355355+ let length =
356356+ match find (function `Length _ -> true | _ -> false) l with
357357+ | Some (`Length i) -> Some (int_of_string i)
358358+ | _ -> None
359359+ in
360360+ `Link ({href; rel; type_media; hreflang; title; length} : link)
361361+362362+let link_href_of_xml ~xmlbase a =
363363+ `HREF (XML.resolve ~xmlbase (Uri.of_string a))
364364+365365+(* atomLink = element atom:link { atomCommonAttributes, attribute href {
366366+ atomUri }, attribute rel { atomNCName | atomUri }?, attribute type {
367367+ atomMediaType }?, attribute hreflang { atomLanguageTag }?, attribute title {
368368+ text }?, attribute length { text }?, undefinedContent } *)
369369+let link_attr_producer =
370370+ [ ("rel", fun ~xmlbase:_ a -> `Rel a)
371371+ ; ("type", fun ~xmlbase:_ a -> `Type a)
372372+ ; ("hreflang", fun ~xmlbase:_ a -> `HREFLang a)
373373+ ; ("title", fun ~xmlbase:_ a -> `Title a)
374374+ ; ("length", fun ~xmlbase:_ a -> `Length a) ]
375375+376376+let link_of_xml =
377377+ let attr_producer = ("href", link_href_of_xml) :: link_attr_producer in
378378+ generate_catcher ~attr_producer make_link
379379+380380+let link_of_xml' =
381381+ let attr_producer =
382382+ ("href", fun ~xmlbase:_ a -> `HREF a) :: link_attr_producer
383383+ in
384384+ generate_catcher ~attr_producer (fun ~pos:_ x -> `Link x)
385385+386386+type logo = Uri.t
387387+388388+let make_logo ~pos (l : Uri.t list) =
389389+ (* (atomUri) *)
390390+ let uri =
391391+ match l with
392392+ | u :: _ -> u
393393+ | [] ->
394394+ raise
395395+ (Error.Error (pos, "The content of <logo> MUST be a non-empty string"))
396396+ in
397397+ `Logo uri
398398+399399+(* atomLogo = element atom:logo { atomCommonAttributes, (atomUri) } *)
400400+let logo_of_xml =
401401+ let leaf_producer ~xmlbase _pos data =
402402+ XML.resolve ~xmlbase (Uri.of_string data)
403403+ in
404404+ generate_catcher ~leaf_producer make_logo
405405+406406+let logo_of_xml' =
407407+ let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in
408408+ generate_catcher ~leaf_producer (fun ~pos:_ x -> `Logo x)
409409+410410+type published = Date.t
411411+type published' = [`Date of string]
412412+413413+let make_published ~pos (l : [< published'] list) =
414414+ (* atom:published { atomDateConstruct } *)
415415+ let date =
416416+ match find (fun (`Date _) -> true) l with
417417+ | Some (`Date d) -> Date.of_rfc3339 d
418418+ | _ ->
419419+ raise
420420+ (Error.Error
421421+ (pos, "The content of <published> MUST be a non-empty string"))
422422+ in
423423+ `Published date
424424+425425+(* atomPublished = element atom:published { atomDateConstruct } *)
426426+let published_of_xml, published_of_xml' =
427427+ let leaf_producer ~xmlbase:_ _pos data = `Date data in
428428+ ( generate_catcher ~leaf_producer make_published
429429+ , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Published x) )
430430+431431+type rights = text_construct
432432+433433+let rights_of_xml ~xmlbase a = `Rights (text_construct_of_xml ~xmlbase a)
434434+435435+(* atomRights = element atom:rights { atomTextConstruct } *)
436436+let rights_of_xml' ~xmlbase:_
437437+ ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) =
438438+ `Rights data
439439+440440+type title = text_construct
441441+442442+let title_of_xml ~xmlbase a = `Title (text_construct_of_xml ~xmlbase a)
443443+444444+(* atomTitle = element atom:title { atomTextConstruct } *)
445445+let title_of_xml' ~xmlbase:_
446446+ ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) =
447447+ `Title data
448448+449449+type subtitle = text_construct
450450+451451+let subtitle_of_xml ~xmlbase a = `Subtitle (text_construct_of_xml ~xmlbase a)
452452+453453+(* atomSubtitle = element atom:subtitle { atomTextConstruct } *)
454454+let subtitle_of_xml' ~xmlbase:_
455455+ ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) =
456456+ `Subtitle data
457457+458458+type updated = Date.t
459459+type updated' = [`Date of string]
460460+461461+let make_updated ~pos (l : [< updated'] list) =
462462+ (* atom:updated { atomDateConstruct } *)
463463+ let updated =
464464+ match find (fun (`Date _) -> true) l with
465465+ | Some (`Date d) -> Date.of_rfc3339 d
466466+ | _ ->
467467+ raise
468468+ (Error.Error
469469+ (pos, "The content of <updated> MUST be a non-empty string"))
470470+ in
471471+ `Updated updated
472472+473473+(* atomUpdated = element atom:updated { atomDateConstruct } *)
474474+let updated_of_xml, updated_of_xml' =
475475+ let leaf_producer ~xmlbase:_ _pos data = `Date data in
476476+ ( generate_catcher ~leaf_producer make_updated
477477+ , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Updated x) )
478478+479479+type source =
480480+ { authors: author list
481481+ ; categories: category list
482482+ ; contributors: author list
483483+ ; generator: generator option
484484+ ; icon: icon option
485485+ ; id: id
486486+ ; links: link list
487487+ ; logo: logo option
488488+ ; rights: rights option
489489+ ; subtitle: subtitle option
490490+ ; title: title
491491+ ; updated: updated option }
492492+493493+let source ?(categories = []) ?(contributors = []) ?generator ?icon
494494+ ?(links = []) ?logo ?rights ?subtitle ?updated ~authors ~id title =
495495+ { authors
496496+ ; categories
497497+ ; contributors
498498+ ; generator
499499+ ; icon
500500+ ; id
501501+ ; links
502502+ ; logo
503503+ ; rights
504504+ ; subtitle
505505+ ; title
506506+ ; updated }
507507+508508+type source' =
509509+ [ `Author of author
510510+ | `Category of category
511511+ | `Contributor of author
512512+ | `Generator of generator
513513+ | `Icon of icon
514514+ | `ID of id
515515+ | `Link of link
516516+ | `Logo of logo
517517+ | `Subtitle of subtitle
518518+ | `Title of title
519519+ | `Rights of rights
520520+ | `Updated of updated ]
521521+522522+let make_source ~pos (l : [< source'] list) =
523523+ (* atomAuthor* *)
524524+ let authors =
525525+ List.fold_left
526526+ (fun acc -> function `Author x -> x :: acc | _ -> acc)
527527+ [] l
528528+ in
529529+ (* atomCategory* *)
530530+ let categories =
531531+ List.fold_left
532532+ (fun acc -> function `Category x -> x :: acc | _ -> acc)
533533+ [] l
534534+ in
535535+ (* atomContributor* *)
536536+ let contributors =
537537+ List.fold_left
538538+ (fun acc -> function `Contributor x -> x :: acc | _ -> acc)
539539+ [] l
540540+ in
541541+ (* atomGenerator? *)
542542+ let generator =
543543+ match find (function `Generator _ -> true | _ -> false) l with
544544+ | Some (`Generator g) -> Some g
545545+ | _ -> None
546546+ in
547547+ (* atomIcon? *)
548548+ let icon =
549549+ match find (function `Icon _ -> true | _ -> false) l with
550550+ | Some (`Icon u) -> Some u
551551+ | _ -> None
552552+ in
553553+ (* atomId? *)
554554+ let id =
555555+ match find (function `ID _ -> true | _ -> false) l with
556556+ | Some (`ID i) -> i
557557+ | _ ->
558558+ raise
559559+ (Error.Error
560560+ (pos, "<source> elements MUST contains exactly one <id> elements"))
561561+ in
562562+ (* atomLink* *)
563563+ let links =
564564+ List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l
565565+ in
566566+ (* atomLogo? *)
567567+ let logo =
568568+ match find (function `Logo _ -> true | _ -> false) l with
569569+ | Some (`Logo u) -> Some u
570570+ | _ -> None
571571+ in
572572+ (* atomRights? *)
573573+ let rights =
574574+ match find (function `Rights _ -> true | _ -> false) l with
575575+ | Some (`Rights r) -> Some r
576576+ | _ -> None
577577+ in
578578+ (* atomSubtitle? *)
579579+ let subtitle =
580580+ match find (function `Subtitle _ -> true | _ -> false) l with
581581+ | Some (`Subtitle s) -> Some s
582582+ | _ -> None
583583+ in
584584+ (* atomTitle? *)
585585+ let title =
586586+ match find (function `Title _ -> true | _ -> false) l with
587587+ | Some (`Title s) -> s
588588+ | _ ->
589589+ raise
590590+ (Error.Error
591591+ ( pos
592592+ , "<source> elements MUST contains exactly one <title> elements"
593593+ ))
594594+ in
595595+ (* atomUpdated? *)
596596+ let updated =
597597+ match find (function `Updated _ -> true | _ -> false) l with
598598+ | Some (`Updated d) -> Some d
599599+ | _ -> None
600600+ in
601601+ `Source
602602+ ( { authors
603603+ ; categories
604604+ ; contributors
605605+ ; generator
606606+ ; icon
607607+ ; id
608608+ ; links
609609+ ; logo
610610+ ; rights
611611+ ; subtitle
612612+ ; title
613613+ ; updated }
614614+ : source )
615615+616616+(* atomSource = element atom:source { atomCommonAttributes, (atomAuthor* &
617617+ atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId? &
618618+ atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle? &
619619+ atomUpdated? & extensionElement * ) } *)
620620+let source_of_xml =
621621+ let data_producer =
622622+ [ ("author", author_of_xml)
623623+ ; ("category", category_of_xml)
624624+ ; ("contributor", contributor_of_xml)
625625+ ; ("generator", generator_of_xml)
626626+ ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml)
627627+ ; ("logo", logo_of_xml); ("rights", rights_of_xml)
628628+ ; ("subtitle", subtitle_of_xml)
629629+ ; ("title", title_of_xml)
630630+ ; ("updated", updated_of_xml) ]
631631+ in
632632+ generate_catcher ~namespaces ~data_producer make_source
633633+634634+let source_of_xml' =
635635+ let data_producer =
636636+ [ ("author", author_of_xml')
637637+ ; ("category", category_of_xml')
638638+ ; ("contributor", contributor_of_xml')
639639+ ; ("generator", generator_of_xml')
640640+ ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml')
641641+ ; ("logo", logo_of_xml'); ("rights", rights_of_xml')
642642+ ; ("subtitle", subtitle_of_xml')
643643+ ; ("title", title_of_xml')
644644+ ; ("updated", updated_of_xml') ]
645645+ in
646646+ generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Source x)
647647+648648+type mime = string
649649+650650+type content =
651651+ | Text of string
652652+ | Html of Uri.t option * string
653653+ | Xhtml of Uri.t option * Syndic_xml.t list
654654+ | Mime of mime * string
655655+ | Src of mime option * Uri.t
656656+657657+[@@@warning "-34"]
658658+659659+type content' = [`Type of string | `SRC of string | `Data of Syndic_xml.t list]
660660+661661+(* atomInlineTextContent = element atom:content { atomCommonAttributes,
662662+ attribute type { "text" | "html" }?, (text)* }
663663+664664+ atomInlineXHTMLContent = element atom:content { atomCommonAttributes,
665665+ attribute type { "xhtml" }, xhtmlDiv }
666666+667667+ atomInlineOtherContent = element atom:content { atomCommonAttributes,
668668+ attribute type { atomMediaType }?, (text|anyElement)* }
669669+670670+ atomOutOfLineContent = element atom:content { atomCommonAttributes,
671671+ attribute type { atomMediaType }?, attribute src { atomUri }, empty }
672672+673673+ atomContent = atomInlineTextContent | atomInlineXHTMLContent |
674674+ atomInlineOtherContent | atomOutOfLineContent *)
675675+let content_of_xml ~xmlbase
676676+ ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) =
677677+ (* MIME ::= attribute type { "text" | "html" }? | attribute type { "xhtml" }
678678+ | attribute type { atomMediaType }? *)
679679+ (* attribute src { atomUri } | none If src s present, [data] MUST be empty. *)
680680+ match find (fun a -> attr_is a "src") attr with
681681+ | Some (_, src) ->
682682+ let mime =
683683+ match find (fun a -> attr_is a "type") attr with
684684+ | Some (_, ty) -> Some ty
685685+ | None -> None
686686+ in
687687+ `Content (Src (mime, XML.resolve ~xmlbase (Uri.of_string src)))
688688+ | None ->
689689+ (* (text)*
690690+ * | xhtmlDiv
691691+ * | (text|anyElement)*
692692+ * | none *)
693693+ `Content
694694+ ( match find (fun a -> attr_is a "type") attr with
695695+ | Some (_, "text") | None -> Text (get_leaf data)
696696+ | Some (_, "html") -> Html (xmlbase, get_html_content data)
697697+ | Some (_, "xhtml") -> Xhtml (xmlbase, get_xml_content data data)
698698+ | Some (_, mime) -> Mime (mime, get_leaf data) )
699699+700700+let content_of_xml' ~xmlbase:_
701701+ ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) =
702702+ let l =
703703+ match find (fun a -> attr_is a "src") attr with
704704+ | Some (_, src) -> [`SRC src]
705705+ | None -> []
706706+ in
707707+ let l =
708708+ match find (fun a -> attr_is a "type") attr with
709709+ | Some (_, ty) -> `Type ty :: l
710710+ | None -> l
711711+ in
712712+ `Content (`Data data :: l)
713713+714714+type summary = text_construct
715715+716716+(* atomSummary = element atom:summary { atomTextConstruct } *)
717717+let summary_of_xml ~xmlbase a = `Summary (text_construct_of_xml ~xmlbase a)
718718+719719+let summary_of_xml' ~xmlbase:_ ((_, (_, _), data) : XML.pos * XML.tag * t list)
720720+ =
721721+ `Summary data
722722+723723+type entry =
724724+ { authors: author * author list
725725+ ; categories: category list
726726+ ; content: content option
727727+ ; contributors: author list
728728+ ; id: id
729729+ ; links: link list
730730+ ; published: published option
731731+ ; rights: rights option
732732+ ; source: source option
733733+ ; summary: summary option
734734+ ; title: title
735735+ ; updated: updated }
736736+737737+let entry ?(categories = []) ?content ?(contributors = []) ?(links = [])
738738+ ?published ?rights ?source ?summary ~id ~authors ~title ~updated () =
739739+ { authors
740740+ ; categories
741741+ ; content
742742+ ; contributors
743743+ ; id
744744+ ; links
745745+ ; published
746746+ ; rights
747747+ ; source
748748+ ; summary
749749+ ; title
750750+ ; updated }
751751+752752+type entry' =
753753+ [ `Author of author
754754+ | `Category of category
755755+ | `Contributor of author
756756+ | `ID of id
757757+ | `Link of link
758758+ | `Published of published
759759+ | `Rights of rights
760760+ | `Source of source
761761+ | `Content of content
762762+ | `Summary of summary
763763+ | `Title of title
764764+ | `Updated of updated ]
765765+766766+module LinkOrder : Set.OrderedType with type t = string * string = struct
767767+ type t = string * string
768768+769769+ let compare (a : t) (b : t) =
770770+ match compare (fst a) (fst b) with 0 -> compare (snd a) (snd b) | n -> n
771771+end
772772+773773+module LinkSet = Set.Make (LinkOrder)
774774+775775+let uniq_link_alternate ~pos (l : link list) =
776776+ let string_of_duplicate_link {href; type_media; hreflang; _}
777777+ (type_media', hreflang') =
778778+ let ty = (function Some a -> a | None -> "(none)") type_media in
779779+ let hl = (function Some a -> a | None -> "(none)") hreflang in
780780+ let ty' = (function "" -> "(none)" | s -> s) type_media' in
781781+ let hl' = (function "" -> "(none)" | s -> s) hreflang' in
782782+ Printf.sprintf
783783+ "Duplicate link between <link href=\"%s\" hreflang=\"%s\" type=\"%s\" \
784784+ ..> and <link hreflang=\"%s\" type=\"%s\" ..>"
785785+ (Uri.to_string href) hl ty hl' ty'
786786+ in
787787+ let raise_error link link' =
788788+ raise (Error.Error (pos, string_of_duplicate_link link link'))
789789+ in
790790+ let rec aux acc = function
791791+ | [] -> l
792792+ | ({rel; type_media= Some ty; hreflang= Some hl; _} as x) :: r
793793+ when rel = Alternate ->
794794+ if LinkSet.mem (ty, hl) acc then
795795+ raise_error x (LinkSet.find (ty, hl) acc)
796796+ else aux (LinkSet.add (ty, hl) acc) r
797797+ | ({rel; type_media= None; hreflang= Some hl; _} as x) :: r
798798+ when rel = Alternate ->
799799+ if LinkSet.mem ("", hl) acc then
800800+ raise_error x (LinkSet.find ("", hl) acc)
801801+ else aux (LinkSet.add ("", hl) acc) r
802802+ | ({rel; type_media= Some ty; hreflang= None; _} as x) :: r
803803+ when rel = Alternate ->
804804+ if LinkSet.mem (ty, "") acc then
805805+ raise_error x (LinkSet.find (ty, "") acc)
806806+ else aux (LinkSet.add (ty, "") acc) r
807807+ | ({rel; type_media= None; hreflang= None; _} as x) :: r
808808+ when rel = Alternate ->
809809+ if LinkSet.mem ("", "") acc then
810810+ raise_error x (LinkSet.find ("", "") acc)
811811+ else aux (LinkSet.add ("", "") acc) r
812812+ | _ :: r -> aux acc r
813813+ in
814814+ aux LinkSet.empty l
815815+816816+type feed' =
817817+ [ `Author of author
818818+ | `Category of category
819819+ | `Contributor of author
820820+ | `Generator of generator
821821+ | `Icon of icon
822822+ | `ID of id
823823+ | `Link of link
824824+ | `Logo of logo
825825+ | `Rights of rights
826826+ | `Subtitle of subtitle
827827+ | `Title of title
828828+ | `Updated of updated
829829+ | `Entry of entry ]
830830+831831+let dummy_name = "\000"
832832+833833+let make_entry ~pos l =
834834+ let authors =
835835+ List.fold_left
836836+ (fun acc -> function `Author x -> x :: acc | _ -> acc)
837837+ [] l
838838+ in
839839+ (* atomSource? *)
840840+ let sources =
841841+ List.fold_left
842842+ (fun acc -> function `Source x -> x :: acc | _ -> acc)
843843+ [] l
844844+ in
845845+ let source =
846846+ match sources with
847847+ | [] -> None
848848+ | [s] -> Some s
849849+ | _ ->
850850+ (* RFC 4287 § 4.1.2 *)
851851+ let msg =
852852+ "<entry> elements MUST NOT contain more than one <source> element."
853853+ in
854854+ raise (Error.Error (pos, msg))
855855+ in
856856+ let authors =
857857+ match (authors, source) with
858858+ | a0 :: a, _ -> (a0, a)
859859+ | [], Some (s : source) -> (
860860+ (* If an atom:entry element does not contain atom:author elements, then
861861+ the atom:author elements of the contained atom:source element are
862862+ considered to apply. http://tools.ietf.org/html/rfc4287#section-4.2.1 *)
863863+ match s.authors with
864864+ | a0 :: a -> (a0, a)
865865+ | [] ->
866866+ let msg =
867867+ "<entry> does not contain an <author> and its <source> neither does"
868868+ in
869869+ raise (Error.Error (pos, msg)) )
870870+ | [], None -> ({name= dummy_name; uri= None; email= None}, [])
871871+ (* unacceptable value, see fix_author below *)
872872+ (* atomCategory* *)
873873+ in
874874+ let categories =
875875+ List.fold_left
876876+ (fun acc -> function `Category x -> x :: acc | _ -> acc)
877877+ [] l
878878+ (* atomContributor* *)
879879+ in
880880+ let contributors =
881881+ List.fold_left
882882+ (fun acc -> function `Contributor x -> x :: acc | _ -> acc)
883883+ [] l
884884+ in
885885+ (* atomId *)
886886+ let id =
887887+ match find (function `ID _ -> true | _ -> false) l with
888888+ | Some (`ID i) -> i
889889+ | _ ->
890890+ raise
891891+ (Error.Error
892892+ (pos, "<entry> elements MUST contains exactly one <id> elements"))
893893+ (* atomLink* *)
894894+ in
895895+ let links =
896896+ List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l
897897+ in
898898+ (* atomPublished? *)
899899+ let published =
900900+ match find (function `Published _ -> true | _ -> false) l with
901901+ | Some (`Published s) -> Some s
902902+ | _ -> None
903903+ in
904904+ (* atomRights? *)
905905+ let rights =
906906+ match find (function `Rights _ -> true | _ -> false) l with
907907+ | Some (`Rights r) -> Some r
908908+ | _ -> None
909909+ in
910910+ (* atomContent? *)
911911+ let content =
912912+ match find (function `Content _ -> true | _ -> false) l with
913913+ | Some (`Content c) -> Some c
914914+ | _ -> None
915915+ in
916916+ (* atomSummary? *)
917917+ let summary =
918918+ match find (function `Summary _ -> true | _ -> false) l with
919919+ | Some (`Summary s) -> Some s
920920+ | _ -> None
921921+ in
922922+ (* atomTitle *)
923923+ let title =
924924+ match find (function `Title _ -> true | _ -> false) l with
925925+ | Some (`Title t) -> t
926926+ | _ ->
927927+ raise
928928+ (Error.Error
929929+ ( pos
930930+ , "<entry> elements MUST contains exactly one <title> elements" ))
931931+ in
932932+ (* atomUpdated *)
933933+ let updated =
934934+ match find (function `Updated _ -> true | _ -> false) l with
935935+ | Some (`Updated u) -> u
936936+ | _ ->
937937+ raise
938938+ (Error.Error
939939+ ( pos
940940+ , "<entry> elements MUST contains exactly one <updated> elements"
941941+ ))
942942+ in
943943+ `Entry
944944+ ( pos
945945+ , ( { authors
946946+ ; categories
947947+ ; content
948948+ ; contributors
949949+ ; id
950950+ ; links= uniq_link_alternate ~pos links
951951+ ; published
952952+ ; rights
953953+ ; source
954954+ ; summary
955955+ ; title
956956+ ; updated }
957957+ : entry ) )
958958+959959+(* atomEntry = element atom:entry { atomCommonAttributes, (atomAuthor* &
960960+ atomCategory* & atomContent? & atomContributor* & atomId & atomLink* &
961961+ atomPublished? & atomRights? & atomSource? & atomSummary? & atomTitle &
962962+ atomUpdated & extensionElement * ) } *)
963963+let entry_of_xml =
964964+ let data_producer =
965965+ [ ("author", author_of_xml)
966966+ ; ("category", category_of_xml)
967967+ ; ("contributor", contributor_of_xml)
968968+ ; ("id", id_of_xml); ("link", link_of_xml)
969969+ ; ("published", published_of_xml)
970970+ ; ("rights", rights_of_xml); ("source", source_of_xml)
971971+ ; ("content", content_of_xml)
972972+ ; ("summary", summary_of_xml)
973973+ ; ("title", title_of_xml)
974974+ ; ("updated", updated_of_xml) ]
975975+ in
976976+ generate_catcher ~namespaces ~data_producer make_entry
977977+978978+let entry_of_xml' =
979979+ let data_producer =
980980+ [ ("author", author_of_xml')
981981+ ; ("category", category_of_xml')
982982+ ; ("contributor", contributor_of_xml')
983983+ ; ("id", id_of_xml'); ("link", link_of_xml')
984984+ ; ("published", published_of_xml')
985985+ ; ("rights", rights_of_xml'); ("source", source_of_xml')
986986+ ; ("content", content_of_xml')
987987+ ; ("summary", summary_of_xml')
988988+ ; ("title", title_of_xml')
989989+ ; ("updated", updated_of_xml') ]
990990+ in
991991+ generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Entry x)
992992+993993+type feed =
994994+ { authors: author list
995995+ ; categories: category list
996996+ ; contributors: author list
997997+ ; generator: generator option
998998+ ; icon: icon option
999999+ ; id: id
10001000+ ; links: link list
10011001+ ; logo: logo option
10021002+ ; rights: rights option
10031003+ ; subtitle: subtitle option
10041004+ ; title: title
10051005+ ; updated: updated
10061006+ ; entries: entry list }
10071007+10081008+let feed ?(authors = []) ?(categories = []) ?(contributors = []) ?generator
10091009+ ?icon ?(links = []) ?logo ?rights ?subtitle ~id ~title ~updated entries =
10101010+ { authors
10111011+ ; categories
10121012+ ; contributors
10131013+ ; generator
10141014+ ; icon
10151015+ ; id
10161016+ ; links
10171017+ ; logo
10181018+ ; rights
10191019+ ; subtitle
10201020+ ; title
10211021+ ; updated
10221022+ ; entries }
10231023+10241024+let make_feed ~pos (l : _ list) =
10251025+ (* atomAuthor* *)
10261026+ let authors =
10271027+ List.fold_left
10281028+ (fun acc -> function `Author x -> x :: acc | _ -> acc)
10291029+ [] l
10301030+ in
10311031+ (* atomCategory* *)
10321032+ let categories =
10331033+ List.fold_left
10341034+ (fun acc -> function `Category x -> x :: acc | _ -> acc)
10351035+ [] l
10361036+ in
10371037+ (* atomContributor* *)
10381038+ let contributors =
10391039+ List.fold_left
10401040+ (fun acc -> function `Contributor x -> x :: acc | _ -> acc)
10411041+ [] l
10421042+ in
10431043+ (* atomLink* *)
10441044+ let links =
10451045+ List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l
10461046+ in
10471047+ (* atomGenerator? *)
10481048+ let generator =
10491049+ match find (function `Generator _ -> true | _ -> false) l with
10501050+ | Some (`Generator g) -> Some g
10511051+ | _ -> None
10521052+ in
10531053+ (* atomIcon? *)
10541054+ let icon =
10551055+ match find (function `Icon _ -> true | _ -> false) l with
10561056+ | Some (`Icon i) -> Some i
10571057+ | _ -> None
10581058+ in
10591059+ (* atomId *)
10601060+ let id =
10611061+ match find (function `ID _ -> true | _ -> false) l with
10621062+ | Some (`ID i) -> i
10631063+ | _ ->
10641064+ raise
10651065+ (Error.Error
10661066+ (pos, "<feed> elements MUST contains exactly one <id> elements"))
10671067+ in
10681068+ (* atomLogo? *)
10691069+ let logo =
10701070+ match find (function `Logo _ -> true | _ -> false) l with
10711071+ | Some (`Logo l) -> Some l
10721072+ | _ -> None
10731073+ in
10741074+ (* atomRights? *)
10751075+ let rights =
10761076+ match find (function `Rights _ -> true | _ -> false) l with
10771077+ | Some (`Rights r) -> Some r
10781078+ | _ -> None
10791079+ in
10801080+ (* atomSubtitle? *)
10811081+ let subtitle =
10821082+ match find (function `Subtitle _ -> true | _ -> false) l with
10831083+ | Some (`Subtitle s) -> Some s
10841084+ | _ -> None
10851085+ in
10861086+ (* atomTitle *)
10871087+ let title =
10881088+ match find (function `Title _ -> true | _ -> false) l with
10891089+ | Some (`Title t) -> t
10901090+ | _ ->
10911091+ raise
10921092+ (Error.Error
10931093+ (pos, "<feed> elements MUST contains exactly one <title> elements"))
10941094+ in
10951095+ (* atomUpdated *)
10961096+ let updated =
10971097+ match find (function `Updated _ -> true | _ -> false) l with
10981098+ | Some (`Updated u) -> u
10991099+ | _ ->
11001100+ raise
11011101+ (Error.Error
11021102+ ( pos
11031103+ , "<feed> elements MUST contains exactly one <updated> elements"
11041104+ ))
11051105+ in
11061106+ (* atomEntry* *)
11071107+ let fix_author pos (e : entry) =
11081108+ match e.authors with
11091109+ | a, [] when a.name = dummy_name -> (
11101110+ (* In an Atom Feed Document, the atom:author elements of the containing
11111111+ atom:feed element are considered to apply to the entry if there are no
11121112+ atom:author elements in the locations described above.
11131113+ http://tools.ietf.org/html/rfc4287#section-4.2.1 *)
11141114+ match authors with
11151115+ | a0 :: a -> {e with authors= (a0, a)}
11161116+ | [] ->
11171117+ let msg =
11181118+ "<entry> elements MUST contains at least an <author> element or \
11191119+ <feed> element MUST contains one or more <author> elements"
11201120+ in
11211121+ raise (Error.Error (pos, msg)) )
11221122+ | _ -> e
11231123+ in
11241124+ let entries =
11251125+ List.fold_left
11261126+ (fun acc -> function `Entry (pos, e) -> fix_author pos e :: acc
11271127+ | _ -> acc )
11281128+ [] l
11291129+ in
11301130+ ( { authors
11311131+ ; categories
11321132+ ; contributors
11331133+ ; generator
11341134+ ; icon
11351135+ ; id
11361136+ ; links
11371137+ ; logo
11381138+ ; rights
11391139+ ; subtitle
11401140+ ; title
11411141+ ; updated
11421142+ ; entries }
11431143+ : feed )
11441144+11451145+(* atomFeed = element atom:feed { atomCommonAttributes, (atomAuthor* &
11461146+ atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId &
11471147+ atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle &
11481148+ atomUpdated & extensionElement * ), atomEntry* } *)
11491149+11501150+let feed_of_xml =
11511151+ let data_producer =
11521152+ [ ("author", author_of_xml)
11531153+ ; ("category", category_of_xml)
11541154+ ; ("contributor", contributor_of_xml)
11551155+ ; ("generator", generator_of_xml)
11561156+ ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml)
11571157+ ; ("logo", logo_of_xml); ("rights", rights_of_xml)
11581158+ ; ("subtitle", subtitle_of_xml)
11591159+ ; ("title", title_of_xml)
11601160+ ; ("updated", updated_of_xml)
11611161+ ; ("entry", entry_of_xml) ]
11621162+ in
11631163+ generate_catcher ~namespaces ~data_producer make_feed
11641164+11651165+let feed_of_xml' =
11661166+ let data_producer =
11671167+ [ ("author", author_of_xml')
11681168+ ; ("category", category_of_xml')
11691169+ ; ("contributor", contributor_of_xml')
11701170+ ; ("generator", generator_of_xml')
11711171+ ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml')
11721172+ ; ("logo", logo_of_xml'); ("rights", rights_of_xml')
11731173+ ; ("subtitle", subtitle_of_xml')
11741174+ ; ("title", title_of_xml')
11751175+ ; ("updated", updated_of_xml')
11761176+ ; ("entry", entry_of_xml') ]
11771177+ in
11781178+ generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> x)
11791179+11801180+(* Remove all tags *)
11811181+let rec add_to_buffer buf = function
11821182+ | XML.Node (_, _, subs) -> List.iter (add_to_buffer buf) subs
11831183+ | XML.Data (_, d) -> Buffer.add_string buf d
11841184+11851185+let xhtml_to_string xhtml =
11861186+ let buf = Buffer.create 128 in
11871187+ List.iter (add_to_buffer buf) xhtml ;
11881188+ Buffer.contents buf
11891189+11901190+let string_of_text_construct = function
11911191+ (* FIXME: Once we use a proper HTML library, we probably would like to parse
11921192+ the HTML and remove the tags *)
11931193+ | (Text s : text_construct) | Html (_, s) -> s
11941194+ | Xhtml (_, x) -> xhtml_to_string x
11951195+11961196+let parse ?self ?xmlbase input =
11971197+ let feed =
11981198+ match XML.of_xmlm input |> snd with
11991199+ | XML.Node (pos, tag, datas) when tag_is tag "feed" ->
12001200+ feed_of_xml ~xmlbase (pos, tag, datas)
12011201+ | _ ->
12021202+ raise
12031203+ (Error.Error
12041204+ ((0, 0), "document MUST contains exactly one <feed> element"))
12051205+ in
12061206+ (* FIXME: the spec says that an entry can appear as the top-level element *)
12071207+ match self with
12081208+ | None -> feed
12091209+ | Some self ->
12101210+ if List.exists (fun l -> l.rel = Self) feed.links then feed
12111211+ else
12121212+ let links =
12131213+ { href= self
12141214+ ; rel= Self
12151215+ ; type_media= Some "application/atom+xml"
12161216+ ; hreflang= None
12171217+ ; title= string_of_text_construct feed.title
12181218+ ; length= None }
12191219+ :: feed.links
12201220+ in
12211221+ {feed with links}
12221222+12231223+let read ?self ?xmlbase fname =
12241224+ let fh = open_in fname in
12251225+ try
12261226+ let x = parse ?self ?xmlbase (XML.input_of_channel fh) in
12271227+ close_in fh ; x
12281228+ with e -> close_in fh ; raise e
12291229+12301230+let set_self_link feed ?hreflang ?length url =
12311231+ match List.partition (fun l -> l.rel = Self) feed.links with
12321232+ | l :: _, links ->
12331233+ let hreflang =
12341234+ match hreflang with None -> l.hreflang | Some _ -> hreflang
12351235+ in
12361236+ let length = match length with None -> l.length | Some _ -> length in
12371237+ let self = {l with href= url; hreflang; length} in
12381238+ {feed with links= self :: links}
12391239+ | [], links ->
12401240+ let links =
12411241+ { href= url
12421242+ ; rel= Self
12431243+ ; type_media= Some "application/atom+xml"
12441244+ ; hreflang
12451245+ ; title= string_of_text_construct feed.title
12461246+ ; length }
12471247+ :: links
12481248+ in
12491249+ {feed with links}
12501250+12511251+let get_self_link feed =
12521252+ try Some (List.find (fun l -> l.rel = Self) feed.links) with Not_found ->
12531253+ None
12541254+12551255+let unsafe ?xmlbase input =
12561256+ match XML.of_xmlm input |> snd with
12571257+ | XML.Node (pos, tag, datas) when tag_is tag "feed" ->
12581258+ `Feed (feed_of_xml' ~xmlbase (pos, tag, datas))
12591259+ | _ -> `Feed []
12601260+12611261+let remove_empty_authors a = List.filter not_empty_author a
12621262+12631263+(* [normalize_authors a authors] returns (a', authors') where [authors'] is
12641264+ [authors] where the empty authors and the author [a] have been removed and
12651265+ [a'] is [a] possibly completed with the information found for [a] in
12661266+ [authors]. *)
12671267+let rec normalize_authors (a : author) = function
12681268+ | [] -> (a, [])
12691269+ | a0 :: tl ->
12701270+ if not_empty_author a0 then
12711271+ if a0.name = a.name then
12721272+ (* Merge [a0] and [a]. *)
12731273+ let uri = match a.uri with None -> a0.uri | Some _ -> a.uri in
12741274+ let email =
12751275+ match a.email with None -> a0.email | Some _ -> a.email
12761276+ in
12771277+ normalize_authors {name= a.name; uri; email} tl
12781278+ else
12791279+ let a', authors' = normalize_authors a tl in
12801280+ (a', a0 :: authors')
12811281+ else normalize_authors a tl
12821282+12831283+(* drop the empty author *)
12841284+12851285+let set_main_author_entry author (e : entry) =
12861286+ (* If the entry has a source, then [author] should be ignored and the one
12871287+ from the [source] should be used instead. *)
12881288+ let author, author_ok, source =
12891289+ match e.source with
12901290+ | None -> (author, true, None)
12911291+ | Some s -> (
12921292+ let s_authors = remove_empty_authors s.authors in
12931293+ let s_contributors = remove_empty_authors s.contributors in
12941294+ let s =
12951295+ Some {s with authors= s_authors; contributors= s_contributors}
12961296+ in
12971297+ (* A source exists. If it contains no author, one should not change the
12981298+ entry authors with [author] because that may wrongly attribute the
12991299+ post. *)
13001300+ match s_authors with
13011301+ | [] -> (author, false, s)
13021302+ | s_author :: _ -> (s_author, true, s) )
13031303+ in
13041304+ let a0, a = e.authors in
13051305+ let authors =
13061306+ match remove_empty_authors (a0 :: a) with
13071307+ | a0 :: a -> (a0, a)
13081308+ | [] -> ((if author_ok then author else empty_author), [])
13091309+ in
13101310+ let contributors = remove_empty_authors e.contributors in
13111311+ {e with authors; contributors; source}
13121312+13131313+let set_main_author feed author =
13141314+ let author, authors = normalize_authors author feed.authors in
13151315+ let contributors = remove_empty_authors feed.contributors in
13161316+ let entries = List.map (set_main_author_entry author) feed.entries in
13171317+ {feed with authors= author :: authors; contributors; entries}
13181318+13191319+(* Conversion to XML *)
13201320+13211321+(* Tag with the Atom namespace *)
13221322+let atom name : XML.tag = ((atom_ns, name), [])
13231323+13241324+let add_attr_xmlbase ~xmlbase attrs =
13251325+ match xmlbase with
13261326+ | Some u -> ((Xmlm.ns_xml, "base"), Uri.to_string u) :: attrs
13271327+ | None -> attrs
13281328+13291329+let text_construct_to_xml tag_name (t : text_construct) =
13301330+ match t with
13311331+ | Text t ->
13321332+ XML.Node
13331333+ ( dummy_pos
13341334+ , ((atom_ns, tag_name), [(("", "type"), "text")])
13351335+ , [XML.Data (dummy_pos, t)] )
13361336+ | Html (xmlbase, t) ->
13371337+ let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in
13381338+ XML.Node
13391339+ (dummy_pos, ((atom_ns, tag_name), attr), [XML.Data (dummy_pos, t)])
13401340+ | Xhtml (xmlbase, x) ->
13411341+ let div =
13421342+ XML.Node
13431343+ (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x)
13441344+ in
13451345+ let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in
13461346+ XML.Node (dummy_pos, ((atom_ns, tag_name), attr), [div])
13471347+13481348+let person_to_xml name (a : author) =
13491349+ XML.Node
13501350+ ( dummy_pos
13511351+ , atom name
13521352+ , [node_data (atom "name") a.name]
13531353+ |> add_node_uri (atom "uri") a.uri
13541354+ |> add_node_data (atom "email") a.email )
13551355+13561356+let author_to_xml a = person_to_xml "author" a
13571357+let contributor_to_xml a = person_to_xml "contributor" a
13581358+13591359+let category_to_xml (c : category) =
13601360+ let attrs =
13611361+ [(("", "term"), c.term)]
13621362+ |> add_attr_uri ("", "scheme") c.scheme
13631363+ |> add_attr ("", "label") c.label
13641364+ in
13651365+ XML.Node (dummy_pos, ((atom_ns, "category"), attrs), [])
13661366+13671367+let generator_to_xml (g : generator) =
13681368+ let attr =
13691369+ [] |> add_attr ("", "version") g.version |> add_attr_uri ("", "uri") g.uri
13701370+ in
13711371+ XML.Node
13721372+ ( dummy_pos
13731373+ , ((atom_ns, "generator"), attr)
13741374+ , [XML.Data (dummy_pos, g.content)] )
13751375+13761376+let string_of_rel = function
13771377+ | Alternate -> "alternate"
13781378+ | Related -> "related"
13791379+ | Self -> "self"
13801380+ | Enclosure -> "enclosure"
13811381+ | Via -> "via"
13821382+ | Link l -> Uri.to_string l
13831383+13841384+let link_to_xml (l : link) =
13851385+ let attr =
13861386+ [(("", "href"), Uri.to_string l.href); (("", "rel"), string_of_rel l.rel)]
13871387+ |> add_attr ("", "type") l.type_media
13881388+ |> add_attr ("", "hreflang") l.hreflang
13891389+ in
13901390+ let attr = if l.title = "" then attr else (("", "title"), l.title) :: attr in
13911391+ let attr =
13921392+ match l.length with
13931393+ | Some len -> (("", "length"), string_of_int len) :: attr
13941394+ | None -> attr
13951395+ in
13961396+ XML.Node (dummy_pos, ((atom_ns, "link"), attr), [])
13971397+13981398+let add_node_date tag date nodes =
13991399+ match date with
14001400+ | None -> nodes
14011401+ | Some d -> node_data tag (Date.to_rfc3339 d) :: nodes
14021402+14031403+let source_to_xml (s : source) =
14041404+ let nodes =
14051405+ node_data (atom "id") (Uri.to_string s.id)
14061406+ :: text_construct_to_xml "title" s.title
14071407+ :: List.map author_to_xml s.authors
14081408+ |> add_nodes_rev_map category_to_xml s.categories
14091409+ |> add_nodes_rev_map contributor_to_xml s.contributors
14101410+ |> add_node_option generator_to_xml s.generator
14111411+ |> add_node_option (node_uri (atom "icon")) s.icon
14121412+ |> add_nodes_rev_map link_to_xml s.links
14131413+ |> add_node_option (node_uri (atom "logo")) s.logo
14141414+ |> add_node_option (text_construct_to_xml "rights") s.rights
14151415+ |> add_node_option (text_construct_to_xml "subtitle") s.subtitle
14161416+ |> add_node_date (atom "updated") s.updated
14171417+ in
14181418+ XML.Node (dummy_pos, atom "source", nodes)
14191419+14201420+let content_to_xml (c : content) =
14211421+ match c with
14221422+ | Text t ->
14231423+ XML.Node
14241424+ ( dummy_pos
14251425+ , ((atom_ns, "content"), [(("", "type"), "text")])
14261426+ , [XML.Data (dummy_pos, t)] )
14271427+ | Html (xmlbase, t) ->
14281428+ let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in
14291429+ XML.Node
14301430+ (dummy_pos, ((atom_ns, "content"), attrs), [XML.Data (dummy_pos, t)])
14311431+ | Xhtml (xmlbase, x) ->
14321432+ let div =
14331433+ XML.Node
14341434+ (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x)
14351435+ in
14361436+ let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in
14371437+ XML.Node (dummy_pos, ((atom_ns, "content"), attrs), [div])
14381438+ | Mime (mime, d) ->
14391439+ XML.Node
14401440+ ( dummy_pos
14411441+ , ((atom_ns, "content"), [(("", "type"), mime)])
14421442+ , [XML.Data (dummy_pos, d)] )
14431443+ | Src (mime, uri) ->
14441444+ let attr =
14451445+ [(("", "src"), Uri.to_string uri)] |> add_attr ("", "type") mime
14461446+ in
14471447+ XML.Node (dummy_pos, ((atom_ns, "content"), attr), [])
14481448+14491449+let entry_to_xml (e : entry) =
14501450+ let a0, a = e.authors in
14511451+ let nodes =
14521452+ node_data (atom "id") (Uri.to_string e.id)
14531453+ :: text_construct_to_xml "title" e.title
14541454+ :: node_data (atom "updated") (Date.to_rfc3339 e.updated)
14551455+ :: author_to_xml a0
14561456+ :: List.map author_to_xml a
14571457+ |> add_nodes_rev_map category_to_xml e.categories
14581458+ |> add_node_option content_to_xml e.content
14591459+ |> add_nodes_rev_map contributor_to_xml e.contributors
14601460+ |> add_nodes_rev_map link_to_xml e.links
14611461+ |> add_node_date (atom "published") e.published
14621462+ |> add_node_option (text_construct_to_xml "rights") e.rights
14631463+ |> add_node_option source_to_xml e.source
14641464+ |> add_node_option (text_construct_to_xml "summary") e.summary
14651465+ in
14661466+ XML.Node (dummy_pos, atom "entry", nodes)
14671467+14681468+let to_xml (f : feed) =
14691469+ let nodes =
14701470+ node_data (atom "id") (Uri.to_string f.id)
14711471+ :: text_construct_to_xml "title" f.title
14721472+ :: node_data (atom "updated") (Date.to_rfc3339 f.updated)
14731473+ :: List.map entry_to_xml f.entries
14741474+ |> add_nodes_rev_map author_to_xml (List.rev f.authors)
14751475+ |> add_nodes_rev_map category_to_xml f.categories
14761476+ |> add_nodes_rev_map contributor_to_xml f.contributors
14771477+ |> add_node_option generator_to_xml f.generator
14781478+ |> add_node_option (node_uri (atom "icon")) f.icon
14791479+ |> add_nodes_rev_map link_to_xml f.links
14801480+ |> add_node_option (node_uri (atom "logo")) f.logo
14811481+ |> add_node_option (text_construct_to_xml "rights") f.rights
14821482+ |> add_node_option (text_construct_to_xml "subtitle") f.subtitle
14831483+ in
14841484+ XML.Node (dummy_pos, ((atom_ns, "feed"), [(("", "xmlns"), atom_ns)]), nodes)
14851485+14861486+(* Atom and XHTML have been declared well in the above XML representation. One
14871487+ can remove them. *)
14881488+let output_ns_prefix s = if s = atom_ns || s = xhtml_ns then Some "" else None
14891489+14901490+let output feed dest =
14911491+ let o = XML.make_output dest ~ns_prefix:output_ns_prefix in
14921492+ XML.to_xmlm (to_xml feed) o
14931493+14941494+let write feed fname =
14951495+ let fh = open_out fname in
14961496+ try
14971497+ output feed (`Channel fh) ;
14981498+ close_out fh
14991499+ with e -> close_out fh ; raise e
15001500+15011501+(* Comparing entries *)
15021502+15031503+let entry_date e = match e.published with Some d -> d | None -> e.updated
15041504+15051505+let ascending (e1 : entry) (e2 : entry) =
15061506+ Date.compare (entry_date e1) (entry_date e2)
15071507+15081508+let descending (e1 : entry) (e2 : entry) =
15091509+ Date.compare (entry_date e2) (entry_date e1)
15101510+15111511+(* Feed aggregation *)
15121512+15131513+let syndic_generator =
15141514+ { version= Some Syndic_conf.version
15151515+ ; uri= Some Syndic_conf.homepage
15161516+ ; content= "OCaml Syndic.Atom feed aggregator" }
15171517+15181518+let ocaml_icon = Uri.of_string "http://ocaml.org/img/colour-icon-170x148.png"
15191519+let default_title : text_construct = Text "Syndic.Atom aggregated feed"
15201520+15211521+let[@warning "-32"] is_alternate_Atom (l : link) =
15221522+ match l.type_media with
15231523+ | None -> false
15241524+ | Some ty -> ty = "application/atom+xml" && l.rel = Alternate
15251525+15261526+let add_entries_of_feed entries feed : entry list =
15271527+ let source_of_feed =
15281528+ Some
15291529+ { authors= feed.authors
15301530+ ; categories= feed.categories
15311531+ ; contributors= feed.contributors
15321532+ ; generator= feed.generator
15331533+ ; icon= feed.icon
15341534+ ; id= feed.id
15351535+ ; links= feed.links
15361536+ ; logo= feed.logo
15371537+ ; rights= feed.rights
15381538+ ; subtitle= feed.subtitle
15391539+ ; title= feed.title
15401540+ ; updated= Some feed.updated }
15411541+ in
15421542+ let add_entry entries (e : entry) =
15431543+ match e.source with
15441544+ | Some _ -> e :: entries (* if a source is present, do not overwrite it. *)
15451545+ | None -> {e with source= source_of_feed} :: entries
15461546+ in
15471547+ List.fold_left add_entry entries feed.entries
15481548+15491549+let entries_of_feeds feeds = List.fold_left add_entries_of_feed [] feeds
15501550+15511551+let more_recent d1 (e : entry) =
15521552+ if Date.compare d1 e.updated >= 0 then d1 else e.updated
15531553+15541554+let aggregate ?self ?id ?updated ?subtitle ?(title = default_title)
15551555+ ?(sort = `Newest_first) ?n feeds : feed =
15561556+ let entries = entries_of_feeds feeds in
15571557+ let entries =
15581558+ match sort with
15591559+ | `Newest_first -> List.sort descending entries
15601560+ | `Oldest_first -> List.sort ascending entries
15611561+ | `None -> entries
15621562+ in
15631563+ let entries = match n with Some n -> take entries n | None -> entries in
15641564+ let id =
15651565+ match id with
15661566+ | Some id -> id
15671567+ | None ->
15681568+ (* Collect all ids of the entries and "digest" them. *)
15691569+ let b = Buffer.create 4096 in
15701570+ let add_id (e : entry) = Buffer.add_string b (Uri.to_string e.id) in
15711571+ List.iter add_id entries ;
15721572+ let d = Digest.to_hex (Digest.string (Buffer.contents b)) in
15731573+ (* FIXME: use urn:uuid *)
15741574+ Uri.of_string ("urn:md5:" ^ d)
15751575+ in
15761576+ let links =
15771577+ match self with
15781578+ | Some u ->
15791579+ [ link u
15801580+ ~title:(string_of_text_construct title)
15811581+ ~rel:Self ~type_media:"application/atom+xml" ]
15821582+ | None -> []
15831583+ in
15841584+ let updated =
15851585+ match updated with
15861586+ | Some d -> d
15871587+ | None -> (
15881588+ (* Use the more recent date of the entries. *)
15891589+ match entries with
15901590+ | [] -> Date.epoch
15911591+ | e0 :: el -> List.fold_left more_recent e0.updated el )
15921592+ in
15931593+ { authors= []
15941594+ ; categories= []
15951595+ ; contributors= []
15961596+ ; generator= Some syndic_generator
15971597+ ; icon= Some ocaml_icon
15981598+ ; id
15991599+ ; links
16001600+ ; logo= None
16011601+ ; rights= None
16021602+ ; subtitle
16031603+ ; title
16041604+ ; updated
16051605+ ; entries }
+597
stack/syndic/lib/syndic_atom.mli
···11+(** [Syndic.Atom]: {{: http://tools.ietf.org/html/rfc4287} RFC 4287} compliant
22+ Atom parser. *)
33+44+module Error : module type of Syndic_error
55+66+(** {2 Structure of Atom document} *)
77+88+(** A {{:http://tools.ietf.org/html/rfc4287#section-3.1}text construct}. It
99+ contains human-readable text, usually in small quantities. The content of
1010+ Text constructs is Language-Sensitive.
1111+1212+ Since the constructors [Text], [Html] or [Xhtml] are shadowed by those of
1313+ the same name in the definition of {!type:content}, you may need a type
1414+ annotation to disambiguate the two. *)
1515+type text_construct =
1616+ | Text of string (** [Text(content)] *)
1717+ | Html of Uri.t option * string
1818+ (** [Html(xmlbase, content)] where the content is left unparsed. *)
1919+ | Xhtml of Uri.t option * Syndic_xml.t list (** [Xhtml(xmlbase, content)] *)
2020+2121+(** Describes a person, corporation, or similar entity (hereafter, 'person')
2222+ that indicates the author of the entry or feed. {{:
2323+ http://tools.ietf.org/html/rfc4287#section-3.2} See RFC 4287 § 3.2}.
2424+ Person constructs allow extension Metadata elements (see {{:
2525+ http://tools.ietf.org/html/rfc4287#section-6.4}Section 6.4}).
2626+2727+ They are used for authors
2828+ ({{:http://tools.ietf.org/html/rfc4287#section-4.2.1} See RFC 4287
2929+ § 4.2.1}) and contributors
3030+ ({{:http://tools.ietf.org/html/rfc4287#section-4.2.3} See RFC 4287
3131+ § 4.2.3}) *)
3232+type author = {name: string; uri: Uri.t option; email: string option}
3333+3434+val author : ?uri:Uri.t -> ?email:string -> string -> author
3535+3636+(** The [category] element conveys information about a category associated with
3737+ an entry or feed. This specification assigns no meaning to the content (if
3838+ any) of this element. {{:http://tools.ietf.org/html/rfc4287#section-4.2.2}
3939+ See RFC 4287 § 4.2.2}.
4040+4141+ - [term] is a string that identifies the category to which the entry or
4242+ feed belongs. {{: http://tools.ietf.org/html/rfc4287#section-4.2.2.2} See
4343+ RFC 4287 § 4.2.2.2} - [scheme], if present, is an IRI that identifies a
4444+ categorization scheme. {{:
4545+ http://tools.ietf.org/html/rfc4287#section-4.2.2.3} See RFC 4287 §
4646+ 4.2.2.3} - [label], if present, is a human-readable label for display in
4747+ end-user applications. The content of the "label" attribute is
4848+ Language-Sensitive. {{: http://tools.ietf.org/html/rfc4287#section-4.2.2.1}
4949+ See RFC 4287 § 4.2.2.1} *)
5050+type category = {term: string; scheme: Uri.t option; label: string option}
5151+5252+val category : ?scheme:Uri.t -> ?label:string -> string -> category
5353+5454+(** The [generator] element's content identifies the agent used to generate a
5555+ feed, for debugging and other purposes. - [content] is a human-readable
5656+ name for the generating agent. - [uri], if present, SHOULD produce a
5757+ representation that is relevant to that agent. - [version], if present,
5858+ indicates the version of the generating agent.
5959+6060+ See {{: http://tools.ietf.org/html/rfc4287#section-4.2.4}RFC 4287 §
6161+ 4.2.4}. *)
6262+type generator = {version: string option; uri: Uri.t option; content: string}
6363+6464+val generator : ?uri:Uri.t -> ?version:string -> string -> generator
6565+6666+(** The [icon] element's content is an IRI reference [RFC3987] that identifies
6767+ an image that provides iconic visual identification for a feed.
6868+6969+ The image SHOULD have an aspect ratio of one (horizontal) to one (vertical)
7070+ and SHOULD be suitable for presentation at a small size.
7171+7272+ {{:http://tools.ietf.org/html/rfc4287#section-4.2.5} See RFC 4287 § 4.2.5} *)
7373+type icon = Uri.t
7474+7575+(** The [id] element conveys a permanent, universally unique identifier for an
7676+ entry or feed.
7777+7878+ Its content MUST be an IRI, as defined by [RFC3987]. Note that the
7979+ definition of "IRI" excludes relative references. Though the IRI might use
8080+ a dereferencable scheme, Atom Processors MUST NOT assume it can be
8181+ dereferenced.
8282+8383+ There is more information in the RFC but they are not necessary here, at
8484+ least, they can not be checked here.
8585+8686+ {{: http://tools.ietf.org/html/rfc4287#section-4.2.6} See RFC 4287 § 4.2.6
8787+ } *)
8888+type id = Uri.t
8989+9090+(** Indicates the link relation type. See {{:
9191+ http://tools.ietf.org/html/rfc4287#section-4.2.7.2} RFC 4287 § 4.2.7.2}. *)
9292+type rel =
9393+ | Alternate
9494+ (** Signifies that the URI in the value of the link [href] field
9595+ identifies an alternate version of the resource described by the
9696+ containing element. *)
9797+ | Related
9898+ (** Signifies that the URI in the value of the link [href] field
9999+ identifies a resource related to the resource described by the
100100+ containing element. *)
101101+ | Self
102102+ (** Signifies that the URI in the value of the link [href] field
103103+ identifies a resource equivalent to the containing element. *)
104104+ | Enclosure
105105+ (** Signifies that the IRI in the value of the link [href] field
106106+ identifies a related resource that is potentially large in size and
107107+ might require special handling. When [Enclosure] is specified, the
108108+ length attribute SHOULD be provided. *)
109109+ | Via
110110+ (** Signifies that the IRI in the value of the link [href] field
111111+ identifies a resource that is the source of the information provided
112112+ in the containing element. *)
113113+ | Link of Uri.t
114114+ (** The URI MUST be non-empty and match either the "isegment-nz-nc" or
115115+ the "IRI" production in {{:http://tools.ietf.org/html/rfc3987}
116116+ RFC3987}. Note that use of a relative reference other than a simple
117117+ name is not allowed. *)
118118+119119+(** [link] defines a reference from an entry or feed to a Web resource. See {{:
120120+ http://tools.ietf.org/html/rfc4287#section-4.2.7} RFC 4287 § 4.2.7}.
121121+122122+ - [href] contains the link's IRI. The value MUST be a IRI reference,
123123+ {{:http://tools.ietf.org/html/rfc3987} RFC3987}. See {{:
124124+ http://tools.ietf.org/html/rfc4287#section-4.2.7.1} RFC 4287 § 4.2.7.1}. -
125125+ [type_media] is an advisory media type: it is a hint about the type of the
126126+ representation that is expected to be returned when the value of the href
127127+ attribute is dereferenced. Note that the type attribute does not override
128128+ the actual media type returned with the representation. The value of
129129+ [type_media], if given, MUST conform to the syntax of a MIME media type,
130130+ {{:http://tools.ietf.org/html/rfc4287#ref-MIMEREG} MIMEREG}. See {{:
131131+ http://tools.ietf.org/html/rfc4287#section-4.2.7.3} RFC 4287 § 4.2.7.3}. -
132132+ [hreflang] describes the language of the resource pointed to by the href
133133+ attribute. When used together with the [rel=Alternate], it implies a
134134+ translated version of the entry. The value of [hreflang] MUST be a language
135135+ tag, {{:http://tools.ietf.org/html/rfc3066} RFC3066}. See {{:
136136+ http://tools.ietf.org/html/rfc4287#section-4.2.7.4} RFC 4287 § 4.2.7.4}. -
137137+ [title] conveys human-readable information about the link. The content of
138138+ the "title" attribute is Language-Sensitive. The value [""] means that no
139139+ title is provided. See {{:
140140+ http://tools.ietf.org/html/rfc4287#section-4.2.7.5} RFC 4287 § 4.2.7.5}. -
141141+ [length] indicates an advisory length of the linked content in octets; it
142142+ is a hint about the content length of the representation returned when the
143143+ IRI in the href attribute is mapped to a URI and dereferenced. Note that
144144+ the length attribute does not override the actual content length of the
145145+ representation as reported by the underlying protocol. See {{:
146146+ http://tools.ietf.org/html/rfc4287#section-4.2.7.6} RFC 4287 § 4.2.7.6}. *)
147147+type link =
148148+ { href: Uri.t
149149+ ; rel: rel
150150+ ; type_media: string option
151151+ ; hreflang: string option
152152+ ; title: string
153153+ ; length: int option }
154154+155155+val link :
156156+ ?type_media:string
157157+ -> ?hreflang:string
158158+ -> ?title:string
159159+ -> ?length:int
160160+ -> ?rel:rel
161161+ -> Uri.t
162162+ -> link
163163+(** [link uri] creates a link element.
164164+165165+ @param rel The [rel] attribute of the link. It defaults to [Alternate]
166166+ since {{:http://tools.ietf.org/html/rfc4287#section-4.2.7.2} RFC 4287 §
167167+ 4.2.7.2} says that {i if the "rel" attribute is not present, the link
168168+ element MUST be interpreted as if the link relation type is "alternate".}
169169+170170+ The other optional arguments all default to [None] (i.e., not specified). *)
171171+172172+(** [logo] is an IRI reference [RFC3987] that identifies an image that provides
173173+ visual identification for a feed.
174174+175175+ The image SHOULD have an aspect ratio of 2 (horizontal) to 1 (vertical).
176176+177177+ {{: http://tools.ietf.org/html/rfc4287#section-4.2.8} See RFC 4287 §
178178+ 4.2.8} *)
179179+type logo = Uri.t
180180+181181+(** [published] is a Date construct indicating an instant in time associated
182182+ with an event early in the life cycle of the entry.
183183+184184+ Typically, [published] will be associated with the initial creation or
185185+ first availability of the resource.
186186+187187+ {{: http://tools.ietf.org/html/rfc4287#section-4.2.9} See RFC 4287 §
188188+ 4.2.9} *)
189189+type published = Syndic_date.t
190190+191191+(** [rights] is a Text construct that conveys information about rights held in
192192+ and over an entry or feed. The [rights] element SHOULD NOT be used to
193193+ convey machine-readable licensing information.
194194+195195+ If an atom:entry element does not contain an atom:rights element, then the
196196+ atom:rights element of the containing atom:feed element, if present, is
197197+ considered to apply to the entry.
198198+199199+ See {{: http://tools.ietf.org/html/rfc4287#section-4.2.10} RFC 4287 §
200200+ 4.2.10 } *)
201201+type rights = text_construct
202202+203203+(** [title] is a Text construct that conveys a human-readable title for an
204204+ entry or feed. {{: http://tools.ietf.org/html/rfc4287#section-4.2.14} See
205205+ RFC 4287 § 4.2.14 } *)
206206+type title = text_construct
207207+208208+(** [subtitle] is a Text construct that conveys a human-readable description or
209209+ subtitle for a feed. {{: http://tools.ietf.org/html/rfc4287#section-4.2.12}
210210+ See RFC 4287 § 4.2.12 } *)
211211+type subtitle = text_construct
212212+213213+(** [updated] is a Date construct indicating the most recent instant in time
214214+ when an entry or feed was modified in a way the publisher considers
215215+ significant. Therefore, not all modifications necessarily result in a
216216+ changed [updated] value.
217217+218218+ Publishers MAY change the value of this element over time.
219219+220220+ {{: http://tools.ietf.org/html/rfc4287#section-4.2.15} See RFC 4287 §
221221+ 4.2.15 } *)
222222+type updated = Syndic_date.t
223223+224224+(** If an {!entry} is copied from one feed into another feed, then the source
225225+ {!feed}'s metadata (all child elements of atom:feed other than the
226226+ atom:entry elements) MAY be preserved within the copied entry by adding an
227227+ atom:source child element, if it is not already present in the entry, and
228228+ including some or all of the source feed's Metadata elements as the
229229+ atom:source element's children. Such metadata SHOULD be preserved if the
230230+ source atom:feed contains any of the child elements atom:author,
231231+ atom:contributor, atom:rights, or atom:category and those child elements
232232+ are not present in the source atom:entry.
233233+234234+ {{: http://tools.ietf.org/html/rfc4287#section-4.2.11} See RFC 4287 §
235235+ 4.2.11 }
236236+237237+ The atom:source element is designed to allow the aggregation of entries
238238+ from different feeds while retaining information about an entry's source
239239+ feed. For this reason, Atom Processors that are performing such aggregation
240240+ SHOULD include at least the required feed-level Metadata fields ([id],
241241+ [title], and [updated]) in the [source] element.
242242+243243+ {{: http://tools.ietf.org/html/rfc4287#section-4.1.2} See RFC 4287 § 4.1.2
244244+ for more details.} *)
245245+type source =
246246+ { authors: author list
247247+ ; categories: category list
248248+ ; contributors: author list
249249+ (** {{: http://tools.ietf.org/html/rfc4287#section-4.2.3} See RFC 4287
250250+ § 4.2.3 } *)
251251+ ; generator: generator option
252252+ ; icon: icon option
253253+ ; id: id
254254+ ; links: link list
255255+ ; logo: logo option
256256+ ; rights: rights option
257257+ ; subtitle: subtitle option
258258+ ; title: title
259259+ ; updated: updated option }
260260+261261+val source :
262262+ ?categories:category list
263263+ -> ?contributors:author list
264264+ -> ?generator:generator
265265+ -> ?icon:icon
266266+ -> ?links:link list
267267+ -> ?logo:logo
268268+ -> ?rights:rights
269269+ -> ?subtitle:subtitle
270270+ -> ?updated:updated
271271+ -> authors:author list
272272+ -> id:id
273273+ -> title
274274+ -> source
275275+276276+(** A MIME type that conform to the syntax of a MIME media type, but MUST NOT
277277+ be a composite type (see Section 4.2.6 of [MIMEREG]).
278278+279279+ {{: http://tools.ietf.org/html/rfc4287#section-4.1.3.1} See RFC 4287 §
280280+ 4.1.3.1 } *)
281281+type mime = string
282282+283283+(** [content] either contains or links to the content of the entry. The value
284284+ of [content] is Language-Sensitive. {{:
285285+ http://tools.ietf.org/html/rfc4287#section-4.1.3} See RFC 4287 § 4.1.3}
286286+287287+ - [Text], [Html], [Xhtml] or [Mime] means that the content was part of the
288288+ document and is provided as an argument. The first argument to [Html] and
289289+ [Xhtml] is the possible xml:base value.
290290+ {{:http://tools.ietf.org/html/rfc4287#section-3.1.1} See RFC 4287 § 3.1.1}
291291+ - [Src(m, iri)] means that the content is to be found at [iri] and has MIME
292292+ type [m]. Atom Processors MAY use the IRI to retrieve the content and MAY
293293+ choose to ignore remote content or to present it in a different manner than
294294+ local content. The value of [m] is advisory; that is to say, when the
295295+ corresponding URI (mapped from an IRI, if necessary) is dereferenced, if
296296+ the server providing that content also provides a media type, the
297297+ server-provided media type is authoritative. See {{:
298298+ http://tools.ietf.org/html/rfc4287#section-4.1.3.2} RFC 4287 § 4.1.3.2} *)
299299+type content =
300300+ | Text of string
301301+ | Html of Uri.t option * string
302302+ | Xhtml of Uri.t option * Syndic_xml.t list
303303+ | Mime of mime * string
304304+ | Src of mime option * Uri.t
305305+306306+(** [summary] is a Text construct that conveys a short summary, abstract, or
307307+ excerpt of an entry.
308308+309309+ It is not advisable for [summary] to duplicate {!title} or {!content}
310310+ because Atom Processors might assume there is a useful summary when there
311311+ is none.
312312+313313+ {{: http://tools.ietf.org/html/rfc4287#section-4.2.13} See RFC 4287 §
314314+ 4.2.13 } *)
315315+type summary = text_construct
316316+317317+(** [entry] represents an individual entry, acting as a container for metadata
318318+ and data associated with the entry. This element can appear as a child of
319319+ the atom:feed element, or it can appear as the document (i.e., top-level)
320320+ element of a stand-alone Atom Entry Document.
321321+322322+ The specification mandates that each entry contains an author unless it
323323+ contains some sources or the feed contains an author element. This library
324324+ ensures that the authors are properly dispatched to all locations.
325325+326326+ The following child elements are defined by this specification (note that
327327+ it requires the presence of some of these elements):
328328+329329+ - if [content = None], then [links] MUST contain at least one element with
330330+ a rel attribute value of [Alternate]. - There MUST NOT be more than one
331331+ element of [links] with a rel attribute value of [Alternate] that has the
332332+ same combination of type and hreflang attribute values. - There MAY be
333333+ additional elements of [links] beyond those described above. - There MUST
334334+ be an [summary] in either of the following cases: {ul {- the atom:entry
335335+ contains an atom:content that has a "src" attribute (and is thus empty).}
336336+ {- the atom:entry contains content that is encoded in Base64; i.e., the
337337+ "type" attribute of atom:content is a MIME media type [MIMEREG], but is not
338338+ an XML media type [RFC3023], does not begin with "text/", and does not end
339339+ with "/xml" or "+xml".}}
340340+341341+ {{: http://tools.ietf.org/html/rfc4287#section-4.1.2} See RFC 4287 §
342342+ 4.1.2} *)
343343+type entry =
344344+ { authors: author * author list
345345+ ; categories: category list
346346+ ; content: content option
347347+ ; contributors: author list
348348+ ; id: id
349349+ ; links: link list
350350+ ; published: published option
351351+ ; rights: rights option
352352+ ; source: source option
353353+ ; summary: summary option
354354+ ; title: title
355355+ ; updated: updated }
356356+357357+val entry :
358358+ ?categories:category list
359359+ -> ?content:content
360360+ -> ?contributors:author list
361361+ -> ?links:link list
362362+ -> ?published:published
363363+ -> ?rights:rights
364364+ -> ?source:source
365365+ -> ?summary:summary
366366+ -> id:id
367367+ -> authors:author * author list
368368+ -> title:title
369369+ -> updated:updated
370370+ -> unit
371371+ -> entry
372372+373373+(** [feed] is the document (i.e., top-level) element of an Atom Feed Document,
374374+ acting as a container for metadata and data associated with the feed. Its
375375+ element children consist of metadata elements followed by zero or more
376376+ atom:entry child elements.
377377+378378+ - one of the [links] SHOULD have a [rel] attribute value of [Self]. This is
379379+ the preferred URI for retrieving Atom Feed Documents representing this Atom
380380+ feed. - There MUST NOT be more than one element of [links] with a rel
381381+ attribute value of [Alternate] that has the same combination of type and
382382+ hreflang attribute values. - There may be additional elements in [links]
383383+ beyond those described above.
384384+385385+ If multiple {!entry} elements with the same {!id} value appear in an Atom
386386+ Feed Document, they represent the same entry. Their {!updated} timestamps
387387+ SHOULD be different. If an Atom Feed Document contains multiple entries
388388+ with the same {!id}, Atom Processors MAY choose to display all of them or
389389+ some subset of them. One typical behavior would be to display only the
390390+ entry with the latest {!updated} timestamp.
391391+392392+ {{: http://tools.ietf.org/html/rfc4287#section-4.1.1} See RFC 4287 §
393393+ 4.1.1} *)
394394+type feed =
395395+ { authors: author list
396396+ ; categories: category list
397397+ ; contributors: author list
398398+ ; generator: generator option
399399+ ; icon: icon option
400400+ ; id: id
401401+ ; links: link list
402402+ ; logo: logo option
403403+ ; rights: rights option
404404+ ; subtitle: subtitle option
405405+ ; title: title
406406+ ; updated: updated
407407+ ; entries: entry list }
408408+409409+val feed :
410410+ ?authors:author list
411411+ -> ?categories:category list
412412+ -> ?contributors:author list
413413+ -> ?generator:generator
414414+ -> ?icon:icon
415415+ -> ?links:link list
416416+ -> ?logo:logo
417417+ -> ?rights:rights
418418+ -> ?subtitle:subtitle
419419+ -> id:id
420420+ -> title:title
421421+ -> updated:updated
422422+ -> entry list
423423+ -> feed
424424+425425+(** {2 Input and output} *)
426426+427427+val parse : ?self:Uri.t -> ?xmlbase:Uri.t -> Xmlm.input -> feed
428428+(** [parse xml] returns the feed corresponding to [xml]. Beware that [xml] is
429429+ mutable, so when the parsing fails, one has to create a new copy of [xml]
430430+ to use it with another function. If you retrieve [xml] from a URL, you
431431+ should use that URL as [~xmlbase].
432432+433433+ Raise [Error.Expected], [Expected_Data] or [Error.Duplicate_Link] if [xml]
434434+ is not a valid Atom document.
435435+436436+ @param xmlbase default xml:base to resolve relative URLs (of course
437437+ xml:base attributes in the XML Atom document take precedence over this).
438438+ See {{:http://www.w3.org/TR/xmlbase/}XML Base}.
439439+440440+ @param self the URI from where the current feed was retrieved. Giving this
441441+ information will add an entry to [links] with [rel = Self] unless one
442442+ already exists. *)
443443+444444+val read : ?self:Uri.t -> ?xmlbase:Uri.t -> string -> feed
445445+(** [read fname] reads the file name [fname] and parses it. For the optional
446446+ parameters, see {!parse}. *)
447447+448448+val to_xml : feed -> Syndic_xml.t
449449+(** [to_xml f] converts the feed [f] to an XML tree. *)
450450+451451+val output : feed -> Xmlm.dest -> unit
452452+(** [output f dest] writes the XML tree of the feed [f] to [dest]. *)
453453+454454+val write : feed -> string -> unit
455455+(** [write f fname] writes the XML tree of the feed [f] to the file named
456456+ [fname]. *)
457457+458458+(** {2 Convenience functions} *)
459459+460460+val ascending : entry -> entry -> int
461461+(** Compare entries so that older dates are smaller. The date of the entry is
462462+ taken from the [published] field, if available, or otherwise [updated] is
463463+ used. *)
464464+465465+val descending : entry -> entry -> int
466466+(** Compare entries so that more recent dates are smaller. The date of the
467467+ entry is taken from the [published] field, if available, or otherwise
468468+ [updated] is used. *)
469469+470470+val aggregate :
471471+ ?self:Uri.t
472472+ -> ?id:id
473473+ -> ?updated:updated
474474+ -> ?subtitle:subtitle
475475+ -> ?title:text_construct
476476+ -> ?sort:[`Newest_first | `Oldest_first | `None]
477477+ -> ?n:int
478478+ -> feed list
479479+ -> feed
480480+(** [aggregate feeds] returns a single feed containing all the posts in
481481+ [feeds]. In order to track the origin of each post in the aggrated feed, it
482482+ is recommended that each feed in [feeds] possesses a link with
483483+ [rel = Self] so that the [source] added to each entry contains a link to
484484+ the original feed. If an entry contains a [source], il will {i not} be
485485+ overwritten.
486486+487487+ @param self The preferred URI for retrieving this aggregayed Atom Feed.
488488+ While not mandatory, it is good practice to set this.
489489+490490+ @param id the universally unique identifier for the aggregated feed. If it
491491+ is not provided a URN is built from the [feeds] IDs. @param sort whether to
492492+ sort the entries of the final feed. The default is [`Newest_first] because
493493+ it is generally desired. @param n number of entries of the (sorted)
494494+ aggregated feed to return. *)
495495+496496+val set_self_link : feed -> ?hreflang:string -> ?length:int -> Uri.t -> feed
497497+(** [set_self feed url] add or replace the URI in the self link of the feed.
498498+ You can also set the [hreflang] and [length] of the self link. *)
499499+500500+val get_self_link : feed -> link option
501501+(** [get_self feed] return the self link of the feed, if any is present. *)
502502+503503+val set_main_author : feed -> author -> feed
504504+(** [set_main_author feed author] will add [author] in front of the list of
505505+ authors of the [feed] (if an author with the same name already exists, the
506506+ optional information are merged, the ones in [author] taking precedence).
507507+ Also remove all empty authors (name = "" and no URI, no email) and replace
508508+ them with [author] if no author is left and an authors is mandatory. *)
509509+510510+(**/**)
511511+512512+(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None],
513513+ gives the base URI against which [uri] must be resolved if it is relative. *)
514514+type uri = Uri.t option * string
515515+516516+type person = [`Email of string | `Name of string | `URI of uri] list
517517+518518+val unsafe :
519519+ ?xmlbase:Uri.t
520520+ -> Xmlm.input
521521+ -> [> `Feed of [> `Author of person
522522+ | `Category of [> `Label of string
523523+ | `Scheme of string
524524+ | `Term of string ]
525525+ list
526526+ | `Contributor of person
527527+ | `Entry of [> `Author of person
528528+ | `Category of [> `Label of string
529529+ | `Scheme of string
530530+ | `Term of string ]
531531+ list
532532+ | `Content of [> `Data of Syndic_xml.t list
533533+ | `SRC of string
534534+ | `Type of string ]
535535+ list
536536+ | `Contributor of person
537537+ | `ID of string list
538538+ | `Link of [> `HREF of string
539539+ | `HREFLang of string
540540+ | `Length of string
541541+ | `Rel of string
542542+ | `Title of string
543543+ | `Type of string ]
544544+ list
545545+ | `Published of [> `Date of string] list
546546+ | `Rights of Syndic_xml.t list
547547+ | `Source of [> `Author of person
548548+ | `Category of [> `Label of string
549549+ | `Scheme of string
550550+ | `Term of string ]
551551+ list
552552+ | `Contributor of person
553553+ | `Generator of [> `Content of string
554554+ | `URI of uri
555555+ | `Version of string
556556+ ]
557557+ list
558558+ | `ID of string list
559559+ | `Icon of [> `URI of uri] list
560560+ | `Link of [> `HREF of string
561561+ | `HREFLang of string
562562+ | `Length of string
563563+ | `Rel of string
564564+ | `Title of string
565565+ | `Type of string ]
566566+ list
567567+ | `Logo of [> `URI of uri] list
568568+ | `Rights of Syndic_xml.t list
569569+ | `Subtitle of Syndic_xml.t list
570570+ | `Title of Syndic_xml.t list
571571+ | `Updated of [> `Date of string]
572572+ list ]
573573+ list
574574+ | `Summary of Syndic_xml.t list
575575+ | `Title of Syndic_xml.t list
576576+ | `Updated of [> `Date of string] list ]
577577+ list
578578+ | `Generator of [> `Content of string
579579+ | `URI of uri
580580+ | `Version of string ]
581581+ list
582582+ | `ID of string list
583583+ | `Icon of [> `URI of uri] list
584584+ | `Link of [> `HREF of string
585585+ | `HREFLang of string
586586+ | `Length of string
587587+ | `Rel of string
588588+ | `Title of string
589589+ | `Type of string ]
590590+ list
591591+ | `Logo of [> `URI of uri] list
592592+ | `Rights of Syndic_xml.t list
593593+ | `Subtitle of Syndic_xml.t list
594594+ | `Title of Syndic_xml.t list
595595+ | `Updated of [> `Date of string] list ]
596596+ list ]
597597+(** Analysis without verification, enjoy ! *)
+152
stack/syndic/lib/syndic_common.ml
···11+(* XML *)
22+33+module XML = struct
44+ include Syndic_xml
55+66+ type node = pos * tag * t list
77+88+ let xmlbase_tag = (Xmlm.ns_xml, "base")
99+1010+ let xmlbase_of_attr ~xmlbase attr =
1111+ try
1212+ let new_base = List.assoc xmlbase_tag attr in
1313+ Some (Syndic_xml.resolve ~xmlbase (Uri.of_string new_base))
1414+ with Not_found -> xmlbase
1515+1616+ let generate_catcher ?(namespaces = [""]) ?(attr_producer = [])
1717+ ?(data_producer = []) ?leaf_producer maker =
1818+ let in_namespaces ((prefix, _), _) = List.mem prefix namespaces in
1919+ let get_attr_name (((_prefix, name), _) : Xmlm.attribute) = name in
2020+ let get_attr_value ((_, value) : Xmlm.attribute) = value in
2121+ let get_tag_name (((_prefix, name), _) : tag) = name in
2222+ let get_attrs ((_, attrs) : tag) = attrs in
2323+ let get_producer name map =
2424+ try Some (List.assoc name map) with _ -> None
2525+ in
2626+ let rec catch_attr ~xmlbase acc pos = function
2727+ | attr :: r -> (
2828+ match get_producer (get_attr_name attr) attr_producer with
2929+ | Some f when in_namespaces attr ->
3030+ let acc = f ~xmlbase (get_attr_value attr) :: acc in
3131+ catch_attr ~xmlbase acc pos r
3232+ | _ -> catch_attr ~xmlbase acc pos r )
3333+ | [] -> acc
3434+ in
3535+ let rec catch_datas ~xmlbase acc = function
3636+ | Node (pos, tag, datas) :: r -> (
3737+ match get_producer (get_tag_name tag) data_producer with
3838+ | Some f when in_namespaces tag ->
3939+ let acc = f ~xmlbase (pos, tag, datas) :: acc in
4040+ catch_datas ~xmlbase acc r
4141+ | _ -> catch_datas ~xmlbase acc r )
4242+ | Data (pos, str) :: r -> (
4343+ match leaf_producer with
4444+ | Some f -> catch_datas ~xmlbase (f ~xmlbase pos str :: acc) r
4545+ | None -> catch_datas ~xmlbase acc r )
4646+ | [] -> acc
4747+ in
4848+ let generate ~xmlbase ((pos, tag, datas) : node) =
4949+ (* The spec says that "The base URI for a URI reference appearing in any
5050+ other attribute value, including default attribute values, is the base
5151+ URI of the element bearing the attribute" so get xml:base first. *)
5252+ let xmlbase = xmlbase_of_attr ~xmlbase (get_attrs tag) in
5353+ let acc = catch_attr ~xmlbase [] pos (get_attrs tag) in
5454+ maker ~pos (catch_datas ~xmlbase acc datas)
5555+ in
5656+ generate
5757+5858+ let dummy_of_xml ~ctor =
5959+ let leaf_producer ~xmlbase _pos data = ctor ~xmlbase data in
6060+ let head ~pos:_ = function [] -> ctor ~xmlbase:None "" | x :: _ -> x in
6161+ generate_catcher ~leaf_producer head
6262+end
6363+6464+(* Util *)
6565+6666+module Util = struct
6767+ let find f l = try Some (List.find f l) with Not_found -> None
6868+6969+ exception Found of XML.t
7070+7171+ let recursive_find f root =
7272+ let rec aux = function
7373+ | [] -> None
7474+ | x :: _ when f x -> raise (Found x)
7575+ | XML.Node (_, _, x) :: r -> (
7676+ aux x
7777+ |> function
7878+ | Some x -> raise (Found x) (* assert false ? *) | None -> aux r )
7979+ | XML.Data _ :: r -> aux r
8080+ in
8181+ try aux [root] with Found x -> Some x | _ -> None
8282+8383+ let rec filter_map l f =
8484+ match l with
8585+ | [] -> []
8686+ | x :: tl -> (
8787+ match f x with None -> filter_map tl f | Some x -> x :: filter_map tl f )
8888+8989+ let rec take l n =
9090+ match l with
9191+ | [] -> []
9292+ | e :: tl -> if n > 0 then e :: take tl (n - 1) else []
9393+9494+ let tag_is (((_prefix, name), _attrs) : XML.tag) = ( = ) name
9595+ let attr_is (((_prefix, name), _value) : Xmlm.attribute) = ( = ) name
9696+ let datas_has_leaf = List.exists (function XML.Data _ -> true | _ -> false)
9797+9898+ let get_leaf l =
9999+ match find (function XML.Data _ -> true | _ -> false) l with
100100+ | Some (XML.Data (_, s)) -> s
101101+ | _ -> raise Not_found
102102+103103+ let get_attrs ((_, attrs) : XML.tag) = attrs
104104+ let get_value ((_, value) : Xmlm.attribute) = value
105105+ let get_attr_name (((_prefix, name), _) : Xmlm.attribute) = name
106106+ let get_tag_name (((_prefix, name), _) : XML.tag) = name
107107+ let is_space c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
108108+109109+ let only_whitespace s =
110110+ let r = ref true in
111111+ let i = ref 0 and len = String.length s in
112112+ while !r && !i < len do
113113+ r := is_space s.[!i] ;
114114+ incr i
115115+ done ;
116116+ !r
117117+118118+ (* Output feeds to XML *)
119119+120120+ let add_attr name v_opt attr =
121121+ match v_opt with None | Some "" -> attr | Some v -> (name, v) :: attr
122122+123123+ let add_attr_uri name v_opt attr =
124124+ match v_opt with None -> attr | Some v -> (name, Uri.to_string v) :: attr
125125+126126+ let tag name = (("", name), [])
127127+ let dummy_pos = (0, 0)
128128+129129+ (* Do smarter positions make sense? *)
130130+131131+ let node_data tag content =
132132+ XML.Node (dummy_pos, tag, [XML.Data (dummy_pos, content)])
133133+134134+ let node_uri tag uri = node_data tag (Uri.to_string uri)
135135+136136+ let add_node_data tag c nodes =
137137+ match c with
138138+ | None -> nodes
139139+ | Some content -> node_data tag content :: nodes
140140+141141+ let add_node_uri tag c nodes =
142142+ match c with
143143+ | None -> nodes
144144+ | Some uri -> node_data tag (Uri.to_string uri) :: nodes
145145+146146+ (* Add to [nodes] those coming from mapping [f] on [els] *)
147147+ let add_nodes_rev_map f els nodes =
148148+ List.fold_left (fun nodes el -> f el :: nodes) nodes els
149149+150150+ let add_node_option f op nodes =
151151+ match op with None -> nodes | Some v -> f v :: nodes
152152+end
+70
stack/syndic/lib/syndic_common.mli
···11+module XML : sig
22+ type t = Syndic_xml.t
33+ type node = Syndic_xml.pos * Syndic_xml.tag * t list
44+55+ val generate_catcher :
66+ ?namespaces:string list
77+ -> ?attr_producer:(string * (xmlbase:Uri.t option -> string -> 'a)) list
88+ -> ?data_producer:(string * (xmlbase:Uri.t option -> node -> 'a)) list
99+ -> ?leaf_producer:(xmlbase:Uri.t option -> Xmlm.pos -> string -> 'a)
1010+ -> (pos:Xmlm.pos -> 'a list -> 'b)
1111+ -> xmlbase:Uri.t option
1212+ -> node
1313+ -> 'b
1414+1515+ val dummy_of_xml :
1616+ ctor:(xmlbase:Uri.t option -> string -> 'a)
1717+ -> xmlbase:Uri.t option
1818+ -> node
1919+ -> 'a
2020+2121+ val xmlbase_of_attr :
2222+ xmlbase:Uri.t option -> Xmlm.attribute list -> Uri.t option
2323+end
2424+2525+module Util : sig
2626+ val find : ('a -> bool) -> 'a list -> 'a option
2727+ val recursive_find : (XML.t -> bool) -> XML.t -> XML.t option
2828+ val filter_map : 'a list -> ('a -> 'b option) -> 'b list
2929+ val take : 'a list -> int -> 'a list
3030+ val tag_is : Xmlm.tag -> string -> bool
3131+ val attr_is : Xmlm.attribute -> string -> bool
3232+ val datas_has_leaf : XML.t list -> bool
3333+ val get_leaf : XML.t list -> string
3434+ val get_attrs : Xmlm.tag -> Xmlm.attribute list
3535+ val get_value : Xmlm.attribute -> string
3636+ val get_attr_name : Xmlm.attribute -> string
3737+ val get_tag_name : Xmlm.tag -> string
3838+ val only_whitespace : string -> bool
3939+4040+ (** {2 Helpers to output XML} *)
4141+4242+ val dummy_pos : Xmlm.pos
4343+ (** A dummy position when generating XML files. *)
4444+4545+ val add_attr :
4646+ Xmlm.name -> string option -> Xmlm.attribute list -> Xmlm.attribute list
4747+4848+ val add_attr_uri :
4949+ Xmlm.name -> Uri.t option -> Xmlm.attribute list -> Xmlm.attribute list
5050+5151+ val tag : string -> Xmlm.tag
5252+ (** [tag n] returns a tag with name [n], no namespace, and no attributes. *)
5353+5454+ val node_data : Xmlm.tag -> string -> XML.t
5555+ (** [node_data tag content] returns a node named [tag] with data set to
5656+ [content]. *)
5757+5858+ val node_uri : Xmlm.tag -> Uri.t -> XML.t
5959+ val add_node_data : Xmlm.tag -> string option -> XML.t list -> XML.t list
6060+ val add_node_uri : Xmlm.tag -> Uri.t option -> XML.t list -> XML.t list
6161+6262+ val add_nodes_rev_map : ('a -> XML.t) -> 'a list -> XML.t list -> XML.t list
6363+ (** [add_nodes_rev_map f l nodes] apply [f] to each element of [l] and add
6464+ the resulting HTML trees in reverse order in front of [nodes]. *)
6565+6666+ val add_node_option : ('a -> XML.t) -> 'a option -> XML.t list -> XML.t list
6767+ (** [add_node_option f o nodes]: if [o] is [None], return [nodes]; otherwise
6868+ apply [f] to the value carried by [o] and add the resulting XML tree in
6969+ front of [nodes]. *)
7070+end
+196
stack/syndic/lib/syndic_date.ml
···11+open Printf
22+open Scanf
33+44+type t = Ptime.t
55+66+let epoch = Ptime.epoch
77+let compare = Ptime.compare
88+let max d1 d2 = if compare d1 d2 < 0 then d2 else d1
99+let min d1 d2 = if compare d1 d2 < 0 then d1 else d2
1010+let month_to_int = Hashtbl.create 12
1111+1212+let () =
1313+ let add m i = Hashtbl.add month_to_int m i in
1414+ add "Jan" 1 ;
1515+ add "Feb" 2 ;
1616+ add "Mar" 3 ;
1717+ add "Apr" 4 ;
1818+ add "May" 5 ;
1919+ add "Jun" 6 ;
2020+ add "Jul" 7 ;
2121+ add "Aug" 8 ;
2222+ add "Sep" 9 ;
2323+ add "Oct" 10 ;
2424+ add "Nov" 11 ;
2525+ add "Dec" 12
2626+2727+let map f = function Some x -> f x | None -> None
2828+let map2 f a b = match (a, b) with Some a, Some b -> f a b | _ -> None
2929+3030+(* RFC3339 date *)
3131+let of_rfc3339 s =
3232+ match Ptime.of_rfc3339 ~strict:false s with
3333+ | Result.Error _ ->
3434+ invalid_arg (sprintf "Syndic.Date.of_string: cannot parse %S" s)
3535+ | Result.Ok (t, tz_offset_s, _) -> (
3636+ match Ptime.of_date_time @@ Ptime.to_date_time ?tz_offset_s t with
3737+ | Some x -> x
3838+ | None -> invalid_arg (sprintf "Syndic.Data.of_string: cannot part %S" s) )
3939+4040+(* Format:
4141+ http://www.rssboard.org/rss-specification#ltpubdategtSubelementOfLtitemgt
4242+ Examples: Sun, 19 May 2002 15:21:36 GMT Sat, 25 Sep 2010 08:01:00 -0700 20
4343+ Mar 2013 03:47:14 +0000 *)
4444+let of_rfc822 s =
4545+ let make_date day month year h m maybe_s z =
4646+ let month =
4747+ if String.length month <= 3 then month else String.sub month 0 3
4848+ in
4949+ let month = Hashtbl.find month_to_int month in
5050+ let date = Ptime.of_date (year, month, day) in
5151+ let s =
5252+ if maybe_s <> "" && maybe_s.[0] = ':' then
5353+ float_of_string (String.sub maybe_s 1 (String.length maybe_s - 1))
5454+ else 0.
5555+ in
5656+ let span = Ptime.Span.of_int_s ((h * 3600) + (m * 60)) in
5757+ let span =
5858+ map (fun x -> Some (Ptime.Span.add span x)) (Ptime.Span.of_float_s s)
5959+ in
6060+ let date_and_time =
6161+ if z = "" || z = "GMT" || z = "UT" || z = "Z" then
6262+ map2 (fun date span -> Ptime.add_span date span) date span
6363+ |> map (fun x -> Some (Ptime.to_date_time x))
6464+ else
6565+ (* FIXME: this should be made more robust. *)
6666+ let tz_offset_s =
6767+ match z with
6868+ | "EST" -> -5 * 3600
6969+ | "EDT" -> -4 * 3600
7070+ | "CST" -> -6 * 3600
7171+ | "CDT" -> -5 * 3600
7272+ | "MST" -> -7 * 3600
7373+ | "MDT" -> -6 * 3600
7474+ | "PST" -> -8 * 3600
7575+ | "PDT" -> -7 * 3600
7676+ | "A" -> -1 * 3600
7777+ | "M" -> -12 * 3600
7878+ | "N" -> 1 * 3600
7979+ | "Y" -> 12 * 3600
8080+ | _ ->
8181+ let zh = sscanf (String.sub z 0 3) "%i" (fun i -> i) in
8282+ let zm = sscanf (String.sub z 3 2) "%i" (fun i -> i) in
8383+ let tz_sign = if zh < 0 then -1 else 1 in
8484+ if zh < 0 then tz_sign * ((-zh * 3600) + (zm * 60))
8585+ else tz_sign * ((zh * 3600) + (zm * 60))
8686+ in
8787+ let rt = map2 (fun date span -> Ptime.add_span date span) date span in
8888+ (* XXX: We lose minutes with this conversion, but Calendar does not
8989+ propose to handle minutes. *)
9090+ map (fun x -> Some (Ptime.to_date_time ~tz_offset_s x)) rt
9191+ in
9292+ match map Ptime.of_date_time date_and_time with
9393+ | Some x -> x
9494+ | None -> invalid_arg (sprintf "Syndic.Date.of_rfc822: cannot parse")
9595+ in
9696+ try
9797+ if 'A' <= s.[0] && s.[0] <= 'Z' then
9898+ try sscanf s "%_s %i %s %i %i:%i%s %s" make_date with _ ->
9999+ try sscanf s "%_s %ist %s %i %i:%i%s %s" make_date with _ ->
100100+ (* For e.g. "May 15th, 2019" — even though it is not standard *)
101101+ sscanf s "%s %i%_s %i" (fun m d y -> make_date d m y 0 0 "" "UT")
102102+ else
103103+ try sscanf s "%i %s %i %i:%i%s %s" make_date with _ ->
104104+ sscanf s "%i %s %i" (fun d m y -> make_date d m y 0 0 "" "UT")
105105+ with _ ->
106106+ (* Fallback: Some RSS feeds use RFC3339 dates instead of RFC822 *)
107107+ try of_rfc3339 s
108108+ with _ -> invalid_arg (sprintf "Syndic.Date.of_string+: cannot parse %S" s)
109109+110110+type month =
111111+ | Jan
112112+ | Feb
113113+ | Mar
114114+ | Apr
115115+ | May
116116+ | Jun
117117+ | Jul
118118+ | Aug
119119+ | Sep
120120+ | Oct
121121+ | Nov
122122+ | Dec
123123+124124+type day = Thu | Fri | Sat | Sun | Mon | Tue | Wed
125125+126126+let string_of_month = function
127127+ | Jan -> "Jan"
128128+ | Feb -> "Feb"
129129+ | Mar -> "Mar"
130130+ | Apr -> "Apr"
131131+ | May -> "May"
132132+ | Jun -> "Jun"
133133+ | Jul -> "Jul"
134134+ | Aug -> "Aug"
135135+ | Sep -> "Sep"
136136+ | Oct -> "Oct"
137137+ | Nov -> "Nov"
138138+ | Dec -> "Dec"
139139+140140+let month_of_date =
141141+ let months =
142142+ [|Jan; Feb; Mar; Apr; May; Jun; Jul; Aug; Sep; Oct; Nov; Dec|]
143143+ in
144144+ fun t ->
145145+ let _, i, _ = Ptime.to_date t in
146146+ months.(i - 1)
147147+148148+let to_rfc3339 d =
149149+ (* Example: 2014-03-19T15:51:25.050-07:00 *)
150150+ Ptime.to_rfc3339 d
151151+152152+(* Convenience functions *)
153153+154154+let day_of_week =
155155+ let wday = [|Thu; Fri; Sat; Sun; Mon; Tue; Wed|] in
156156+ fun t ->
157157+ let i = fst Ptime.(Span.to_d_ps @@ to_span t) mod 7 in
158158+ wday.((if i < 0 then 7 + i else i))
159159+160160+let string_of_day = function
161161+ | Thu -> "Thu"
162162+ | Fri -> "Fri"
163163+ | Sat -> "Sat"
164164+ | Sun -> "Sun"
165165+ | Mon -> "Mon"
166166+ | Tue -> "Tue"
167167+ | Wed -> "Wed"
168168+169169+let year t =
170170+ let year, _, _ = Ptime.to_date t in
171171+ year
172172+173173+let month = month_of_date
174174+175175+let day t =
176176+ let (_, _, day), _ = Ptime.to_date_time t in
177177+ day
178178+179179+let hour t =
180180+ let _, ((hh, _, _), _) = Ptime.to_date_time t in
181181+ hh
182182+183183+let minute t =
184184+ let _, ((_, mm, _), _) = Ptime.to_date_time t in
185185+ mm
186186+187187+let second t =
188188+ let _, ((_, _, ss), _) = Ptime.to_date_time t in
189189+ float_of_int ss
190190+191191+let to_rfc822 t =
192192+ (* Example: Sat, 25 Sep 2010 08:01:00 -0700 *)
193193+ let ds = day_of_week t |> string_of_day in
194194+ let ms = month_of_date t |> string_of_month in
195195+ let (y, _m, d), ((hh, mm, ss), t) = Ptime.to_date_time t in
196196+ Printf.sprintf "%s, %d %s %d %02d:%02d:%02d %04d" ds d ms y hh mm ss t
+54
stack/syndic/lib/syndic_date.mli
···11+(** Minimal date module required by Syndic. *)
22+33+(** A date with time. *)
44+type t = Ptime.t
55+66+val epoch : t
77+(** The POSIX time, i.e. Thursday, 1 January 1970 00:00:00 (UTC). *)
88+99+val compare : t -> t -> int
1010+(** Compare dates in increasing order. *)
1111+1212+val max : t -> t -> t
1313+(** [max d1 d2] return the maximum (i.e. more recent) of the dates [d1] and
1414+ [d2]. *)
1515+1616+val min : t -> t -> t
1717+(** [min d1 d2] return the minimum (i.e. less recent) of the dates [d1] and
1818+ [d2]. *)
1919+2020+val of_rfc822 : string -> t
2121+val to_rfc822 : t -> string
2222+val of_rfc3339 : string -> t
2323+val to_rfc3339 : t -> string
2424+2525+(** Month of the year. *)
2626+type month =
2727+ | Jan
2828+ | Feb
2929+ | Mar
3030+ | Apr
3131+ | May
3232+ | Jun
3333+ | Jul
3434+ | Aug
3535+ | Sep
3636+ | Oct
3737+ | Nov
3838+ | Dec
3939+4040+val string_of_month : month -> string
4141+(** Return the 3 letters identifying the month in English. *)
4242+4343+val year : t -> int
4444+(** Return the 4 digit year of the date. *)
4545+4646+val month : t -> month
4747+(** Return the month of the date. *)
4848+4949+val day : t -> int
5050+(** Return the day of the month (1..31). *)
5151+5252+val hour : t -> int
5353+val minute : t -> int
5454+val second : t -> float
+9
stack/syndic/lib/syndic_error.ml
···11+open Printf
22+33+type t = Xmlm.pos * string
44+55+exception Error of t
66+77+let to_string = function
88+ | Error (pos, str) -> sprintf "%s at l.%d c.%d" str (fst pos) (snd pos)
99+ | exn -> Printexc.to_string exn
+7
stack/syndic/lib/syndic_error.mli
···11+(** The common signature that all error modules must (at least) satisfy. *)
22+33+type t = Xmlm.pos * string
44+55+exception Error of t
66+77+val to_string : exn -> string
+545
stack/syndic/lib/syndic_opml1.ml
···11+open Syndic_common.XML
22+open Syndic_common.Util
33+open Printf
44+module XML = Syndic_xml
55+module Error = Syndic_error
66+module Date = Syndic_date
77+88+type head =
99+ { title: string
1010+ ; date_created: Date.t option
1111+ ; date_modified: Date.t
1212+ ; owner_name: string
1313+ ; owner_email: string
1414+ ; expansion_state: int list
1515+ ; vert_scroll_state: int option
1616+ ; window_top: int option
1717+ ; window_left: int option
1818+ ; window_bottom: int option
1919+ ; window_right: int option }
2020+2121+let head ?date_created ?(expansion_state = []) ?vert_scroll_state ?window_top
2222+ ?window_left ?window_bottom ?window_right ~date_modified ~owner_name
2323+ ~owner_email title =
2424+ { title
2525+ ; date_created
2626+ ; date_modified
2727+ ; owner_name
2828+ ; owner_email
2929+ ; expansion_state
3030+ ; vert_scroll_state
3131+ ; window_top
3232+ ; window_left
3333+ ; window_bottom
3434+ ; window_right }
3535+3636+let string_of_xml name (pos, _, datas) =
3737+ try get_leaf datas with Not_found ->
3838+ raise (Error.Error (pos, name ^ " must not be empty"))
3939+4040+let title_of_xml ~xmlbase:_ a = `Title (string_of_xml "<title>" a)
4141+let owner_name_of_xml ~xmlbase:_ a = `OwnerName (string_of_xml "<ownerName>" a)
4242+4343+let owner_email_of_xml ~xmlbase:_ a =
4444+ `OwnerEmail (string_of_xml "<ownerEmail>" a)
4545+4646+let expansion_state_of_xml ~xmlbase:_ (pos, _, datas) =
4747+ let explode s =
4848+ let rec aux acc i =
4949+ if i = String.length s then acc else aux (s.[i] :: acc) (succ i)
5050+ in
5151+ aux [] 0 |> List.rev
5252+ in
5353+ let implode l =
5454+ let rec aux s = function
5555+ | x :: xs -> aux (s ^ Char.escaped x) xs
5656+ | [] -> s
5757+ in
5858+ aux "" l
5959+ in
6060+ let split sep s =
6161+ let rec aux acc_char acc = function
6262+ | x :: xs when x = sep -> aux [] (List.rev acc_char :: acc) xs
6363+ | x :: xs -> aux (x :: acc_char) acc xs
6464+ | [] -> List.rev acc_char :: acc
6565+ in
6666+ explode s |> aux [] [] |> List.rev |> List.map implode
6767+ in
6868+ try
6969+ `ExpansionState (get_leaf datas |> split ',' |> List.map int_of_string)
7070+ with
7171+ | Not_found -> `ExpansionState []
7272+ | _ ->
7373+ raise
7474+ (Error.Error
7575+ ( pos
7676+ , "<expansionState> must be a list of numbers separated by commas \
7777+ as 1,2,3" ))
7878+7979+let int_of_xml name (pos, _, datas) =
8080+ try get_leaf datas |> int_of_string with
8181+ | Not_found -> raise (Error.Error (pos, name ^ " must not be empty"))
8282+ | Failure _ -> raise (Error.Error (pos, name ^ " must be an integer"))
8383+8484+let vert_scroll_state_of_xml ~xmlbase:_ a =
8585+ `VertScrollState (int_of_xml "<vertScrollState>" a)
8686+8787+let window_top_of_xml ~xmlbase:_ a = `WindowTop (int_of_xml "<windowTop>" a)
8888+let window_left_of_xml ~xmlbase:_ a = `WindowLeft (int_of_xml "<windowLeft>" a)
8989+9090+let window_bottom_of_xml ~xmlbase:_ a =
9191+ `WindowBottom (int_of_xml "<windowBotton>" a)
9292+9393+let window_right_of_xml ~xmlbase:_ a =
9494+ `WindowRight (int_of_xml "<windowRight>" a)
9595+9696+type head' =
9797+ [ `Title of string
9898+ | `DateCreated of Date.t
9999+ | `DateModified of Date.t
100100+ | `OwnerName of string
101101+ | `OwnerEmail of string
102102+ | `ExpansionState of int list
103103+ | `VertScrollState of int
104104+ | `WindowTop of int
105105+ | `WindowLeft of int
106106+ | `WindowBottom of int
107107+ | `WindowRight of int ]
108108+109109+let make_head ~pos (l : [< head'] list) =
110110+ let title =
111111+ match find (function `Title _ -> true | _ -> false) l with
112112+ | Some (`Title s) -> s
113113+ | _ ->
114114+ raise
115115+ (Error.Error (pos, "<head> MUST contains exactly one <title> element"))
116116+ in
117117+ let date_created =
118118+ match find (function `DateCreated _ -> true | _ -> false) l with
119119+ | Some (`DateCreated d) -> Some d
120120+ | _ -> None
121121+ in
122122+ let date_modified =
123123+ match find (function `DateModified _ -> true | _ -> false) l with
124124+ | Some (`DateModified d) -> d
125125+ | _ ->
126126+ raise
127127+ (Error.Error
128128+ (pos, "<head> MUST contains exactly one <dateModified> element"))
129129+ in
130130+ let owner_name =
131131+ match find (function `OwnerName _ -> true | _ -> false) l with
132132+ | Some (`OwnerName s) -> s
133133+ | _ ->
134134+ raise
135135+ (Error.Error
136136+ (pos, "<head> MUST contains exactly one <ownerName> element"))
137137+ in
138138+ let owner_email =
139139+ match find (function `OwnerEmail _ -> true | _ -> false) l with
140140+ | Some (`OwnerEmail s) -> s
141141+ | _ ->
142142+ raise
143143+ (Error.Error
144144+ (pos, "<head> MUST contains exactly one <ownerEmail> element"))
145145+ in
146146+ let expansion_state =
147147+ match find (function `ExpansionState _ -> true | _ -> false) l with
148148+ | Some (`ExpansionState l) -> l
149149+ | _ -> []
150150+ in
151151+ let vert_scroll_state =
152152+ match find (function `VertScrollState _ -> true | _ -> false) l with
153153+ | Some (`VertScrollState n) -> Some n
154154+ | _ -> None
155155+ in
156156+ let window_top =
157157+ match find (function `WindowTop _ -> true | _ -> false) l with
158158+ | Some (`WindowTop h) -> Some h
159159+ | _ -> None
160160+ in
161161+ let window_left =
162162+ match find (function `WindowLeft _ -> true | _ -> false) l with
163163+ | Some (`WindowLeft x) -> Some x
164164+ | _ -> None
165165+ in
166166+ let window_bottom =
167167+ match find (function `WindowBottom _ -> true | _ -> false) l with
168168+ | Some (`WindowBottom y) -> Some y
169169+ | _ -> None
170170+ in
171171+ let window_right =
172172+ match find (function `WindowRight _ -> true | _ -> false) l with
173173+ | Some (`WindowRight r) -> Some r
174174+ | _ -> None
175175+ in
176176+ `Head
177177+ { title
178178+ ; date_created
179179+ ; date_modified
180180+ ; owner_name
181181+ ; owner_email
182182+ ; expansion_state
183183+ ; vert_scroll_state
184184+ ; window_top
185185+ ; window_left
186186+ ; window_bottom
187187+ ; window_right }
188188+189189+let date_of_xml name (pos, _, datas) =
190190+ let d =
191191+ try get_leaf datas with Not_found ->
192192+ raise (Error.Error (pos, name ^ " must not be empty"))
193193+ in
194194+ try Date.of_rfc822 d with _ ->
195195+ raise (Error.Error (pos, sprintf "Date %S incorrect" d))
196196+197197+let date_created_of_xml ~xmlbase:_ a =
198198+ `DateCreated (date_of_xml "<dateCreated>" a)
199199+200200+let date_modified_of_xml ~xmlbase:_ a =
201201+ `DateModified (date_of_xml "<dateModified>" a)
202202+203203+let head_of_xml =
204204+ let data_producer =
205205+ [ ("title", title_of_xml)
206206+ ; ("dateCreated", date_created_of_xml)
207207+ ; ("dateModified", date_modified_of_xml)
208208+ ; ("ownerName", owner_name_of_xml)
209209+ ; ("ownerEmail", owner_email_of_xml)
210210+ ; ("expansionState", expansion_state_of_xml)
211211+ ; ("vertScrollState", vert_scroll_state_of_xml)
212212+ ; ("windowTop", window_top_of_xml)
213213+ ; ("windowLeft", window_left_of_xml)
214214+ ; ("windowBottom", window_bottom_of_xml)
215215+ ; ("windowRight", window_right_of_xml) ]
216216+ in
217217+ generate_catcher ~data_producer make_head
218218+219219+let head_of_xml' =
220220+ let data_producer =
221221+ [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
222222+ ; ("dateCreated", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `DateCreated a))
223223+ ; ("dateModified", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `DateModified a))
224224+ ; ("ownerName", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `OwnerName a))
225225+ ; ("ownerEmail", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `OwnerEmail a))
226226+ ; ( "expansionState"
227227+ , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `ExpansionSate a) )
228228+ ; ( "vertScrollState"
229229+ , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `VertScrollState a) )
230230+ ; ("windowTop", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowTop a))
231231+ ; ("windowLeft", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowLeft a))
232232+ ; ("windowBottom", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowBottom a))
233233+ ; ("windowRight", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowRight a))
234234+ ]
235235+ in
236236+ generate_catcher ~data_producer (fun ~pos:_ x -> `Head x)
237237+238238+type outline =
239239+ { text: string
240240+ ; typ: string option
241241+ ; is_comment: bool
242242+ ; (* see common attributes *)
243243+ is_breakpoint: bool
244244+ ; (* see common attributes *)
245245+ xml_url: Uri.t option
246246+ ; html_url: Uri.t option
247247+ ; attrs: Xmlm.attribute list
248248+ ; outlines: outline list }
249249+250250+let outline ?typ ?(is_comment = false) ?(is_breakpoint = false) ?xml_url
251251+ ?html_url ?(attrs = []) ?(outlines = []) text =
252252+ {text; typ; is_comment; is_breakpoint; xml_url; html_url; attrs; outlines}
253253+254254+let rec outline_of_node ~xmlbase ((pos, (_, attributes), datas) : node) =
255255+ let text = ref ""
256256+ and typ = ref None
257257+ and is_comment = ref false
258258+ and is_breakpoint = ref false
259259+ and xml_url = ref None
260260+ and html_url = ref None
261261+ and attrs = ref []
262262+ and outlines = ref [] in
263263+ (* Get xml:base first as it must be used the these attributes too. *)
264264+ let xmlbase = xmlbase_of_attr ~xmlbase attributes in
265265+ let process_attrs ((name, v) as attr) =
266266+ match name with
267267+ | _, "text" -> text := v
268268+ | _, "type" -> typ := Some v
269269+ | _, "isComment" -> (
270270+ try is_comment := bool_of_string v with _ ->
271271+ raise (Error.Error (pos, "<isComment> must have true or false value."))
272272+ )
273273+ | _, "isBreakpoint" -> (
274274+ try is_breakpoint := bool_of_string v with _ ->
275275+ raise
276276+ (Error.Error (pos, "<isBreakpoint> must have true or false value."))
277277+ )
278278+ | _, "xmlUrl" -> (
279279+ try xml_url := Some (XML.resolve ~xmlbase (Uri.of_string v)) with _ ->
280280+ raise (Error.Error (pos, "<xmlUrl> content must be an URL")) )
281281+ | _, "htmlUrl" -> (
282282+ try html_url := Some (XML.resolve ~xmlbase (Uri.of_string v)) with _ ->
283283+ raise (Error.Error (pos, "<htmlUrl> content must be an URL")) )
284284+ | _ -> attrs := attr :: !attrs
285285+ in
286286+ List.iter process_attrs attributes ;
287287+ let process_outlines = function
288288+ | XML.Node (p, (((ns, name), _) as t), d) ->
289289+ if ns = "" && name = "outline" then
290290+ outlines := outline_of_node ~xmlbase (p, t, d) :: !outlines
291291+ | XML.Data _ -> ()
292292+ in
293293+ List.iter process_outlines datas ;
294294+ { text= !text
295295+ ; typ= !typ
296296+ ; is_comment= !is_comment
297297+ ; is_breakpoint= !is_breakpoint
298298+ ; xml_url= !xml_url
299299+ ; html_url= !html_url
300300+ ; attrs= !attrs
301301+ ; outlines= !outlines }
302302+303303+let outline_of_xml ~xmlbase a = `Outline (outline_of_node ~xmlbase a)
304304+305305+let rec outline_of_node' ~xmlbase ((_pos, (_, attributes), datas) : node) =
306306+ let el = ref [] in
307307+ let xmlbase = xmlbase_of_attr ~xmlbase attributes in
308308+ let el_of_attrs (name, v) =
309309+ match name with
310310+ | _, "text" -> el := `Text v :: !el
311311+ | _, "type" -> el := `Type v :: !el
312312+ | _, "isComment" -> el := `IsComment v :: !el
313313+ | _, "isBreakpoint" -> el := `IsBreakpoint v :: !el
314314+ | _, "xmlUrl" -> el := `XML_url (xmlbase, v) :: !el
315315+ | _, "htmlUrl" -> el := `HTML_url (xmlbase, v) :: !el
316316+ | _, name -> el := `Attr (name, v) :: !el
317317+ in
318318+ List.iter el_of_attrs attributes ;
319319+ let process_outlines = function
320320+ | XML.Node (p, (((ns, name), _) as t), d) ->
321321+ if ns = "" && name = "outline" then
322322+ el := `Outline (outline_of_node' ~xmlbase (p, t, d)) :: !el
323323+ | XML.Data _ -> ()
324324+ in
325325+ List.iter process_outlines datas ;
326326+ !el
327327+328328+let outline_of_xml' ~xmlbase a = `Outline (outline_of_node' ~xmlbase a)
329329+330330+type body = outline list
331331+type body' = [`Outline of outline]
332332+333333+let make_body ~pos (l : [< body'] list) =
334334+ let l = List.map (function `Outline o -> o) l |> List.rev in
335335+ if List.length l <> 0 then `Body l
336336+ else raise (Error.Error (pos, "Body must contains one <outline> element."))
337337+338338+let body_of_xml =
339339+ let data_producer = [("outline", outline_of_xml)] in
340340+ generate_catcher ~data_producer make_body
341341+342342+let body_of_xml' =
343343+ let data_producer = [("outline", outline_of_xml')] in
344344+ generate_catcher ~data_producer (fun ~pos:_ x -> `Body x)
345345+346346+type t = {version: string; head: head; body: body}
347347+type opml = t
348348+349349+(* FIXME: @deprecated *)
350350+351351+type opml' = [`Version of string | `Head of head | `Body of body]
352352+353353+let make_opml ~pos (l : [< opml'] list) =
354354+ let version =
355355+ match find (function `Version _ -> true | _ -> false) l with
356356+ | Some (`Version v) -> v
357357+ | _ -> raise (Error.Error (pos, "Opml tag must have <version> attribut"))
358358+ in
359359+ let head =
360360+ match find (function `Head _ -> true | _ -> false) l with
361361+ | Some (`Head h) -> h
362362+ | _ ->
363363+ raise
364364+ (Error.Error (pos, "Opml tag must have exactly one <head> element"))
365365+ in
366366+ let body =
367367+ match find (function `Body _ -> true | _ -> false) l with
368368+ | Some (`Body b) -> b
369369+ | _ ->
370370+ raise
371371+ (Error.Error (pos, "Opml tag must have exactly one <body> element"))
372372+ in
373373+ {version; head; body}
374374+375375+let opml_of_xml =
376376+ let attr_producer = [("version", fun ~xmlbase:_ a -> `Version a)] in
377377+ let data_producer = [("head", head_of_xml); ("body", body_of_xml)] in
378378+ generate_catcher ~attr_producer ~data_producer make_opml
379379+380380+let opml_of_xml' =
381381+ let attr_producer = [("version", fun ~xmlbase:_ a -> `Version a)] in
382382+ let data_producer = [("head", head_of_xml'); ("body", body_of_xml')] in
383383+ generate_catcher ~attr_producer ~data_producer (fun ~pos:_ x -> x)
384384+385385+let find_opml l =
386386+ find (function XML.Node (_, t, _) -> tag_is t "opml" | _ -> false) l
387387+388388+let parse ?xmlbase input =
389389+ match XML.of_xmlm input |> snd with
390390+ | XML.Node (pos, tag, data) -> (
391391+ if tag_is tag "opml" then opml_of_xml ~xmlbase (pos, tag, data)
392392+ else
393393+ match find_opml data with
394394+ | Some (XML.Node (p, t, d)) -> opml_of_xml ~xmlbase (p, t, d)
395395+ | _ ->
396396+ raise
397397+ (Error.Error
398398+ ((0, 0), "document MUST contains exactly one <opml> element"))
399399+ )
400400+ | _ ->
401401+ raise
402402+ (Error.Error
403403+ ((0, 0), "document MUST contains exactly one <opml> element"))
404404+405405+let read ?xmlbase fname =
406406+ let fh = open_in fname in
407407+ try
408408+ let x = parse ?xmlbase (XML.input_of_channel fh) in
409409+ close_in fh ; x
410410+ with e -> close_in fh ; raise e
411411+412412+type uri = Uri.t option * string
413413+414414+let unsafe ?xmlbase input =
415415+ match XML.of_xmlm input |> snd with
416416+ | XML.Node (pos, tag, data) -> (
417417+ if tag_is tag "opml" then `Opml (opml_of_xml' ~xmlbase (pos, tag, data))
418418+ else
419419+ match find_opml data with
420420+ | Some (XML.Node (p, t, d)) -> `Opml (opml_of_xml' ~xmlbase (p, t, d))
421421+ | _ -> `Opml [] )
422422+ | _ -> `Opml []
423423+424424+(* Output functions *)
425425+426426+(* Names have have no namespace. This shortcut makes it more readable. *)
427427+let n x = ("", x)
428428+let node name sub = XML.Node (dummy_pos, (n name, []), sub)
429429+let data d = XML.Data (dummy_pos, d)
430430+431431+let add_node name opt to_string xml =
432432+ match opt with
433433+ | None -> xml
434434+ | Some d -> node name [data (to_string d)] :: xml
435435+436436+let head_to_xml h =
437437+ let xml =
438438+ add_node "windowRight" h.window_right string_of_int []
439439+ |> add_node "windowBottom" h.window_bottom string_of_int
440440+ |> add_node "windowLeft" h.window_left string_of_int
441441+ |> add_node "windowTop" h.window_top string_of_int
442442+ |> add_node "vertScrollState" h.vert_scroll_state string_of_int
443443+ |> (fun x ->
444444+ let c = List.map string_of_int h.expansion_state in
445445+ node "expansionState" [data (String.concat "," c)] :: x )
446446+ |> add_node "dateCreated" h.date_created Date.to_rfc822
447447+ in
448448+ node "title" [data h.title]
449449+ :: node "dateModified" [data (Date.to_rfc822 h.date_modified)]
450450+ :: node "ownerName" [data h.owner_name]
451451+ :: node "ownerEmail" [data h.owner_email]
452452+ :: xml
453453+454454+let add_attr name opt to_string attr =
455455+ match opt with None -> attr | Some d -> (n name, to_string d) :: attr
456456+457457+let id_string (s : string) = s
458458+459459+let rec outline_to_xml o =
460460+ (* isComment and isBreakpoint: absent <=> false *)
461461+ let attr =
462462+ if o.is_comment then (n "isComment", "true") :: o.attrs else o.attrs
463463+ in
464464+ let attr =
465465+ if o.is_breakpoint then (n "isBreakpoint", "true") :: attr else attr
466466+ in
467467+ let attr =
468468+ (n "text", o.text) :: attr
469469+ |> add_attr "type" o.typ id_string
470470+ |> add_attr "xmlUrl" o.xml_url Uri.to_string
471471+ |> add_attr "htmlUrl" o.html_url Uri.to_string
472472+ in
473473+ XML.Node (dummy_pos, (n "outline", attr), List.map outline_to_xml o.outlines)
474474+475475+let to_xml (o : t) =
476476+ XML.Node
477477+ ( dummy_pos
478478+ , (n "opml", [(n "version", o.version)])
479479+ , [ node "head" (head_to_xml o.head)
480480+ ; node "body" (List.map outline_to_xml o.body) ] )
481481+482482+let output opml dest = XML.to_xmlm (to_xml opml) (XML.make_output dest)
483483+484484+let write opml fname =
485485+ let fh = open_out fname in
486486+ try
487487+ output opml (`Channel fh) ;
488488+ close_out fh
489489+ with e -> close_out fh ; raise e
490490+491491+(* Creation from atom feeds *)
492492+493493+(* Remove all tags *)
494494+let rec add_to_buffer buf = function
495495+ | XML.Node (_, _, subs) -> List.iter (add_to_buffer buf) subs
496496+ | XML.Data (_, d) -> Buffer.add_string buf d
497497+498498+let xhtml_to_string ~buf xhtml =
499499+ Buffer.clear buf ;
500500+ List.iter (add_to_buffer buf) xhtml ;
501501+ Buffer.contents buf
502502+503503+let string_of_text_construct ~buf = function
504504+ (* FIXME: Once we use a proper HTML library, we probably would like to parse
505505+ the HTML and remove the tags *)
506506+ | (Syndic_atom.Text s : Syndic_atom.text_construct) | Syndic_atom.Html (_, s)
507507+ ->
508508+ s
509509+ | Syndic_atom.Xhtml (_, x) -> xhtml_to_string ~buf x
510510+511511+let rec first_non_empty = function
512512+ | a :: tl ->
513513+ if a.Syndic_atom.name = "" then first_non_empty tl
514514+ else a.Syndic_atom.name
515515+ | [] -> ""
516516+517517+let outine_of_feed ~buf (f : Syndic_atom.feed) =
518518+ let open Syndic_atom in
519519+ let author =
520520+ match f.authors with
521521+ | _ :: _ -> first_non_empty f.authors
522522+ | [] -> (
523523+ match f.entries with
524524+ | e :: _ ->
525525+ let a0, a = e.authors in
526526+ if a0.name = "" then first_non_empty a else a0.name
527527+ | [] -> "" )
528528+ in
529529+ let title = string_of_text_construct ~buf f.title in
530530+ let xml_url, is_comment =
531531+ try
532532+ let l = List.find (fun l -> l.rel = Self) f.links in
533533+ let is_comment =
534534+ match l.length with Some len -> len < 0 | None -> false
535535+ in
536536+ (Some l.href, is_comment)
537537+ with Not_found -> (None, false)
538538+ in
539539+ outline ~typ:"rss" ~is_comment
540540+ ~attrs:[(("", "title"), title)]
541541+ ?xml_url author
542542+543543+let of_atom ~head feeds =
544544+ let buf = Buffer.create 1024 in
545545+ {version= "1.1"; head; body= List.map (outine_of_feed ~buf) feeds}
+177
stack/syndic/lib/syndic_opml1.mli
···11+(** [Syndic.Opml1]: compliant with {{:http://dev.opml.org/spec1.html} OPML
22+ 1.0}.
33+44+ The purpose of the {i Outline Processor Markup Language}, or OPML, is to
55+ provide a way to exchange information between outliners and Internet
66+ services that can be browsed or controlled through an outliner. Outlines
77+ can be used for specifications, legal briefs, product plans, presentations,
88+ screenplays, directories, diaries, discussion groups, chat systems and
99+ stories. *)
1010+1111+module Error : module type of Syndic_error
1212+1313+type head =
1414+ { title: string (** Title of the document. *)
1515+ ; date_created: Syndic_date.t option
1616+ (** A date-time indicating when the document was created. *)
1717+ ; date_modified: Syndic_date.t
1818+ (** A date-time indicating when the document was last modified. *)
1919+ ; owner_name: string (** Owner of the document. *)
2020+ ; owner_email: string (** Email address of the owner of the document. *)
2121+ ; expansion_state: int list
2222+ (** A comma-separated list of line numbers that are expanded. The line
2323+ numbers in the list tell you which headlines to expand. The order
2424+ is important. For each element in the list, X, starting at the
2525+ first summit, navigate flatdown X times and expand. Repeat for each
2626+ element in the list. *)
2727+ ; vert_scroll_state: int option
2828+ (** A number saying which line of the outline is displayed on the top
2929+ line of the window. This number is calculated with the expansion
3030+ state already applied. *)
3131+ ; window_top: int option
3232+ (** Pixel location of the top edge of the window. *)
3333+ ; window_left: int option
3434+ (** Pixel location of the left edge of the window. *)
3535+ ; window_bottom: int option
3636+ (** Pixel location of the bottom edge of the window. *)
3737+ ; window_right: int option
3838+ (** Pixel location of the right edge of the window. *) }
3939+4040+val head :
4141+ ?date_created:Syndic_date.t
4242+ -> ?expansion_state:int list
4343+ -> ?vert_scroll_state:int
4444+ -> ?window_top:int
4545+ -> ?window_left:int
4646+ -> ?window_bottom:int
4747+ -> ?window_right:int
4848+ -> date_modified:Syndic_date.t
4949+ -> owner_name:string
5050+ -> owner_email:string
5151+ -> string
5252+ -> head
5353+(** [head ~date_modified ~owner_name ~owner_email title] returns a head. By
5454+ default, all optional arguments leave the corresponding fields empty. *)
5555+5656+type outline =
5757+ { text: string
5858+ (** String that's displayed when the outline is being browsed or
5959+ edited. There is no specific limit on the length of the text
6060+ attribute.*)
6161+ ; typ: string option
6262+ (** "Type" of outline. Says how other attributes of the [outline] are
6363+ interpreted. This is application dependent. For example, for news
6464+ feed, it is common to have "rss" as the value of this field. *)
6565+ ; is_comment: bool
6666+ (** Indicates whether the outline is commented or not. By convention if
6767+ an outline is commented, all subordinate outlines are considered to
6868+ be commented as well. *)
6969+ ; is_breakpoint: bool
7070+ (** Indicates whether a breakpoint is set on this outline. This
7171+ attribute is mainly necessary for outlines used to edit scripts
7272+ that execute. *)
7373+ ; xml_url: Uri.t option
7474+ (** Link to the XML data associated to this outline, typically the RSS
7575+ feed. *)
7676+ ; html_url: Uri.t option
7777+ (** Link to the HTML data associated to this outline, typically the
7878+ HTML pages rendering the news feed. *)
7979+ ; attrs: Xmlm.attribute list
8080+ (** Association list of additional attributes in the outline. *)
8181+ ; outlines: outline list
8282+ (** List of [outline] elements that are considered sub-items of the
8383+ current outline. *) }
8484+8585+val outline :
8686+ ?typ:string
8787+ -> ?is_comment:bool
8888+ -> ?is_breakpoint:bool
8989+ -> ?xml_url:Uri.t
9090+ -> ?html_url:Uri.t
9191+ -> ?attrs:Xmlm.attribute list
9292+ -> ?outlines:outline list
9393+ -> string
9494+ -> outline
9595+(** [outline text] returns an outline.
9696+9797+ @param is_comment Default: [false]. @param is_breakpoint Default: [false].
9898+9999+ All the other parameters are bu default empty. *)
100100+101101+(** List of outline elements. *)
102102+type body = outline list
103103+104104+type t =
105105+ { version: string (** The version of OPML document (should be 1.0 or 1.1) *)
106106+ ; head: head
107107+ ; body: body }
108108+109109+val parse : ?xmlbase:Uri.t -> Xmlm.input -> t
110110+(** [parse i] takes [i] and returns an opml record which is the OCaml
111111+ representation of the OPML document. *)
112112+113113+val read : ?xmlbase:Uri.t -> string -> t
114114+(** [read fname] reads the file name [fname] and parses it. For the optional
115115+ parameters, see {!parse}. *)
116116+117117+val to_xml : t -> Syndic_xml.t
118118+(** [to_xml opml] converts the OPML document [opml] to an XML tree. *)
119119+120120+val output : t -> Xmlm.dest -> unit
121121+(** [output opml dest] writes the XML tree of the OPML document [opml] to
122122+ [dest]. *)
123123+124124+val write : t -> string -> unit
125125+(** [write opml fname] writes the XML tree of the OPML document [opml] to the
126126+ file named [fname]. *)
127127+128128+val of_atom : head:head -> Syndic_atom.feed list -> t
129129+(** [of_atom ~head feeds] returns the OPML list of authors of the atom feeds.
130130+ The [text] is the name associated to a feed, i.e. the name of the first
131131+ author in the feed authors list or, if empty, the one of the first post. It
132132+ is important that the feeds contain a link entry with [rel = Self] for
133133+ the OPML document to be able to create a [xml_url] entry pointing to the
134134+ feed.
135135+136136+ As a special convention, if the length of the [rel = Self] link is
137137+ present and negative, the property [is_comment] is set to [true]. *)
138138+139139+(**/**)
140140+141141+(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None],
142142+ gives the base URI against which [uri] must be resolved if it is relative. *)
143143+type uri = Uri.t option * string
144144+145145+val unsafe :
146146+ ?xmlbase:Uri.t
147147+ -> Xmlm.input
148148+ -> [> `Opml of [> `Body of [> `Outline of ([> `Text of string
149149+ | `Type of string
150150+ | `IsBreakpoint of string
151151+ | `IsComment of string
152152+ | `Outline of 'a
153153+ | `XML_url of uri
154154+ | `HTML_url of uri
155155+ | `Attr of string * string ]
156156+ list
157157+ as
158158+ 'a) ]
159159+ list
160160+ | `Head of [> `DateCreated of string
161161+ | `DateModified of string
162162+ | `ExpansionSate of string
163163+ | `OwnerEmail of string
164164+ | `OwnerName of string
165165+ | `Title of string
166166+ | `VertScrollState of string
167167+ | `WindowBottom of string
168168+ | `WindowLeft of string
169169+ | `WindowRight of string
170170+ | `WindowTop of string ]
171171+ list
172172+ | `Version of string ]
173173+ list ]
174174+(** Analysis without verification. *)
175175+176176+(** @deprecated Use Syndic.Opml1.t instead. *)
177177+type opml = t
+562
stack/syndic/lib/syndic_rss1.ml
···11+open Syndic_common.XML
22+open Syndic_common.Util
33+module XML = Syndic_xml
44+module Error = Syndic_error
55+66+let namespaces =
77+ ["http://purl.org/rss/1.0/"; "http://www.w3.org/1999/02/22-rdf-syntax-ns#"]
88+99+type title = string
1010+1111+let make_title ~pos (l : string list) =
1212+ let title =
1313+ match l with
1414+ | d :: _ -> d
1515+ | [] ->
1616+ raise
1717+ (Error.Error
1818+ (pos, "The content of <title> MUST be a non-empty string"))
1919+ in
2020+ `Title title
2121+2222+let title_of_xml, title_of_xml' =
2323+ let leaf_producer ~xmlbase:_ _pos data = data in
2424+ ( generate_catcher ~namespaces ~leaf_producer make_title
2525+ , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Title x) )
2626+2727+type name = string
2828+2929+let make_name ~pos (l : string list) =
3030+ let name =
3131+ match l with
3232+ | d :: _ -> d
3333+ | [] ->
3434+ raise
3535+ (Error.Error (pos, "The content of <name> MUST be a non-empty string"))
3636+ in
3737+ `Name name
3838+3939+let name_of_xml, name_of_xml' =
4040+ let leaf_producer ~xmlbase:_ _pos data = data in
4141+ ( generate_catcher ~namespaces ~leaf_producer make_name
4242+ , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Name x) )
4343+4444+type description = string
4545+4646+let make_description ~pos (l : string list) =
4747+ let description =
4848+ match l with
4949+ | s :: _ -> s
5050+ | [] ->
5151+ raise
5252+ (Error.Error
5353+ (pos, "The content of <description> MUST be a non-empty string"))
5454+ in
5555+ `Description description
5656+5757+let description_of_xml, description_of_xml' =
5858+ let leaf_producer ~xmlbase:_ _pos data = data in
5959+ ( generate_catcher ~namespaces ~leaf_producer make_description
6060+ , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Description x)
6161+ )
6262+6363+type channel_image = Uri.t
6464+type channel_image' = [`URI of Uri.t option * string]
6565+6666+let make_channel_image ~pos (l : [< channel_image'] list) =
6767+ let image =
6868+ match find (function `URI _ -> true) l with
6969+ | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u)
7070+ | _ ->
7171+ raise
7272+ (Error.Error
7373+ (pos, "The content of <image> MUST be a non-empty string"))
7474+ in
7575+ `Image image
7676+7777+let channel_image_of_xml, channel_image_of_xml' =
7878+ let attr_producer = [("resource", fun ~xmlbase a -> `URI (xmlbase, a))] in
7979+ ( generate_catcher ~namespaces ~attr_producer make_channel_image
8080+ , generate_catcher ~namespaces ~attr_producer (fun ~pos:_ x -> `Image x) )
8181+8282+type link = Uri.t
8383+type link' = [`URI of Uri.t option * string]
8484+8585+let make_link ~pos (l : [< link'] list) =
8686+ let link =
8787+ match find (function `URI _ -> true) l with
8888+ | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u)
8989+ | _ ->
9090+ raise
9191+ (Error.Error (pos, "The content of <link> MUST be a non-empty string"))
9292+ in
9393+ `Link link
9494+9595+let link_of_xml, link_of_xml' =
9696+ let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in
9797+ ( generate_catcher ~namespaces ~leaf_producer make_link
9898+ , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Link x) )
9999+100100+type url = Uri.t
101101+type url' = [`URI of Uri.t option * string]
102102+103103+let make_url ~pos (l : [< url'] list) =
104104+ let url =
105105+ match find (function `URI _ -> true) l with
106106+ | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u)
107107+ | _ ->
108108+ raise
109109+ (Error.Error (pos, "The content of <url> MUST be a non-empty string"))
110110+ in
111111+ `URL url
112112+113113+let url_of_xml, url_of_xml' =
114114+ let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in
115115+ ( generate_catcher ~namespaces ~leaf_producer make_url
116116+ , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `URL x) )
117117+118118+type li = Uri.t
119119+type li' = [`URI of Uri.t option * string]
120120+121121+let make_li ~pos (l : [< li'] list) =
122122+ let url =
123123+ match find (function `URI _ -> true) l with
124124+ | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u)
125125+ | _ ->
126126+ raise
127127+ (Error.Error (pos, "Li elements MUST have a 'resource' attribute"))
128128+ in
129129+ `Li url
130130+131131+let li_of_xml, li_of_xml' =
132132+ let attr_producer = [("resource", fun ~xmlbase a -> `URI (xmlbase, a))] in
133133+ ( generate_catcher ~namespaces ~attr_producer make_li
134134+ , generate_catcher ~namespaces ~attr_producer (fun ~pos:_ x -> `Li x) )
135135+136136+type seq = li list
137137+type seq' = [`Li of li]
138138+139139+let make_seq ~pos:_ (l : [< seq'] list) =
140140+ let li = List.map (function `Li u -> u) l in
141141+ `Seq li
142142+143143+let seq_of_xml =
144144+ let data_producer = [("li", li_of_xml)] in
145145+ generate_catcher ~namespaces ~data_producer make_seq
146146+147147+let seq_of_xml' =
148148+ let data_producer = [("li", li_of_xml')] in
149149+ generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Seq x)
150150+151151+type items = seq
152152+type items' = [`Seq of seq]
153153+154154+let make_items ~pos (l : [< items'] list) =
155155+ let li =
156156+ match find (function `Seq _ -> true) l with
157157+ | Some (`Seq l) -> l
158158+ | _ ->
159159+ raise
160160+ (Error.Error
161161+ ( pos
162162+ , "<items> elements MUST contains exactly one <rdf:Seq> element"
163163+ ))
164164+ in
165165+ `Items li
166166+167167+let items_of_xml =
168168+ let data_producer = [("Seq", seq_of_xml)] in
169169+ generate_catcher ~namespaces ~data_producer make_items
170170+171171+let items_of_xml' =
172172+ let data_producer = [("Seq", seq_of_xml')] in
173173+ generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Items x)
174174+175175+type channel_textinput = Uri.t
176176+type channel_textinput' = [`URI of Uri.t option * string]
177177+178178+let make_textinput ~pos (l : [< channel_textinput'] list) =
179179+ let url =
180180+ match find (function `URI _ -> true) l with
181181+ | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u)
182182+ | _ ->
183183+ raise
184184+ (Error.Error
185185+ (pos, "Textinput elements MUST have a 'resource' attribute"))
186186+ in
187187+ `TextInput url
188188+189189+let channel_textinput_of_xml, channel_textinput_of_xml' =
190190+ let attr_producer = [("resource", fun ~xmlbase a -> `URI (xmlbase, a))] in
191191+ ( generate_catcher ~namespaces ~attr_producer make_textinput
192192+ , generate_catcher ~namespaces ~attr_producer (fun ~pos:_ x -> `TextInput x)
193193+ )
194194+195195+type channel =
196196+ { about: Uri.t
197197+ ; (* must be uniq *)
198198+ title: title
199199+ ; link: link
200200+ ; description: description
201201+ ; image: channel_image option
202202+ ; items: items
203203+ ; textinput: channel_textinput option }
204204+205205+type channel' =
206206+ [ `Title of title
207207+ | `Link of link
208208+ | `Description of description
209209+ | `Image of channel_image
210210+ | `Items of items
211211+ | `TextInput of channel_textinput
212212+ | `About of Uri.t ]
213213+214214+let make_channel ~pos (l : [< channel'] list) =
215215+ let about =
216216+ match find (function `About _ -> true | _ -> false) l with
217217+ | Some (`About u) -> u
218218+ | _ ->
219219+ raise
220220+ (Error.Error (pos, "Channel elements MUST have a 'about' attribute"))
221221+ in
222222+ let title =
223223+ match find (function `Title _ -> true | _ -> false) l with
224224+ | Some (`Title s) -> s
225225+ | _ ->
226226+ raise
227227+ (Error.Error
228228+ ( pos
229229+ , "<channel> elements MUST contains exactly one <title> element"
230230+ ))
231231+ in
232232+ let link =
233233+ match find (function `Link _ -> true | _ -> false) l with
234234+ | Some (`Link u) -> u
235235+ | _ ->
236236+ raise
237237+ (Error.Error
238238+ ( pos
239239+ , "<channel> elements MUST contains exactly one <link> element" ))
240240+ in
241241+ let description =
242242+ match find (function `Description _ -> true | _ -> false) l with
243243+ | Some (`Description s) -> s
244244+ | _ ->
245245+ raise
246246+ (Error.Error
247247+ ( pos
248248+ , "<channel> elements MUST contains exactly one <description> \
249249+ element" ))
250250+ in
251251+ let image =
252252+ match find (function `Image _ -> true | _ -> false) l with
253253+ | Some (`Image i) -> Some i
254254+ | _ -> None
255255+ in
256256+ let items =
257257+ match find (function `Items _ -> true | _ -> false) l with
258258+ | Some (`Items l) -> l
259259+ | _ ->
260260+ raise
261261+ (Error.Error
262262+ ( pos
263263+ , "<channel> elements MUST contains exactly one <items> element"
264264+ ))
265265+ in
266266+ let textinput =
267267+ match find (function `TextInput _ -> true | _ -> false) l with
268268+ | Some (`TextInput u) -> Some u
269269+ | _ -> None
270270+ in
271271+ `Channel
272272+ ({about; title; link; description; image; items; textinput} : channel)
273273+274274+let about_of_xml ~xmlbase a = `About (XML.resolve ~xmlbase (Uri.of_string a))
275275+let about_of_xml' ~xmlbase a = `About (xmlbase, a)
276276+277277+let channel_of_xml =
278278+ let data_producer =
279279+ [ ("title", title_of_xml); ("link", link_of_xml)
280280+ ; ("description", description_of_xml)
281281+ ; ("image", channel_image_of_xml)
282282+ ; ("items", items_of_xml)
283283+ ; ("textinput", channel_textinput_of_xml) ]
284284+ in
285285+ let attr_producer = [("about", about_of_xml)] in
286286+ generate_catcher ~namespaces ~attr_producer ~data_producer make_channel
287287+288288+let channel_of_xml' =
289289+ let data_producer =
290290+ [ ("title", title_of_xml'); ("link", link_of_xml')
291291+ ; ("description", description_of_xml')
292292+ ; ("image", channel_image_of_xml')
293293+ ; ("items", items_of_xml')
294294+ ; ("textinput", channel_textinput_of_xml') ]
295295+ in
296296+ let attr_producer = [("about", about_of_xml')] in
297297+ generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x ->
298298+ `Channel x )
299299+300300+type image = {about: Uri.t; title: title; url: url; link: link}
301301+type image' = [`Title of title | `Link of link | `URL of url | `About of Uri.t]
302302+303303+let make_image ~pos (l : [< image'] list) =
304304+ let title =
305305+ match find (function `Title _ -> true | _ -> false) l with
306306+ | Some (`Title t) -> t
307307+ | _ ->
308308+ raise
309309+ (Error.Error
310310+ (pos, "<image> elements MUST contains exactly one <title> element"))
311311+ in
312312+ let link =
313313+ match find (function `Link _ -> true | _ -> false) l with
314314+ | Some (`Link u) -> u
315315+ | _ ->
316316+ raise
317317+ (Error.Error
318318+ (pos, "<image> elements MUST contains exactly one <link> element"))
319319+ in
320320+ let url =
321321+ match find (function `URL _ -> true | _ -> false) l with
322322+ | Some (`URL u) -> u
323323+ | _ ->
324324+ raise
325325+ (Error.Error
326326+ (pos, "<image> elements MUST contains exactly one <url> element"))
327327+ in
328328+ let about =
329329+ match find (function `About _ -> true | _ -> false) l with
330330+ | Some (`About a) -> a
331331+ | _ ->
332332+ raise
333333+ (Error.Error (pos, "Image elements MUST have a 'about' attribute"))
334334+ in
335335+ `Image ({about; title; url; link} : image)
336336+337337+let image_of_xml =
338338+ let data_producer =
339339+ [("title", title_of_xml); ("link", link_of_xml); ("url", url_of_xml)]
340340+ in
341341+ let attr_producer = [("about", about_of_xml)] in
342342+ generate_catcher ~namespaces ~attr_producer ~data_producer make_image
343343+344344+let image_of_xml' =
345345+ let data_producer =
346346+ [("title", title_of_xml'); ("link", link_of_xml'); ("url", url_of_xml')]
347347+ in
348348+ let attr_producer = [("about", about_of_xml')] in
349349+ generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x ->
350350+ `Image x )
351351+352352+type item =
353353+ {about: Uri.t; title: title; link: link; description: description option}
354354+355355+type item' =
356356+ [ `Title of title
357357+ | `Link of link
358358+ | `Description of description
359359+ | `About of Uri.t ]
360360+361361+let make_item ~pos (l : [< item'] list) =
362362+ let title =
363363+ match find (function `Title _ -> true | _ -> false) l with
364364+ | Some (`Title t) -> t
365365+ | _ ->
366366+ raise
367367+ (Error.Error
368368+ (pos, "<item> elements MUST contains exactly one <title> element"))
369369+ in
370370+ let link =
371371+ match find (function `Link _ -> true | _ -> false) l with
372372+ | Some (`Link u) -> u
373373+ | _ ->
374374+ raise
375375+ (Error.Error
376376+ (pos, "<item> elements MUST contains exactly one <link> element"))
377377+ in
378378+ let description =
379379+ match find (function `Description _ -> true | _ -> false) l with
380380+ | Some (`Description d) -> Some d
381381+ | _ -> None
382382+ in
383383+ let about =
384384+ match find (function `About _ -> true | _ -> false) l with
385385+ | Some (`About u) -> u
386386+ | _ ->
387387+ raise
388388+ (Error.Error (pos, "Item elements MUST have a 'about' attribute"))
389389+ in
390390+ `Item ({about; title; link; description} : item)
391391+392392+let item_of_xml =
393393+ let data_producer =
394394+ [ ("title", title_of_xml); ("link", link_of_xml)
395395+ ; ("description", description_of_xml) ]
396396+ in
397397+ let attr_producer = [("about", about_of_xml)] in
398398+ generate_catcher ~namespaces ~attr_producer ~data_producer make_item
399399+400400+let item_of_xml' =
401401+ let data_producer =
402402+ [ ("title", title_of_xml'); ("link", link_of_xml')
403403+ ; ("description", description_of_xml') ]
404404+ in
405405+ let attr_producer = [("about", about_of_xml')] in
406406+ generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x ->
407407+ `Item x )
408408+409409+type textinput =
410410+ {about: Uri.t; title: title; description: description; name: name; link: link}
411411+412412+type textinput' =
413413+ [ `About of Uri.t
414414+ | `Title of title
415415+ | `Description of description
416416+ | `Name of name
417417+ | `Link of link ]
418418+419419+let make_textinput ~pos (l : [< textinput'] list) =
420420+ let title =
421421+ match find (function `Title _ -> true | _ -> false) l with
422422+ | Some (`Title s) -> s
423423+ | _ ->
424424+ raise
425425+ (Error.Error
426426+ ( pos
427427+ , "<textinput> elements MUST contains exactly one <title> element"
428428+ ))
429429+ in
430430+ let description =
431431+ match find (function `Description _ -> true | _ -> false) l with
432432+ | Some (`Description s) -> s
433433+ | _ ->
434434+ raise
435435+ (Error.Error
436436+ ( pos
437437+ , "<textinput> elements MUST contains exactly one <description> \
438438+ element" ))
439439+ in
440440+ let name =
441441+ match find (function `Name _ -> true | _ -> false) l with
442442+ | Some (`Name n) -> n
443443+ | _ ->
444444+ raise
445445+ (Error.Error
446446+ ( pos
447447+ , "<textinput> elements MUST contains exactly one <name> element"
448448+ ))
449449+ in
450450+ let link =
451451+ match find (function `Link _ -> true | _ -> false) l with
452452+ | Some (`Link u) -> u
453453+ | _ ->
454454+ raise
455455+ (Error.Error
456456+ ( pos
457457+ , "<textinput> elements MUST contains exactly one <link> element"
458458+ ))
459459+ in
460460+ let about =
461461+ match find (function `About _ -> true | _ -> false) l with
462462+ | Some (`About u) -> u
463463+ | _ ->
464464+ raise
465465+ (Error.Error (pos, "Textinput elements MUST have a 'about' attribute"))
466466+ in
467467+ `TextInput ({about; title; description; name; link} : textinput)
468468+469469+let textinput_of_xml =
470470+ let data_producer =
471471+ [ ("title", title_of_xml)
472472+ ; ("description", description_of_xml)
473473+ ; ("name", name_of_xml); ("link", link_of_xml) ]
474474+ in
475475+ let attr_producer = [("about", about_of_xml)] in
476476+ generate_catcher ~namespaces ~attr_producer ~data_producer make_textinput
477477+478478+let textinput_of_xml' =
479479+ let data_producer =
480480+ [ ("title", title_of_xml')
481481+ ; ("description", description_of_xml')
482482+ ; ("name", name_of_xml'); ("link", link_of_xml') ]
483483+ in
484484+ let attr_producer = [("about", about_of_xml')] in
485485+ generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x ->
486486+ `TextInput x )
487487+488488+type rdf =
489489+ { channel: channel
490490+ ; image: image option
491491+ ; item: item list
492492+ ; textinput: textinput option }
493493+494494+type rdf' =
495495+ [ `Channel of channel
496496+ | `Image of image
497497+ | `Item of item
498498+ | `TextInput of textinput ]
499499+500500+let make_rdf ~pos (l : [< rdf'] list) =
501501+ let channel =
502502+ match find (function `Channel _ -> true | _ -> false) l with
503503+ | Some (`Channel channel) -> channel
504504+ | _ ->
505505+ raise
506506+ (Error.Error
507507+ (pos, "<rdf> elements MUST contains exactly one <channel> element"))
508508+ in
509509+ let image =
510510+ match find (function `Image _ -> true | _ -> false) l with
511511+ | Some (`Image image) -> Some image
512512+ | _ -> None
513513+ in
514514+ let textinput =
515515+ match find (function `TextInput _ -> true | _ -> false) l with
516516+ | Some (`TextInput textinput) -> Some textinput
517517+ | _ -> None
518518+ in
519519+ let item =
520520+ List.fold_left (fun acc -> function `Item x -> x :: acc | _ -> acc) [] l
521521+ in
522522+ ({channel; image; item; textinput} : rdf)
523523+524524+let rdf_of_xml =
525525+ let data_producer =
526526+ [ ("channel", channel_of_xml)
527527+ ; ("image", image_of_xml); ("item", item_of_xml)
528528+ ; ("textinput", textinput_of_xml) ]
529529+ in
530530+ generate_catcher ~namespaces ~data_producer make_rdf
531531+532532+let rdf_of_xml' =
533533+ let data_producer =
534534+ [ ("channel", channel_of_xml')
535535+ ; ("image", image_of_xml'); ("item", item_of_xml')
536536+ ; ("textinput", textinput_of_xml') ]
537537+ in
538538+ generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> x)
539539+540540+let parse ?xmlbase input =
541541+ match XML.of_xmlm input |> snd with
542542+ | XML.Node (pos, tag, datas) when tag_is tag "RDF" ->
543543+ rdf_of_xml ~xmlbase (pos, tag, datas)
544544+ | _ ->
545545+ raise
546546+ (Error.Error
547547+ ((0, 0), "document MUST contains exactly one <rdf> element"))
548548+549549+let read ?xmlbase fname =
550550+ let fh = open_in fname in
551551+ try
552552+ let x = parse ?xmlbase (XML.input_of_channel fh) in
553553+ close_in fh ; x
554554+ with e -> close_in fh ; raise e
555555+556556+type uri = Uri.t option * string
557557+558558+let unsafe ?xmlbase input =
559559+ match XML.of_xmlm input |> snd with
560560+ | XML.Node (pos, tag, datas) when tag_is tag "RDF" ->
561561+ `RDF (rdf_of_xml' ~xmlbase (pos, tag, datas))
562562+ | _ -> `RDF []
+244
stack/syndic/lib/syndic_rss1.mli
···11+(** [Syndic.Rss1]: compliant with {{: http://web.resource.org/rss/1.0/spec} RSS
22+ 1.0}. *)
33+44+module Error : module type of Syndic_error
55+66+(** A descriptive title for the channel, image, item and textinput. See RSS 1.0
77+ {{: http://web.resource.org/rss/1.0/spec#s5.3.1} § 5.3.1}, {{:
88+ http://web.resource.org/rss/1.0/spec#s5.4.1} § 5.4.1}, {{:
99+ http://web.resource.org/rss/1.0/spec#s5.5.1} § 5.5.1}, and {{:
1010+ http://web.resource.org/rss/1.0/spec#s5.6.1} § 5.6.1}.
1111+1212+ {[ Syntax: <title>{title}</title> Requirement: Required for all Model:
1313+ (#PCDATA) (Suggested) Maximum Length: 40 (characters) for channel, image
1414+ and textinput (Suggested) Maximum Length: 100 for item ]} *)
1515+type title = string
1616+1717+(** The text input field's (variable) name. {{:
1818+ http://web.resource.org/rss/1.0/spec#s5.6.3} See RSS 1.0 § 5.6.3}.
1919+2020+ {[ Syntax: <name>{textinput_varname}</name> Requirement: Required if
2121+ textinput Model: (#PCDATA) (Suggested) Maximum Length: 500 ]} *)
2222+type name = string
2323+2424+(** This can be - a brief description of the channel's content, function,
2525+ source, etc. {{: http://web.resource.org/rss/1.0/spec#s5.3.3} See RSS 1.0
2626+ § 5.3.3}; - or a brief description/abstract of the item. {{:
2727+ http://web.resource.org/rss/1.0/spec#s5.5.3} See RSS 1.0 § 5.5.3}; - or a
2828+ brief description of the textinput field's purpose. For example: "Subscribe
2929+ to our newsletter for..." or "Search our site's archive of..." {{:
3030+ http://web.resource.org/rss/1.0/spec#s5.6.2} See RSS 1.0 § 5.6.2}.
3131+3232+ {[ Syntax: <description>{description}</description> Requirement: Required
3333+ only for channel and textinput Model: (#PCDATA) (Suggested) Maximum Length:
3434+ 500 for channel and item (Suggested) Maximum Length: 100 for textinput ]} *)
3535+type description = string
3636+3737+(** Establishes an RDF association between the optional image element [5.4] and
3838+ this particular RSS channel. The rdf:resource's {image_uri} must be the
3939+ same as the image element's rdf:about {image_uri}. {{:
4040+ http://web.resource.org/rss/1.0/spec#s5.3.4} See RSS 1.0 § 5.3.4}
4141+4242+ {[ Syntax: <image rdf:resource="{image_uri}" /> Requirement: Required only
4343+ if image element present Model: Empty ]} *)
4444+type channel_image = Uri.t
4545+4646+(** The URL of the image to used in the "src" attribute of the channel's image
4747+ tag when rendered as HTML. {{: http://web.resource.org/rss/1.0/spec#s5.4.2}
4848+ See RSS 1.0 § 5.4.2}
4949+5050+ {[ Syntax: <url>{image_url}</url> Requirement: Required if the image
5151+ element is present Model: (#PCDATA) (Suggested) Maximum Length: 500 ]} *)
5252+type url = Uri.t
5353+5454+(** This can be - The URL to which an HTML rendering of the channel title will
5555+ link, commonly the parent site's home or news page. {{:
5656+ http://web.resource.org/rss/1.0/spec#s5.3.2} See RSS 1.0 § 5.3.2} - Or the
5757+ URL to which an HTML rendering of the channel image will link. This, as
5858+ with the channel's title link, is commonly the parent site's home or news
5959+ page. {{: http://web.resource.org/rss/1.0/spec#s5.4.3} See RSS 1.0 §
6060+ 5.4.3} - Or the item's URL. {{:
6161+ http://web.resource.org/rss/1.0/spec#s5.5.2} See RSS 1.0 § 5.5.2} - Or the
6262+ URL to which a textinput submission will be directed (using GET). {{:
6363+ http://web.resource.org/rss/1.0/spec#s5.6.4} See RSS 1.0 § 5.6.4}
6464+6565+ {[ Syntax: <link>{link}</link> Requirement: Required for all Model:
6666+ (#PCDATA) (Suggested) Maximum Length: 500 ]} *)
6767+type link = Uri.t
6868+6969+(** An RDF table of contents, associating the document's items [5.5] with this
7070+ particular RSS channel. Each item's rdf:resource {item_uri} must be the
7171+ same as the associated item element's rdf:about {item_uri}.
7272+7373+ An RDF Seq (sequence) is used to contain all the items rather than an RDF
7474+ Bag to denote item order for rendering and reconstruction.
7575+7676+ Note that items appearing in the document but not as members of the channel
7777+ level items sequence are likely to be discarded by RDF parsers.
7878+7979+ {{: http://web.resource.org/rss/1.0/spec#s5.3.5} See RSS 1.0 § 5.3.5}
8080+8181+ {[ Syntax: <items><rdf:Seq><rdf:li resource="{item_uri}" /> ...
8282+ </rdf:Seq></items> Requirement: Required ]} *)
8383+type items = Uri.t list
8484+8585+(** Establishes an RDF association between the optional textinput element [5.6]
8686+ and this particular RSS channel. The {textinput_uri} rdf:resource must be
8787+ the same as the textinput element's rdf:about {textinput_uri}.
8888+8989+ {{: http://web.resource.org/rss/1.0/spec#s5.3.6} See RSS 1.0 § 5.3.6}
9090+9191+ {[ Syntax: <textinput rdf:resource="{textinput_uri}" /> Requirement:
9292+ Required only if texinput element present Model: Empty ]} *)
9393+type channel_textinput = Uri.t
9494+9595+(** The channel element contains metadata describing the channel itself,
9696+ including a title, brief description, and URL link to the described
9797+ resource (the channel provider's home page, for instance). The \{resource\}
9898+ URL of the channel element's rdf:about attribute must be unique with
9999+ respect to any other rdf:about attributes in the RSS document and is a URI
100100+ which identifies the channel. Most commonly, this is either the URL of the
101101+ homepage being described or a URL where the RSS file can be found.
102102+103103+ {{: http://web.resource.org/rss/1.0/spec#s5.3} See RSS 1.0 § 5.3}
104104+105105+ {[ Syntax: <channel rdf:about="{resource}"> Requirement: Required Required
106106+ Attribute(s): rdf:about Model: (title, link, description, image?, items,
107107+ textinput?) ]} *)
108108+type channel =
109109+ { about: Uri.t (** must be unique *)
110110+ ; title: title
111111+ ; link: link
112112+ ; description: description
113113+ ; image: channel_image option
114114+ ; items: items
115115+ ; textinput: channel_textinput option }
116116+117117+(** An image to be associated with an HTML rendering of the channel. This image
118118+ should be of a format supported by the majority of Web browsers. While the
119119+ later 0.91 specification allowed for a width of 1–144 and height of
120120+ 1–400, convention (and the 0.9 specification) dictate 88×31.
121121+122122+ {{: http://web.resource.org/rss/1.0/spec#s5.4} See RSS 1.0 § 5.4}
123123+124124+ {[ Syntax: <image rdf:about="{image_uri}"> Requirement: Optional; if
125125+ present, must also be present in channel element [5.3.4] Required
126126+ Attribute(s): rdf:about Model: (title, url, link) ]} *)
127127+type image = {about: Uri.t; title: title; url: url; link: link}
128128+129129+(** While commonly a news headline, with RSS 1.0's modular extensibility, this
130130+ can be just about anything: discussion posting, job listing, software patch
131131+ -- any object with a URI. There may be a minimum of one item per RSS
132132+ document. While RSS 1.0 does not enforce an upper limit, for backward
133133+ compatibility with RSS 0.9 and 0.91, a maximum of fifteen items is
134134+ recommended.
135135+136136+ [about] must be unique with respect to any other rdf:about attributes in
137137+ the RSS document and is a URI which identifies the item. The value of
138138+ [about] should be identical to the value of the [link], if possible.
139139+140140+ {{: http://web.resource.org/rss/1.0/spec#s5.5} See RSS 1.0 § 5.5}
141141+142142+ {[ Syntax: <item rdf:about="{item_uri}"> Requirement: >= 1 Recommendation
143143+ (for backward compatibility with 0.9x): 1-15 Required Attribute(s):
144144+ rdf:about Model: (title, link, description?) ]} *)
145145+type item =
146146+ {about: Uri.t; title: title; link: link; description: description option}
147147+148148+(** The textinput element affords a method for submitting form data to an
149149+ arbitrary URL — usually located at the parent website. The form processor
150150+ at the receiving end only is assumed to handle the HTTP GET method.
151151+152152+ The field is typically used as a search box or subscription form — among
153153+ others. While this is of some use when RSS documents are rendered as
154154+ channels (see MNN) and accompanied by human readable title and description,
155155+ the ambiguity in automatic determination of meaning of this overloaded
156156+ element renders it otherwise not particularly useful. RSS 1.0 therefore
157157+ suggests either deprecation or augmentation with some form of resource
158158+ discovery of this element in future versions while maintaining it for
159159+ backward compatiblity with RSS 0.9.
160160+161161+ [about] must be unique with respect to any other rdf:about attributes in
162162+ the RSS document and is a URI which identifies the textinput. [about]
163163+ should be identical to the value of the [link], if possible.
164164+165165+ {{: http://web.resource.org/rss/1.0/spec#s5.6} See RSS 1.0 § 5.6 }
166166+167167+ {[ Syntax: <textinput rdf:about="{textinput_uri}"> Requirement: Optional;
168168+ if present, must also be present in channel element [5.3.6] Required
169169+ Attribute(s): rdf:about Model: (title, description, name, link) ]} *)
170170+type textinput =
171171+ {about: Uri.t; title: title; description: description; name: name; link: link}
172172+173173+(** The outermost level in every RSS 1.0 compliant document is the RDF element.
174174+ The opening RDF tag assocaties the rdf: namespace prefix with the RDF
175175+ syntax schema and establishes the RSS 1.0 schema as the default namespace
176176+ for the document.
177177+178178+ While any valid namespace prefix may be used, document creators are advised
179179+ to consider "rdf:" normative. Those wishing to be strictly
180180+ backward-compatible with RSS 0.9 must use "rdf:".
181181+182182+ {{: http://web.resource.org/rss/1.0/spec#s5.2} See RSS 1.0 § 5.2}
183183+184184+ {[ Syntax: <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
185185+ xmlns="http://purl.org/rss/1.0/"> Requirement: Required exactly as shown,
186186+ aside from any additional namespace declarations Model: (channel, image?,
187187+ item+, textinput?) ]} *)
188188+type rdf =
189189+ { channel: channel
190190+ ; image: image option
191191+ ; item: item list
192192+ ; textinput: textinput option }
193193+194194+val parse : ?xmlbase:Uri.t -> Xmlm.input -> rdf
195195+(** [parse xml] returns the RDF corresponding to [xml].
196196+197197+ @raise Error.raise_expectation if [xml] is not a valid RSS1 document.
198198+199199+ @param xmlbase the base URI against which relative URIs in the XML RSS1
200200+ document are resolved. It is superseded by xml:base present in the document
201201+ (if any). *)
202202+203203+val read : ?xmlbase:Uri.t -> string -> rdf
204204+(** [read fname] reads the file name [fname] and parses it. For the optional
205205+ parameters, see {!parse}. *)
206206+207207+(**/**)
208208+209209+(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None],
210210+ gives the base URI against which [uri] must be resolved if it is relative. *)
211211+type uri = Uri.t option * string
212212+213213+val unsafe :
214214+ ?xmlbase:Uri.t
215215+ -> Xmlm.input
216216+ -> [> `RDF of [> `Channel of [> `About of uri
217217+ | `Description of string list
218218+ | `Image of [> `URI of uri] list
219219+ | `Items of [> `Seq of [> `Li of [> `URI of uri]
220220+ list ]
221221+ list ]
222222+ list
223223+ | `Link of [> `URI of uri] list
224224+ | `TextInput of [> `URI of uri] list
225225+ | `Title of string list ]
226226+ list
227227+ | `Image of [> `About of uri
228228+ | `Link of [> `URI of uri] list
229229+ | `Title of string list
230230+ | `URL of [> `URI of uri] list ]
231231+ list
232232+ | `Item of [> `About of uri
233233+ | `Description of string list
234234+ | `Link of [> `URI of uri] list
235235+ | `Title of string list ]
236236+ list
237237+ | `TextInput of [> `About of uri
238238+ | `Description of string list
239239+ | `Link of [> `URI of uri] list
240240+ | `Name of string list
241241+ | `Title of string list ]
242242+ list ]
243243+ list ]
244244+(** Analysis without verification, enjoy ! *)
+1271
stack/syndic/lib/syndic_rss2.ml
···11+open Syndic_common.XML
22+open Syndic_common.Util
33+module XML = Syndic_xml
44+module Atom = Syndic_atom
55+module Date = Syndic_date
66+module Error = Syndic_error
77+88+type image =
99+ { url: Uri.t
1010+ ; title: string
1111+ ; link: Uri.t
1212+ ; width: int
1313+ ; (* default 88 *)
1414+ height: int
1515+ ; (* default 31 *)
1616+ description: string option }
1717+1818+type image' =
1919+ [ `URL of Uri.t
2020+ | `Title of string
2121+ | `Link of Uri.t
2222+ | `Width of int
2323+ | `Height of int
2424+ | `Description of string ]
2525+2626+let make_image ~pos (l : [< image'] list) =
2727+ let url =
2828+ match find (function `URL _ -> true | _ -> false) l with
2929+ | Some (`URL u) -> u
3030+ | _ ->
3131+ raise
3232+ (Error.Error
3333+ (pos, "<image> elements MUST contains exactly one <url> element"))
3434+ in
3535+ let title =
3636+ match find (function `Title _ -> true | _ -> false) l with
3737+ | Some (`Title t) -> t
3838+ | _ ->
3939+ raise
4040+ (Error.Error
4141+ (pos, "<image> elements MUST contains exactly one <title> element"))
4242+ in
4343+ let link =
4444+ match find (function `Link _ -> true | _ -> false) l with
4545+ | Some (`Link l) -> l
4646+ | _ ->
4747+ raise
4848+ (Error.Error
4949+ (pos, "<image> elements MUST contains exactly one <link> element"))
5050+ in
5151+ let width =
5252+ match find (function `Width _ -> true | _ -> false) l with
5353+ | Some (`Width w) -> w
5454+ | _ -> 88
5555+ (* cf. RFC *)
5656+ in
5757+ let height =
5858+ match find (function `Height _ -> true | _ -> false) l with
5959+ | Some (`Height h) -> h
6060+ | _ -> 31
6161+ (* cf. RFC *)
6262+ in
6363+ let description =
6464+ match find (function `Description _ -> true | _ -> false) l with
6565+ | Some (`Description s) -> Some s
6666+ | _ -> None
6767+ in
6868+ `Image ({url; title; link; width; height; description} : image)
6969+7070+let url_of_xml ~xmlbase a = `URL (XML.resolve ~xmlbase (Uri.of_string a))
7171+let url_of_xml' ~xmlbase a = `URL (xmlbase, a)
7272+7373+let image_url_of_xml ~xmlbase (pos, _tag, datas) =
7474+ try url_of_xml ~xmlbase (get_leaf datas) with Not_found ->
7575+ raise
7676+ (Error.Error (pos, "The content of <uri> MUST be a non-empty string"))
7777+7878+let image_title_of_xml ~xmlbase:_ (_pos, _tag, datas) =
7979+ `Title (try get_leaf datas with Not_found -> "")
8080+8181+let image_link_of_xml ~xmlbase (pos, _tag, datas) =
8282+ try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
8383+ with Not_found ->
8484+ raise
8585+ (Error.Error (pos, "The content of <link> MUST be a non-empty string"))
8686+8787+let image_size_of_xml ~max ~xmlbase:_ (pos, tag, datas) =
8888+ try
8989+ let size = int_of_string (get_leaf datas) in
9090+ if size > max then
9191+ raise
9292+ (Error.Error
9393+ ( pos
9494+ , "size of "
9595+ ^ get_tag_name tag
9696+ ^ " exceeded (max is "
9797+ ^ string_of_int max
9898+ ^ ")" ))
9999+ else size
100100+ with
101101+ | Not_found ->
102102+ raise
103103+ (Error.Error
104104+ ( pos
105105+ , "The content of <"
106106+ ^ get_tag_name tag
107107+ ^ "> MUST be a non-empty string" ))
108108+ | Failure _ ->
109109+ raise
110110+ (Error.Error
111111+ (pos, "The content of <" ^ get_tag_name tag ^ "> MUST be an integer"))
112112+113113+let image_width_of_xml ~xmlbase a =
114114+ `Width (image_size_of_xml ~max:144 ~xmlbase a)
115115+116116+let image_height_of_xml ~xmlbase a =
117117+ `Height (image_size_of_xml ~max:400 ~xmlbase a)
118118+119119+let image_description_of_xml ~xmlbase:_ (pos, _tag, datas) =
120120+ try `Description (get_leaf datas) with Not_found ->
121121+ raise
122122+ (Error.Error
123123+ (pos, "The content of <description> MUST be a non-empty string"))
124124+125125+let image_of_xml =
126126+ let data_producer =
127127+ [ ("url", image_url_of_xml)
128128+ ; ("title", image_title_of_xml)
129129+ ; ("link", image_link_of_xml)
130130+ ; ("width", image_width_of_xml)
131131+ ; ("height", image_height_of_xml)
132132+ ; ("description", image_description_of_xml) ]
133133+ in
134134+ generate_catcher ~data_producer make_image
135135+136136+let image_of_xml' =
137137+ let data_producer =
138138+ [ ("url", dummy_of_xml ~ctor:url_of_xml')
139139+ ; ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
140140+ ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a)))
141141+ ; ("width", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Width a))
142142+ ; ("height", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Height a))
143143+ ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
144144+ ]
145145+ in
146146+ generate_catcher ~data_producer (fun ~pos:_ x -> `Image x)
147147+148148+type cloud = {uri: Uri.t; registerProcedure: string; protocol: string}
149149+150150+type cloud' =
151151+ [ `Domain of string
152152+ | `Port of string
153153+ | `Path of string
154154+ | `RegisterProcedure of string
155155+ | `Protocol of string ]
156156+157157+let make_cloud ~pos (l : [< cloud'] list) =
158158+ let domain =
159159+ match find (function `Domain _ -> true | _ -> false) l with
160160+ | Some (`Domain u) -> u
161161+ | _ ->
162162+ raise
163163+ (Error.Error (pos, "Cloud elements MUST have a 'domain' attribute"))
164164+ in
165165+ let port =
166166+ match find (function `Port _ -> true | _ -> false) l with
167167+ | Some (`Port p) -> int_of_string p
168168+ | _ ->
169169+ raise
170170+ (Error.Error (pos, "Cloud elements MUST have a 'port' attribute"))
171171+ in
172172+ let path =
173173+ match find (function `Path _ -> true | _ -> false) l with
174174+ | Some (`Path p) -> p
175175+ | _ ->
176176+ raise
177177+ (Error.Error (pos, "Cloud elements MUST have a 'path' attribute"))
178178+ in
179179+ let registerProcedure =
180180+ match find (function `RegisterProcedure _ -> true | _ -> false) l with
181181+ | Some (`RegisterProcedure r) -> r
182182+ | _ ->
183183+ raise
184184+ (Error.Error
185185+ (pos, "Cloud elements MUST have a 'registerProcedure' attribute"))
186186+ in
187187+ let protocol =
188188+ match find (function `Protocol _ -> true | _ -> false) l with
189189+ | Some (`Protocol p) -> p
190190+ | _ ->
191191+ raise
192192+ (Error.Error (pos, "Cloud elements MUST have a 'protocol' attribute"))
193193+ in
194194+ let uri = Uri.make ~host:domain ~port ~path () in
195195+ `Cloud ({uri; registerProcedure; protocol} : cloud)
196196+197197+let cloud_attr_producer =
198198+ [ ("domain", fun ~xmlbase:_ a -> `Domain a)
199199+ ; ("port", fun ~xmlbase:_ a -> `Port a)
200200+ ; ("path", fun ~xmlbase:_ a -> `Path a)
201201+ ; (* XXX: it's RFC compliant ? *)
202202+ ("registerProcedure", fun ~xmlbase:_ a -> `RegisterProcedure a)
203203+ ; ("protocol", fun ~xmlbase:_ a -> `Protocol a) ]
204204+205205+let cloud_of_xml =
206206+ generate_catcher ~attr_producer:cloud_attr_producer make_cloud
207207+208208+let cloud_of_xml' =
209209+ generate_catcher ~attr_producer:cloud_attr_producer (fun ~pos:_ x -> `Cloud x)
210210+211211+type textinput = {title: string; description: string; name: string; link: Uri.t}
212212+213213+type textinput' =
214214+ [`Title of string | `Description of string | `Name of string | `Link of Uri.t]
215215+216216+let make_textinput ~pos (l : [< textinput'] list) =
217217+ let title =
218218+ match find (function `Title _ -> true | _ -> false) l with
219219+ | Some (`Title t) -> t
220220+ | _ ->
221221+ raise
222222+ (Error.Error
223223+ ( pos
224224+ , "<textinput> elements MUST contains exactly one <title> element"
225225+ ))
226226+ in
227227+ let description =
228228+ match find (function `Description _ -> true | _ -> false) l with
229229+ | Some (`Description s) -> s
230230+ | _ ->
231231+ raise
232232+ (Error.Error
233233+ ( pos
234234+ , "<textinput> elements MUST contains exactly one <description> \
235235+ element" ))
236236+ in
237237+ let name =
238238+ match find (function `Name _ -> true | _ -> false) l with
239239+ | Some (`Name s) -> s
240240+ | _ ->
241241+ raise
242242+ (Error.Error
243243+ ( pos
244244+ , "<textinput> elements MUST contains exactly one <name> element"
245245+ ))
246246+ in
247247+ let link =
248248+ match find (function `Link _ -> true | _ -> false) l with
249249+ | Some (`Link u) -> u
250250+ | _ ->
251251+ raise
252252+ (Error.Error
253253+ ( pos
254254+ , "<textinput> elements MUST contains exactly one <link> element"
255255+ ))
256256+ in
257257+ `TextInput ({title; description; name; link} : textinput)
258258+259259+let textinput_title_of_xml ~xmlbase:_ (pos, _tag, datas) =
260260+ try `Title (get_leaf datas) with Not_found ->
261261+ raise
262262+ (Error.Error (pos, "The content of <title> MUST be a non-empty string"))
263263+264264+let textinput_description_of_xml ~xmlbase:_ (pos, _tag, datas) =
265265+ try `Description (get_leaf datas) with Not_found ->
266266+ raise
267267+ (Error.Error
268268+ (pos, "The content of <description> MUST be a non-empty string"))
269269+270270+let textinput_name_of_xml ~xmlbase:_ (pos, _tag, datas) =
271271+ try `Name (get_leaf datas) with Not_found ->
272272+ raise
273273+ (Error.Error (pos, "The content of <name> MUST be a non-empty string"))
274274+275275+let textinput_link_of_xml ~xmlbase (pos, _tag, datas) =
276276+ try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
277277+ with Not_found ->
278278+ raise
279279+ (Error.Error (pos, "The content of <link> MUST be a non-empty string"))
280280+281281+let textinput_of_xml =
282282+ let data_producer =
283283+ [ ("title", textinput_title_of_xml)
284284+ ; ("description", textinput_description_of_xml)
285285+ ; ("name", textinput_name_of_xml)
286286+ ; ("link", textinput_link_of_xml) ]
287287+ in
288288+ generate_catcher ~data_producer make_textinput
289289+290290+let textinput_of_xml' =
291291+ let data_producer =
292292+ [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
293293+ ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
294294+ ; ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a))
295295+ ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) ]
296296+ in
297297+ generate_catcher ~data_producer (fun ~pos:_ x -> `TextInput x)
298298+299299+type category = {data: string; domain: Uri.t option}
300300+type category' = [`Data of string | `Domain of Uri.t]
301301+302302+let make_category ~pos:_ (l : [< category'] list) =
303303+ let data =
304304+ match find (function `Data _ -> true | _ -> false) l with
305305+ | Some (`Data s) -> s
306306+ | _ -> ""
307307+ in
308308+ let domain =
309309+ match find (function `Domain _ -> true | _ -> false) l with
310310+ | Some (`Domain d) -> Some d
311311+ | _ -> None
312312+ in
313313+ `Category ({data; domain} : category)
314314+315315+let category_of_xml =
316316+ let attr_producer =
317317+ [("domain", fun ~xmlbase:_ a -> `Domain (Uri.of_string a))]
318318+ in
319319+ let leaf_producer ~xmlbase:_ _pos data = `Data data in
320320+ generate_catcher ~attr_producer ~leaf_producer make_category
321321+322322+let category_of_xml' =
323323+ let attr_producer = [("domain", fun ~xmlbase:_ a -> `Domain a)] in
324324+ let leaf_producer ~xmlbase:_ _pos data = `Data data in
325325+ generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Category x)
326326+327327+type enclosure = {url: Uri.t; length: int; mime: string}
328328+type enclosure' = [`URL of Uri.t | `Length of string | `Mime of string]
329329+330330+let make_enclosure ~pos (l : [< enclosure'] list) =
331331+ let url =
332332+ match find (function `URL _ -> true | _ -> false) l with
333333+ | Some (`URL u) -> u
334334+ | _ ->
335335+ raise
336336+ (Error.Error (pos, "Enclosure elements MUST have a 'url' attribute"))
337337+ in
338338+ let length =
339339+ match find (function `Length _ -> true | _ -> false) l with
340340+ | Some (`Length l) -> int_of_string l
341341+ | _ ->
342342+ raise
343343+ (Error.Error
344344+ (pos, "Enclosure elements MUST have a 'length' attribute"))
345345+ in
346346+ let mime =
347347+ match find (function `Mime _ -> true | _ -> false) l with
348348+ | Some (`Mime m) -> m
349349+ | _ ->
350350+ raise
351351+ (Error.Error (pos, "Enclosure elements MUST have a 'type' attribute"))
352352+ in
353353+ `Enclosure ({url; length; mime} : enclosure)
354354+355355+let enclosure_of_xml =
356356+ let attr_producer =
357357+ [ ("url", url_of_xml)
358358+ ; ("length", fun ~xmlbase:_ a -> `Length a)
359359+ ; ("type", fun ~xmlbase:_ a -> `Mime a) ]
360360+ in
361361+ generate_catcher ~attr_producer make_enclosure
362362+363363+let enclosure_of_xml' =
364364+ let attr_producer =
365365+ [ ("url", url_of_xml')
366366+ ; ("length", fun ~xmlbase:_ a -> `Length a)
367367+ ; ("type", fun ~xmlbase:_ a -> `Mime a) ]
368368+ in
369369+ generate_catcher ~attr_producer (fun ~pos:_ x -> `Enclosure x)
370370+371371+type guid = {data: Uri.t; (* must be uniq *) permalink: bool (* default true *)}
372372+type guid' = [`Data of Uri.t option * string | `Permalink of string]
373373+374374+(* Some RSS2 server output <guid isPermaLink="false"></guid> ! *)
375375+let make_guid ~pos:_ (l : [< guid'] list) =
376376+ let permalink =
377377+ match find (function `Permalink _ -> true | _ -> false) l with
378378+ | Some (`Permalink b) -> bool_of_string b
379379+ | _ -> true
380380+ (* cf. RFC *)
381381+ in
382382+ match find (function `Data _ -> true | _ -> false) l with
383383+ | Some (`Data (xmlbase, u)) ->
384384+ if u = "" then `Guid None
385385+ else
386386+ (* When the GUID is declared as a permlink, resolve it using xml:base *)
387387+ let data =
388388+ if permalink then XML.resolve ~xmlbase (Uri.of_string u)
389389+ else Uri.of_string u
390390+ in
391391+ `Guid (Some ({data; permalink} : guid))
392392+ | _ -> `Guid None
393393+394394+let guid_of_xml, guid_of_xml' =
395395+ let attr_producer = [("isPermaLink", fun ~xmlbase:_ a -> `Permalink a)] in
396396+ let leaf_producer ~xmlbase _pos data = `Data (xmlbase, data) in
397397+ ( generate_catcher ~attr_producer ~leaf_producer make_guid
398398+ , generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Guid x) )
399399+400400+type source = {data: string; url: Uri.t}
401401+type source' = [`Data of string | `URL of Uri.t]
402402+403403+let make_source ~pos (l : [< source'] list) =
404404+ let data =
405405+ match find (function `Data _ -> true | _ -> false) l with
406406+ | Some (`Data s) -> s
407407+ | _ ->
408408+ raise
409409+ (Error.Error
410410+ (pos, "The content of <source> MUST be a non-empty string"))
411411+ in
412412+ let url =
413413+ match find (function `URL _ -> true | _ -> false) l with
414414+ | Some (`URL u) -> u
415415+ | _ ->
416416+ raise
417417+ (Error.Error (pos, "Source elements MUST have a 'url' attribute"))
418418+ in
419419+ `Source ({data; url} : source)
420420+421421+let source_of_xml =
422422+ let attr_producer = [("url", url_of_xml)] in
423423+ let leaf_producer ~xmlbase:_ _pos data = `Data data in
424424+ generate_catcher ~attr_producer ~leaf_producer make_source
425425+426426+let source_of_xml' =
427427+ let attr_producer = [("url", url_of_xml')] in
428428+ let leaf_producer ~xmlbase:_ _pos data = `Data data in
429429+ generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Source x)
430430+431431+type story =
432432+ | All of string * Uri.t option * string
433433+ | Title of string
434434+ | Description of Uri.t option * string
435435+436436+type item =
437437+ { story: story
438438+ ; content: Uri.t option * string
439439+ ; link: Uri.t option
440440+ ; author: string option
441441+ ; (* e-mail *)
442442+ categories: category list
443443+ ; comments: Uri.t option
444444+ ; enclosure: enclosure option
445445+ ; guid: guid option
446446+ ; pubDate: Date.t option
447447+ ; (* date *)
448448+ source: source option }
449449+450450+[@@@warning "-34"]
451451+452452+type item' =
453453+ [ `Title of string
454454+ | `Description of Uri.t option * string (* xmlbase, description *)
455455+ | `Content of Uri.t option * string
456456+ | `Link of Uri.t
457457+ | `Author of string (* e-mail *)
458458+ | `Category of category
459459+ | `Comments of Uri.t
460460+ | `Enclosure of enclosure
461461+ | `Guid of guid
462462+ | `PubDate of Date.t
463463+ | `Source of source ]
464464+465465+let make_item ~pos (l : _ list) =
466466+ let story =
467467+ match
468468+ ( find (function `Title _ -> true | _ -> false) l
469469+ , find (function `Description _ -> true | _ -> false) l )
470470+ with
471471+ | Some (`Title t), Some (`Description (x, d)) -> All (t, x, d)
472472+ | Some (`Title t), _ -> Title t
473473+ | _, Some (`Description (x, d)) -> Description (x, d)
474474+ | _, _ ->
475475+ raise (Error.Error (pos, "Item expected <title> or <description> tag"))
476476+ in
477477+ let content =
478478+ match find (function `Content _ -> true | _ -> false) l with
479479+ | Some (`Content (x, c)) -> (x, c)
480480+ | _ -> (None, "")
481481+ in
482482+ let link =
483483+ match find (function `Link _ -> true | _ -> false) l with
484484+ | Some (`Link l) -> l
485485+ | _ -> None
486486+ in
487487+ let author =
488488+ match find (function `Author _ -> true | _ -> false) l with
489489+ | Some (`Author a) -> Some a
490490+ | _ -> None
491491+ in
492492+ let categories =
493493+ let fn = fun acc -> function `Category x -> x :: acc | _ -> acc in
494494+ List.fold_left fn [] l |> List.rev
495495+ in
496496+ let comments =
497497+ match find (function `Comments _ -> true | _ -> false) l with
498498+ | Some (`Comments c) -> Some c
499499+ | _ -> None
500500+ in
501501+ let enclosure =
502502+ match find (function `Enclosure _ -> true | _ -> false) l with
503503+ | Some (`Enclosure e) -> Some e
504504+ | _ -> None
505505+ in
506506+ let guid =
507507+ match find (function `Guid _ -> true | _ -> false) l with
508508+ | Some (`Guid g) -> g
509509+ | _ -> None
510510+ in
511511+ let pubDate =
512512+ match find (function `PubDate _ -> true | _ -> false) l with
513513+ | Some (`PubDate p) -> Some p
514514+ | _ -> None
515515+ in
516516+ let source =
517517+ match find (function `Source _ -> true | _ -> false) l with
518518+ | Some (`Source s) -> Some s
519519+ | _ -> None
520520+ in
521521+ `Item
522522+ ( { story
523523+ ; content
524524+ ; link
525525+ ; author
526526+ ; categories
527527+ ; comments
528528+ ; enclosure
529529+ ; guid
530530+ ; pubDate
531531+ ; source }
532532+ : item )
533533+534534+let item_title_of_xml ~xmlbase:_ (pos, _tag, datas) =
535535+ try `Title (get_leaf datas) with Not_found ->
536536+ raise
537537+ (Error.Error (pos, "The content of <title> MUST be a non-empty string"))
538538+539539+let item_description_of_xml ~xmlbase (_pos, _tag, datas) =
540540+ `Description (xmlbase, try get_leaf datas with Not_found -> "")
541541+542542+let item_content_of_xml ~xmlbase (_pos, _tag, datas) =
543543+ `Content (xmlbase, try get_leaf datas with Not_found -> "")
544544+545545+let item_link_of_xml ~xmlbase (_pos, _tag, datas) =
546546+ `Link
547547+ ( try Some (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
548548+ with Not_found -> None )
549549+550550+let item_author_of_xml ~xmlbase:_ (pos, _tag, datas) =
551551+ try `Author (get_leaf datas) with Not_found ->
552552+ raise
553553+ (Error.Error (pos, "The content of <author> MUST be a non-empty string"))
554554+555555+let item_comments_of_xml ~xmlbase (pos, _tag, datas) =
556556+ try `Comments (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
557557+ with Not_found ->
558558+ raise
559559+ (Error.Error (pos, "The content of <comments> MUST be a non-empty string"))
560560+561561+let item_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) =
562562+ try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found ->
563563+ raise
564564+ (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string"))
565565+566566+let item_namespaces = [""; "http://purl.org/rss/1.0/modules/content/"]
567567+568568+let item_of_xml =
569569+ let data_producer =
570570+ [ ("title", item_title_of_xml)
571571+ ; ("description", item_description_of_xml)
572572+ ; (* <content:encoded> where
573573+ xmlns:content="http://purl.org/rss/1.0/modules/content/" *)
574574+ ("encoded", item_content_of_xml)
575575+ ; ("link", item_link_of_xml)
576576+ ; ("author", item_author_of_xml)
577577+ ; ("category", category_of_xml)
578578+ ; ("comments", item_comments_of_xml)
579579+ ; ("enclosure", enclosure_of_xml)
580580+ ; ("guid", guid_of_xml)
581581+ ; ("pubDate", item_pubdate_of_xml)
582582+ ; ("source", source_of_xml) ]
583583+ in
584584+ generate_catcher ~data_producer make_item ~namespaces:item_namespaces
585585+586586+let item_of_xml' =
587587+ let data_producer =
588588+ [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
589589+ ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
590590+ ; ("encoded", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Content a))
591591+ ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a)))
592592+ ; ("author", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Author a))
593593+ ; ("category", category_of_xml')
594594+ ; ("comments", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Comments a))
595595+ ; ("enclosure", enclosure_of_xml')
596596+ ; ("guid", guid_of_xml')
597597+ ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a))
598598+ ; ("source", source_of_xml') ]
599599+ in
600600+ generate_catcher ~data_producer
601601+ (fun ~pos:_ x -> `Item x)
602602+ ~namespaces:item_namespaces
603603+604604+type channel =
605605+ { title: string
606606+ ; link: Uri.t
607607+ ; description: string
608608+ ; language: string option
609609+ ; copyright: string option
610610+ ; managingEditor: string option
611611+ ; webMaster: string option
612612+ ; pubDate: Date.t option
613613+ ; lastBuildDate: Date.t option
614614+ ; category: string option
615615+ ; generator: string option
616616+ ; docs: Uri.t option
617617+ ; cloud: cloud option
618618+ ; ttl: int option
619619+ ; image: image option
620620+ ; rating: int option
621621+ ; textInput: textinput option
622622+ ; skipHours: int option
623623+ ; skipDays: int option
624624+ ; items: item list }
625625+626626+type channel' =
627627+ [ `Title of string
628628+ | `Link of Uri.t
629629+ | `Description of string
630630+ | `Language of string
631631+ | `Copyright of string
632632+ | `ManagingEditor of string
633633+ | `WebMaster of string
634634+ | `PubDate of Date.t
635635+ | `LastBuildDate of Date.t
636636+ | `Category of string
637637+ | `Generator of string
638638+ | `Docs of Uri.t
639639+ | `Cloud of cloud
640640+ | `TTL of int
641641+ | `Image of image
642642+ | `Rating of int
643643+ | `TextInput of textinput
644644+ | `SkipHours of int
645645+ | `SkipDays of int
646646+ | `Item of item ]
647647+648648+let make_channel ~pos (l : [< channel'] list) =
649649+ let title =
650650+ match find (function `Title _ -> true | _ -> false) l with
651651+ | Some (`Title t) -> t
652652+ | _ ->
653653+ raise
654654+ (Error.Error
655655+ ( pos
656656+ , "<channel> elements MUST contains exactly one <title> element"
657657+ ))
658658+ in
659659+ let link =
660660+ match find (function `Link _ -> true | _ -> false) l with
661661+ | Some (`Link l) -> l
662662+ | _ ->
663663+ raise
664664+ (Error.Error
665665+ ( pos
666666+ , "<channel> elements MUST contains exactly one <link> element" ))
667667+ in
668668+ let description =
669669+ match find (function `Description _ -> true | _ -> false) l with
670670+ | Some (`Description l) -> l
671671+ | _ ->
672672+ raise
673673+ (Error.Error
674674+ ( pos
675675+ , "<channel> elements MUST contains exactly one <description> \
676676+ element" ))
677677+ in
678678+ let language =
679679+ match find (function `Language _ -> true | _ -> false) l with
680680+ | Some (`Language a) -> Some a
681681+ | _ -> None
682682+ in
683683+ let copyright =
684684+ match find (function `Copyright _ -> true | _ -> false) l with
685685+ | Some (`Copyright a) -> Some a
686686+ | _ -> None
687687+ in
688688+ let managingEditor =
689689+ match find (function `ManagingEditor _ -> true | _ -> false) l with
690690+ | Some (`ManagingEditor a) -> Some a
691691+ | _ -> None
692692+ in
693693+ let webMaster =
694694+ match find (function `WebMaster _ -> true | _ -> false) l with
695695+ | Some (`WebMaster a) -> Some a
696696+ | _ -> None
697697+ in
698698+ let pubDate =
699699+ match find (function `PubDate _ -> true | _ -> false) l with
700700+ | Some (`PubDate a) -> Some a
701701+ | _ -> None
702702+ in
703703+ let lastBuildDate =
704704+ match find (function `LastBuildDate _ -> true | _ -> false) l with
705705+ | Some (`LastBuildDate a) -> Some a
706706+ | _ -> None
707707+ in
708708+ let category =
709709+ match find (function `Category _ -> true | _ -> false) l with
710710+ | Some (`Category a) -> Some a
711711+ | _ -> None
712712+ in
713713+ let generator =
714714+ match find (function `Generator _ -> true | _ -> false) l with
715715+ | Some (`Generator a) -> Some a
716716+ | _ -> None
717717+ in
718718+ let docs =
719719+ match find (function `Docs _ -> true | _ -> false) l with
720720+ | Some (`Docs a) -> Some a
721721+ | _ -> None
722722+ in
723723+ let cloud =
724724+ match find (function `Cloud _ -> true | _ -> false) l with
725725+ | Some (`Cloud a) -> Some a
726726+ | _ -> None
727727+ in
728728+ let ttl =
729729+ match find (function `TTL _ -> true | _ -> false) l with
730730+ | Some (`TTL a) -> Some a
731731+ | _ -> None
732732+ in
733733+ let image =
734734+ match find (function `Image _ -> true | _ -> false) l with
735735+ | Some (`Image a) -> Some a
736736+ | _ -> None
737737+ in
738738+ let rating =
739739+ match find (function `Rating _ -> true | _ -> false) l with
740740+ | Some (`Rating a) -> Some a
741741+ | _ -> None
742742+ in
743743+ let textInput =
744744+ match find (function `TextInput _ -> true | _ -> false) l with
745745+ | Some (`TextInput a) -> Some a
746746+ | _ -> None
747747+ in
748748+ let skipHours =
749749+ match find (function `SkipHours _ -> true | _ -> false) l with
750750+ | Some (`SkipHours a) -> Some a
751751+ | _ -> None
752752+ in
753753+ let skipDays =
754754+ match find (function `SkipDays _ -> true | _ -> false) l with
755755+ | Some (`SkipDays a) -> Some a
756756+ | _ -> None
757757+ in
758758+ let items =
759759+ List.fold_left (fun acc -> function `Item x -> x :: acc | _ -> acc) [] l
760760+ in
761761+ ( { title
762762+ ; link
763763+ ; description
764764+ ; language
765765+ ; copyright
766766+ ; managingEditor
767767+ ; webMaster
768768+ ; pubDate
769769+ ; lastBuildDate
770770+ ; category
771771+ ; generator
772772+ ; docs
773773+ ; cloud
774774+ ; ttl
775775+ ; image
776776+ ; rating
777777+ ; textInput
778778+ ; skipHours
779779+ ; skipDays
780780+ ; items }
781781+ : channel )
782782+783783+let channel_title_of_xml ~xmlbase:_ (pos, _tag, datas) =
784784+ try `Title (get_leaf datas) with Not_found ->
785785+ raise
786786+ (Error.Error (pos, "The content of <title> MUST be a non-empty string"))
787787+788788+let channel_description_of_xml ~xmlbase:_ (_pos, _tag, datas) =
789789+ `Description (try get_leaf datas with Not_found -> "")
790790+791791+let channel_link_of_xml ~xmlbase (pos, _tag, datas) =
792792+ try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
793793+ with Not_found ->
794794+ raise
795795+ (Error.Error (pos, "The content of <link> MUST be a non-empty string"))
796796+797797+let channel_language_of_xml ~xmlbase:_ (pos, _tag, datas) =
798798+ try `Language (get_leaf datas) with Not_found ->
799799+ raise
800800+ (Error.Error (pos, "The content of <language> MUST be a non-empty string"))
801801+802802+let channel_copyright_of_xml ~xmlbase:_ (_pos, _tag, datas) =
803803+ try `Copyright (get_leaf datas) with Not_found -> `Copyright ""
804804+805805+(* XXX(dinosaure): aempty copyright is allowed. *)
806806+807807+let channel_managingeditor_of_xml ~xmlbase:_ (pos, _tag, datas) =
808808+ try `ManagingEditor (get_leaf datas) with Not_found ->
809809+ raise
810810+ (Error.Error
811811+ (pos, "The content of <managingEditor> MUST be a non-empty string"))
812812+813813+let channel_webmaster_of_xml ~xmlbase:_ (pos, _tag, datas) =
814814+ try `WebMaster (get_leaf datas) with Not_found ->
815815+ raise
816816+ (Error.Error
817817+ (pos, "The content of <webMaster> MUST be a non-empty string"))
818818+819819+let channel_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) =
820820+ try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found ->
821821+ raise
822822+ (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string"))
823823+824824+let channel_lastbuilddate_of_xml ~xmlbase:_ (pos, _tag, datas) =
825825+ try `LastBuildDate (Date.of_rfc822 (get_leaf datas)) with Not_found ->
826826+ raise
827827+ (Error.Error
828828+ (pos, "The content of <lastBuildDate> MUST be a non-empty string"))
829829+830830+let channel_category_of_xml ~xmlbase:_ (pos, _tag, datas) =
831831+ try `Category (get_leaf datas) with Not_found ->
832832+ raise
833833+ (Error.Error (pos, "The content of <category> MUST be a non-empty string"))
834834+835835+let channel_generator_of_xml ~xmlbase:_ (pos, _tag, datas) =
836836+ try `Generator (get_leaf datas) with Not_found ->
837837+ raise
838838+ (Error.Error
839839+ (pos, "The content of <generator> MUST be a non-empty string"))
840840+841841+let channel_docs_of_xml ~xmlbase (pos, _tag, datas) =
842842+ try `Docs (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
843843+ with Not_found ->
844844+ raise
845845+ (Error.Error (pos, "The content of <docs> MUST be a non-empty string"))
846846+847847+let channel_ttl_of_xml ~xmlbase:_ (pos, _tag, datas) =
848848+ try `TTL (int_of_string (get_leaf datas)) with _ ->
849849+ raise
850850+ (Error.Error
851851+ ( pos
852852+ , "The content of <ttl> MUST be a non-empty string representing an \
853853+ integer" ))
854854+855855+let channel_rating_of_xml ~xmlbase:_ (pos, _tag, datas) =
856856+ try `Rating (int_of_string (get_leaf datas)) with _ ->
857857+ raise
858858+ (Error.Error
859859+ ( pos
860860+ , "The content of <rating> MUST be a non-empty string representing \
861861+ an integer" ))
862862+863863+let channel_skipHours_of_xml ~xmlbase:_ (pos, _tag, datas) =
864864+ try `SkipHours (int_of_string (get_leaf datas)) with _ ->
865865+ raise
866866+ (Error.Error
867867+ ( pos
868868+ , "The content of <skipHours> MUST be a non-empty string \
869869+ representing an integer" ))
870870+871871+let channel_skipDays_of_xml ~xmlbase:_ (pos, _tag, datas) =
872872+ try `SkipDays (int_of_string (get_leaf datas)) with _ ->
873873+ raise
874874+ (Error.Error
875875+ ( pos
876876+ , "The content of <skipDays> MUST be a non-empty string representing \
877877+ an integer" ))
878878+879879+let channel_of_xml =
880880+ let data_producer =
881881+ [ ("title", channel_title_of_xml)
882882+ ; ("link", channel_link_of_xml)
883883+ ; ("description", channel_description_of_xml)
884884+ ; ("Language", channel_language_of_xml)
885885+ ; ("copyright", channel_copyright_of_xml)
886886+ ; ("managingeditor", channel_managingeditor_of_xml)
887887+ ; ("webmaster", channel_webmaster_of_xml)
888888+ ; ("pubdate", channel_pubdate_of_xml)
889889+ ; ("lastbuilddate", channel_lastbuilddate_of_xml)
890890+ ; ("category", channel_category_of_xml)
891891+ ; ("generator", channel_generator_of_xml)
892892+ ; ("docs", channel_docs_of_xml)
893893+ ; ("cloud", cloud_of_xml)
894894+ ; ("ttl", channel_ttl_of_xml)
895895+ ; ("image", image_of_xml)
896896+ ; ("rating", channel_rating_of_xml)
897897+ ; ("textinput", textinput_of_xml)
898898+ ; ("skiphours", channel_skipHours_of_xml)
899899+ ; ("skipdays", channel_skipDays_of_xml)
900900+ ; ("item", item_of_xml) ]
901901+ in
902902+ generate_catcher ~data_producer make_channel
903903+904904+let channel_of_xml' =
905905+ let data_producer =
906906+ [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
907907+ ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a)))
908908+ ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
909909+ ; ("Language", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Language a))
910910+ ; ("copyright", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Copyright a))
911911+ ; ( "managingeditor"
912912+ , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `ManagingEditor a) )
913913+ ; ("webmaster", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WebMaster a))
914914+ ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a))
915915+ ; ( "lastbuilddate"
916916+ , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `LastBuildDate a) )
917917+ ; ("category", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Category a))
918918+ ; ("generator", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Generator a))
919919+ ; ("docs", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Docs a))
920920+ ; ("cloud", cloud_of_xml')
921921+ ; ("ttl", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `TTL a))
922922+ ; ("image", image_of_xml')
923923+ ; ("rating", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Rating a))
924924+ ; ("textinput", textinput_of_xml')
925925+ ; ("skiphours", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipHours a))
926926+ ; ("skipdays", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipDays a))
927927+ ; ("item", item_of_xml') ]
928928+ in
929929+ generate_catcher ~data_producer (fun ~pos:_ x -> x)
930930+931931+let find_channel l =
932932+ find
933933+ (function
934934+ | XML.Node (_pos, tag, _data) -> tag_is tag "channel"
935935+ | XML.Data _ -> false)
936936+ l
937937+938938+let parse ?xmlbase input =
939939+ match XML.of_xmlm input |> snd with
940940+ | XML.Node (pos, tag, data) -> (
941941+ if tag_is tag "channel" then channel_of_xml ~xmlbase (pos, tag, data)
942942+ else
943943+ match find_channel data with
944944+ | Some (XML.Node (p, t, d)) -> channel_of_xml ~xmlbase (p, t, d)
945945+ | Some (XML.Data _) | _ ->
946946+ raise
947947+ (Error.Error
948948+ ( (0, 0)
949949+ , "document MUST contains exactly one <channel> element" )) )
950950+ | _ ->
951951+ raise
952952+ (Error.Error
953953+ ((0, 0), "document MUST contains exactly one <channel> element"))
954954+955955+let read ?xmlbase fname =
956956+ let fh = open_in fname in
957957+ try
958958+ let x = parse ?xmlbase (XML.input_of_channel fh) in
959959+ close_in fh ; x
960960+ with e -> close_in fh ; raise e
961961+962962+type uri = Uri.t option * string
963963+964964+let unsafe ?xmlbase input =
965965+ match XML.of_xmlm input |> snd with
966966+ | XML.Node (pos, tag, data) -> (
967967+ if tag_is tag "channel" then
968968+ `Channel (channel_of_xml' ~xmlbase (pos, tag, data))
969969+ else
970970+ match find_channel data with
971971+ | Some (XML.Node (p, t, d)) ->
972972+ `Channel (channel_of_xml' ~xmlbase (p, t, d))
973973+ | Some (XML.Data _) | None -> `Channel [] )
974974+ | _ -> `Channel []
975975+976976+(* Conversion to Atom *)
977977+978978+let map_option o f = match o with None -> None | Some v -> Some (f v)
979979+980980+(* Assume ASCII or a superset like UTF-8. *)
981981+let valid_local_part =
982982+ let is_valid c =
983983+ let c = Char.unsafe_chr c in
984984+ ('a' <= c && c <= 'z')
985985+ || ('A' <= c && c <= 'Z')
986986+ || ('0' <= c && c <= '9')
987987+ || c = '.'
988988+ (* shouldn't be the 1st char and not appear twice consecutively *)
989989+ || c = '!'
990990+ || c = '#'
991991+ || c = '$'
992992+ || c = '%'
993993+ || c = '&'
994994+ || c = '\''
995995+ || c = '*'
996996+ || c = '+'
997997+ || c = '-'
998998+ || c = '/'
999999+ || c = '='
10001000+ || c = '?'
10011001+ || c = '^'
10021002+ || c = '_'
10031003+ || c = '`'
10041004+ || c = '{'
10051005+ || c = '|'
10061006+ || c = '}'
10071007+ || c = '~'
10081008+ in
10091009+ Array.init 256 is_valid
10101010+10111011+let is_valid_local_part c = valid_local_part.(Char.code c)
10121012+10131013+let valid_domain_part =
10141014+ let is_valid c =
10151015+ let c = Char.unsafe_chr c in
10161016+ ('a' <= c && c <= 'z')
10171017+ || ('A' <= c && c <= 'Z')
10181018+ || ('0' <= c && c <= '9')
10191019+ || c = '.'
10201020+ || c = '.'
10211021+ in
10221022+ Array.init 256 is_valid
10231023+10241024+let is_valid_domain_part c = valid_domain_part.(Char.code c)
10251025+10261026+(* Valid range [s.[i]], [i0 ≤ i < i1]. *)
10271027+let sub_no_braces s i0 i1 =
10281028+ let i0 = if s.[i0] = '(' then i0 + 1 else i0 in
10291029+ let i1 = if s.[i1 - 1] = ')' then i1 - 1 else i1 in
10301030+ String.sub s i0 (i1 - i0)
10311031+10321032+(* The item author sometimes contains the name and email under the form "name
10331033+ <email>" or "email (name)". Try to extract both compnents. *)
10341034+let extract_name_email a =
10351035+ try
10361036+ let i = String.index a '@' in
10371037+ (* or Not_found *)
10381038+ let len = String.length a in
10391039+ let i0 = ref (i - 1) in
10401040+ while !i0 >= 0 && is_valid_local_part a.[!i0] do
10411041+ decr i0
10421042+ done ;
10431043+ incr i0 ;
10441044+ (* !i0 >= 0 is the first char of the possible email. *)
10451045+ let i1 = ref (i + 1) in
10461046+ while !i1 < len && is_valid_domain_part a.[!i1] do
10471047+ incr i1
10481048+ done ;
10491049+ if !i0 < i && i + 1 < !i1 then (
10501050+ let email = String.sub a !i0 (!i1 - !i0) in
10511051+ if !i0 > 0 && a.[!i0 - 1] = '<' then decr i0 ;
10521052+ if !i1 < len && a.[!i1] = '>' then incr i1 ;
10531053+ while !i1 < len && a.[!i1] = ' ' do
10541054+ incr i1
10551055+ done ;
10561056+ (* skip spaces *)
10571057+ let name =
10581058+ if !i0 <= 0 then
10591059+ if !i1 >= len then email (* no name *) else sub_no_braces a !i1 len
10601060+ else
10611061+ (* !i0 > 0 *)
10621062+ let name0 = String.trim (String.sub a 0 !i0) in
10631063+ if !i1 >= len then name0 else name0 ^ String.sub a !i1 (len - !i1)
10641064+ in
10651065+ (name, Some email) )
10661066+ else (a, None)
10671067+ with Not_found -> (a, None)
10681068+10691069+let looks_like_a_link u =
10701070+ (Uri.scheme u = Some "http" || Uri.scheme u = Some "https")
10711071+ && match Uri.host u with None | Some "" -> false | Some _ -> true
10721072+10731073+let entry_of_item ch_link ch_updated (it : item) : Atom.entry =
10741074+ let author =
10751075+ match it.author with
10761076+ | Some a ->
10771077+ let name, email = extract_name_email a in
10781078+ {Atom.name; uri= None; email}
10791079+ | None ->
10801080+ (* If no author is specified for the item, there is little one can do
10811081+ just using the RSS2 feed. The user will have to set it using Atom
10821082+ convenience functions. *)
10831083+ {Atom.name= ""; uri= None; email= None}
10841084+ in
10851085+ let categories =
10861086+ let fn (c : category) = { Atom.term= c.data; scheme= map_option c.domain (fun d -> d); label= None } in
10871087+ List.map fn it.categories
10881088+ in
10891089+ let (title : Atom.title), content =
10901090+ match it.story with
10911091+ | All (t, xmlbase, d) ->
10921092+ let content =
10931093+ match it.content with
10941094+ | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d))
10951095+ | x, c -> Some (Atom.Html (x, c))
10961096+ in
10971097+ (Atom.Text t, content)
10981098+ | Title t ->
10991099+ let content =
11001100+ match it.content with
11011101+ | _, "" -> None
11021102+ | x, c -> Some (Atom.Html (x, c))
11031103+ in
11041104+ (Atom.Text t, content)
11051105+ | Description (xmlbase, d) ->
11061106+ let content =
11071107+ match it.content with
11081108+ | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d))
11091109+ | x, c -> Some (Atom.Html (x, c))
11101110+ in
11111111+ (Atom.Text "", content)
11121112+ in
11131113+ let id =
11141114+ match it.guid with
11151115+ | Some g ->
11161116+ if g.permalink || looks_like_a_link g.data then g.data
11171117+ else
11181118+ let d = Digest.to_hex (Digest.string (Uri.to_string g.data)) in
11191119+ Uri.with_fragment ch_link (Some d)
11201120+ | None ->
11211121+ (* The [it.link] may not be a permanent link and may also be used by
11221122+ other items. We use a digest to make it unique. *)
11231123+ let link = match it.link with Some l -> l | None -> ch_link in
11241124+ let s =
11251125+ match it.story with
11261126+ | All (t, _, d) -> t ^ d
11271127+ | Title t -> t
11281128+ | Description (_, d) -> d
11291129+ in
11301130+ let d = Digest.to_hex (Digest.string s) in
11311131+ Uri.with_fragment link (Some d)
11321132+ in
11331133+ let links =
11341134+ match (it.guid, it.link) with
11351135+ | Some g, _ when g.permalink -> [Atom.link g.data ~rel:Atom.Alternate]
11361136+ | _, Some l -> [Atom.link l ~rel:Atom.Alternate]
11371137+ | Some g, _ ->
11381138+ (* Sometimes the guid sets [l.permalink = false] but is nonetheless the
11391139+ only URI we have. *)
11401140+ if looks_like_a_link g.data then [Atom.link g.data ~rel:Atom.Alternate]
11411141+ else []
11421142+ | _, None -> []
11431143+ in
11441144+ let links =
11451145+ match it.comments with
11461146+ | Some l ->
11471147+ { Atom.href= l
11481148+ ; rel= Atom.Related
11491149+ ; type_media= None
11501150+ ; hreflang= None
11511151+ ; title= ""
11521152+ ; length= None }
11531153+ :: links
11541154+ | None -> links
11551155+ in
11561156+ let links =
11571157+ match it.enclosure with
11581158+ | Some e ->
11591159+ { Atom.href= e.url
11601160+ ; rel= Atom.Enclosure
11611161+ ; type_media= Some e.mime
11621162+ ; hreflang= None
11631163+ ; title= ""
11641164+ ; length= Some e.length }
11651165+ :: links
11661166+ | None -> links
11671167+ in
11681168+ let source =
11691169+ match it.source with
11701170+ | Some s ->
11711171+ Some
11721172+ { Atom.authors= [author]
11731173+ ; (* Best guess *)
11741174+ categories= []
11751175+ ; contributors= []
11761176+ ; generator= None
11771177+ ; icon= None
11781178+ ; id= ch_link
11791179+ ; (* declared as the ID of the whole channel *)
11801180+ links=
11811181+ [ { Atom.href= s.url
11821182+ ; rel= Atom.Related
11831183+ ; type_media= None
11841184+ ; hreflang= None
11851185+ ; title= ""
11861186+ ; length= None } ]
11871187+ ; logo= None
11881188+ ; rights= None
11891189+ ; subtitle= None
11901190+ ; title= Atom.Text s.data
11911191+ ; updated= None }
11921192+ | None -> None
11931193+ in
11941194+ { Atom.authors= (author, [])
11951195+ ; categories
11961196+ ; content
11971197+ ; contributors= []
11981198+ ; id
11991199+ ; links
12001200+ ; published= None
12011201+ ; rights= None
12021202+ ; source
12031203+ ; summary= None
12041204+ ; title
12051205+ ; updated= (match it.pubDate with Some d -> d | None -> ch_updated) }
12061206+12071207+let more_recent_of_item date (it : item) =
12081208+ match (date, it.pubDate) with
12091209+ | _, None -> date
12101210+ | None, Some _ -> it.pubDate
12111211+ | Some d, Some di -> if Date.compare d di >= 0 then date else it.pubDate
12121212+12131213+let max_date_opt d = function None -> d | Some d' -> Date.max d d'
12141214+12151215+let to_atom ?self (ch : channel) : Atom.feed =
12161216+ let contributors =
12171217+ match ch.webMaster with
12181218+ | Some p -> [{Atom.name= "Webmaster"; uri= None; email= Some p}]
12191219+ | None -> []
12201220+ in
12211221+ let contributors =
12221222+ match ch.managingEditor with
12231223+ | Some p ->
12241224+ {Atom.name= "Managing Editor"; uri= None; email= Some p}
12251225+ :: contributors
12261226+ | None -> contributors
12271227+ in
12281228+ let links =
12291229+ [ { Atom.href= ch.link
12301230+ ; rel= Atom.Related
12311231+ ; type_media= Some "text/html"
12321232+ ; hreflang= None
12331233+ ; title= ch.title
12341234+ ; length= None } ]
12351235+ in
12361236+ let links =
12371237+ match self with
12381238+ | Some self ->
12391239+ { Atom.href= self
12401240+ ; rel= Atom.Self
12411241+ ; type_media= Some "application/rss+xml"
12421242+ ; hreflang= None
12431243+ ; title= ch.title
12441244+ ; length= None }
12451245+ :: links
12461246+ | None -> links
12471247+ in
12481248+ let updated =
12491249+ match List.fold_left more_recent_of_item None ch.items with
12501250+ | None -> max_date_opt Date.epoch ch.lastBuildDate
12511251+ | Some d -> max_date_opt d ch.lastBuildDate
12521252+ in
12531253+ { Atom.authors= []
12541254+ ; categories=
12551255+ ( match ch.category with
12561256+ | None -> []
12571257+ | Some c -> [{Atom.term= c; scheme= None; label= None}] )
12581258+ ; contributors
12591259+ ; generator=
12601260+ map_option ch.generator (fun g ->
12611261+ {Atom.content= g; version= None; uri= None} )
12621262+ ; icon= None
12631263+ ; id= ch.link
12641264+ ; (* FIXME: Best we can do? *)
12651265+ links
12661266+ ; logo= map_option ch.image (fun i -> i.url)
12671267+ ; rights= map_option ch.copyright (fun c -> (Atom.Text c : Atom.rights))
12681268+ ; subtitle= None
12691269+ ; title= Atom.Text ch.title
12701270+ ; updated
12711271+ ; entries= List.map (entry_of_item ch.link updated) ch.items }
+351
stack/syndic/lib/syndic_rss2.mli
···11+(** [Syndic.Rss2]: compliant with {{:
22+ http://www.rssboard.org/rss-specification} RSS 2.0}. *)
33+44+module Error : module type of Syndic_error
55+66+(** An [image] is an optional sub-element of {!channel}, which contains three
77+ required ([url], [title], [link]) and three optional ([width], [height],
88+ [description]) sub-elements.
99+1010+ {{:
1111+ http://www.rssboard.org/rss-specification#ltimagegtSubelementOfLtchannelgt}
1212+ See RSS 2.0 about <image>}. *)
1313+type image =
1414+ { url: Uri.t
1515+ (** The URL of a GIF, JPEG or PNG image that represents the channel. *)
1616+ ; title: string
1717+ (** Describes the image. It's used in the ALT attribute of the HTML
1818+ <img> tag when the channel is rendered in HTML. *)
1919+ ; link: Uri.t
2020+ (** The URL of the site, when the channel is rendered, the image is a
2121+ link to the site. (Note, in practice the image [title] and [link]
2222+ should have the same value as the {!channel}'s [title] and [link]. *)
2323+ ; width: int
2424+ (** Width of the image in pixels. Maximum value is 144, default value
2525+ is 88. *)
2626+ ; height: int
2727+ (** Height of the image in pixels. Maximum value is 400, default value
2828+ is 31. *)
2929+ ; description: string option
3030+ (** contains text that is included in the TITLE attribute of the link
3131+ formed around the image in the HTML rendering. *) }
3232+3333+(** [cloud] is an optional sub-element of {!channel}. It specifies a web
3434+ service that supports the rssCloud interface which can be implemented in
3535+ HTTP-POST, XML-RPC or SOAP 1.1.
3636+3737+ Its purpose is to allow processes to register with a cloud to be notified
3838+ of updates to the channel, implementing a lightweight publish-subscribe
3939+ protocol for RSS feeds.
4040+4141+ {{:
4242+ http://www.rssboard.org/rss-specification#ltcloudgtSubelementOfLtchannelgt}
4343+ See RSS 2.0 about <cloud> }
4444+4545+ {[ <cloud domain="rpc.sys.com" port="80" path="/RPC2"
4646+ registerProcedure="myCloud.rssPleaseNotify" protocol="xml-rpc" /> ]}
4747+4848+ In this example, to request notification on the channel it appears in, you
4949+ would send an XML-RPC message to rpc.sys.com on port 80, with a path of
5050+ /RPC2. The procedure to call is myCloud.rssPleaseNotify. *)
5151+type cloud =
5252+ { uri: Uri.t (** The URI of the cloud (domain, port, path). *)
5353+ ; registerProcedure: string
5454+ ; protocol: string }
5555+5656+(** A {!channel} may optionally contain a [textInput] sub-element, which
5757+ contains four required sub-elements.
5858+5959+ The purpose of the <textInput> element is something of a mystery. You can
6060+ use it to specify a search engine box. Or to allow a reader to provide
6161+ feedback. Most aggregators ignore it.
6262+6363+ {{:
6464+ http://www.rssboard.org/rss-specification#lttextinputgtSubelementOfLtchannelgt}
6565+ See RSS 2.0 about <textinput>} *)
6666+type textinput =
6767+ { title: string (** The label of the Submit button in the text input area. *)
6868+ ; description: string (** Explains the text input area. *)
6969+ ; name: string (** The name of the text object in the text input area. *)
7070+ ; link: Uri.t
7171+ (** The URL of the CGI script that processes text input requests. *) }
7272+7373+(** [category] is an optional sub-element of {!item}. - [data] is A
7474+ forward-slash-separated string that identifies a hierarchic location in the
7575+ indicated taxonomy. Processors may establish conventions for the
7676+ interpretation of categories. - [domain], if provided, a string that
7777+ identifies a categorization taxonomy.
7878+7979+ {{:
8080+ http://www.rssboard.org/rss-specification#ltcategorygtSubelementOfLtitemgt}
8181+ See RSS 2.0 about <category> }
8282+8383+ Two examples are provided below:
8484+8585+ {[ <category>Grateful Dead</category> ]}
8686+8787+ {[ <category domain="http://www.fool.com/cusips">MSFT</category> ]}
8888+8989+ You may include as many category elements as you need to, for different
9090+ domains, and to have an item cross-referenced in different parts of the
9191+ same domain. *)
9292+type category = {data: string; domain: Uri.t option}
9393+9494+(** [enclosure] is an optional sub-element of {!item}. It has three required
9595+ attributes. - [url] says where the enclosure is located (must be an http
9696+ url), - [length] says how big it is in bytes, and - [mime] says what its
9797+ type is, a standard MIME type.
9898+9999+ {{:
100100+ http://www.rssboard.org/rss-specification#ltenclosuregtSubelementOfLtitemgt}
101101+ See RSS 2.0 about <enclosure> }
102102+103103+ {[ <enclosure url="http://www.scripting.com/mp3s/weatherReportSuite.mp3"
104104+ length="12216320" type="audio/mpeg" /> ]} *)
105105+type enclosure = {url: Uri.t; length: int; mime: string}
106106+107107+(** [guid] is an optional sub-element of {!item}. "guid" stands for globally
108108+ unique identifier. It's a string that uniquely identifies the item. When
109109+ present, an aggregator may choose to use this string to determine if an
110110+ item is new.
111111+112112+ {{: http://www.rssboard.org/rss-specification#ltguidgtSubelementOfLtitemgt}
113113+ See RSS 2.0 about <guid>}
114114+115115+ {[<guid>http://some.server.com/weblogItem3207</guid>]}
116116+117117+ There are no rules for the syntax of a guid. Aggregators must view them as
118118+ a string. It's up to the source of the feed to establish the uniqueness of
119119+ the string.
120120+121121+ If [permalink] is [true], the reader may assume that it is a permalink to
122122+ the item, that is, a url that can be opened in a Web browser, that points
123123+ to the full item described by the <item> element. An example:
124124+125125+ {[<guid
126126+ isPermaLink="true">http://inessential.com/2002/09/01.php#a2</guid>]}
127127+128128+ If [permalink] is [false], the guid may not be assumed to be a url, or a
129129+ url to anything in particular. *)
130130+type guid =
131131+ {data: Uri.t (** Must be unique *); permalink: bool (** default [true] *)}
132132+133133+(** [source] is an optional sub-element of {!item}. - [data] is the name of the
134134+ RSS channel that the item came from, derived from its <title>. - [url]
135135+ links to the XMLization of the source.
136136+137137+ The purpose of this element is to propagate credit for links, to publicize
138138+ the sources of news items. It can be used in the Post command of an
139139+ aggregator. It should be generated automatically when forwarding an item
140140+ from an aggregator to a weblog authoring tool.
141141+142142+ {{:http://www.rssboard.org/rss-specification#ltsourcegtSubelementOfLtitemgt}
143143+ See RSS 2.0 about <source>}
144144+145145+ {[<source url="http://www.tomalak.org/links2.xml">Tomalak's
146146+ Realm</source>]} *)
147147+type source = {data: string; url: Uri.t}
148148+149149+type story =
150150+ | All of string * Uri.t option * string
151151+ (** [All(title, xmlbase, description)] *)
152152+ | Title of string
153153+ | Description of Uri.t option * string
154154+ (** [Description(xmlbase, description)] *)
155155+156156+(** A {!channel} may contain any number of [item]s. An item may represent a
157157+ "story" — much like a story in a newspaper or magazine; if so its
158158+ description is a synopsis of the story, and the link points to the full
159159+ story. An item may also be complete in itself, if so, the description
160160+ contains the text (entity-encoded HTML is allowed; see examples), and the
161161+ link and title may be omitted.
162162+163163+ - [title] : The title of the item. - [link] : The URL of the item. -
164164+ [story] : The item synopsis. - [content] : The possible full story
165165+ ([(_,"")] if not present). (Extension of RSS2, see
166166+ http://purl.org/rss/1.0/modules/content/) The first element of the couple
167167+ is the possible value of xml:base. It can be used to resolve URIs. -
168168+ [author] : Email address of the author of the item. - [category] : Includes
169169+ the item in one or more categories. - [comments] : URL of a page for
170170+ comments relating to the item. - [enclosure] : Describes a media object
171171+ that is attached to the item. - [guid] : A string that uniquely identifies
172172+ the item. - [pubDate] : Indicates when the item was published. - [source] :
173173+ The RSS channel that the item came from.
174174+175175+ {{: http://www.rssboard.org/rss-specification#hrelementsOfLtitemgt} See RSS
176176+ 2.0 about <item> } *)
177177+type item =
178178+ { story: story
179179+ ; content: Uri.t option * string
180180+ ; link: Uri.t option
181181+ ; author: string option
182182+ ; categories: category list
183183+ ; comments: Uri.t option
184184+ ; enclosure: enclosure option
185185+ ; guid: guid option
186186+ ; pubDate: Syndic_date.t option
187187+ ; source: source option }
188188+189189+(** Here's a list of the required channel elements, each with a brief
190190+ description, an example, and where available, a pointer to a more complete
191191+ description.
192192+193193+ - [title]: The name of the channel. It's how people refer to your service.
194194+ If you have an HTML website that contains the same information as your RSS
195195+ file, the title of your channel should be the same as the title of your
196196+ website. - [link]: The URL to the HTML website corresponding to the
197197+ channel. - [description]: Phrase or sentence describing the channel.
198198+199199+ Here's a list of optional channel elements.
200200+201201+ - [language]: The language the channel is written in. This allows
202202+ aggregators to group all Italian language sites, for example, on a single
203203+ page. A list of allowable values for this element, as provided by Netscape,
204204+ is here. You may also use values defined by the W3C. - [copyright]:
205205+ Copyright notice for content in the channel. - [managingEditor]: Email
206206+ address for person responsible for editorial content. - [webMaster]: Email
207207+ address for person responsible for technical issues relating to channel. -
208208+ [pubDate]: The publication date for the content in the channel. For
209209+ example, the New York Times publishes on a daily basis, the publication
210210+ date flips once every 24 hours. That's when the pubDate of the channel
211211+ changes. All date-times in RSS conform to the Date and Time Specification
212212+ of RFC 822, with the exception that the year may be expressed with two
213213+ characters or four characters (four preferred). - [lastBuildDate]: The last
214214+ time the content of the channel changed. - [category]: Specify one or more
215215+ categories that the channel belongs to. Follows the same rules as the
216216+ <item>-level category element. See {!category}. - [generator]: A string
217217+ indicating the program used to generate the channel. - [docs]: A URL that
218218+ points to the documentation for the format used in the RSS file. It's
219219+ probably a pointer to [http://www.rssboard.org/rss-specification]. It's for
220220+ people who might stumble across an RSS file on a Web server 25 years from
221221+ now and wonder what it is. - [cloud]: Allows processes to register with a
222222+ cloud to be notified of updates to the channel, implementing a lightweight
223223+ publish-subscribe protocol for RSS feeds. See {!cloud}. - [ttl]: ttl stands
224224+ for time to live. It's a number of minutes that indicates how long a
225225+ channel can be cached before refreshing from the source. - [image]:
226226+ Specifies a GIF, JPEG or PNG image that can be displayed with the channel.
227227+ See {!image}. - [rating]: The PICS rating for the channel. - [textInput]:
228228+ Specifies a text input box that can be displayed with the channel. See
229229+ {!textinput}. - [skipHours]: A hint for aggregators telling them which
230230+ hours they can skip. This element contains up to 24 <hour> sub-elements
231231+ whose value is a number between 0 and 23, representing a time in GMT, when
232232+ aggregators, if they support the feature, may not read the channel on hours
233233+ listed in the <skipHours> element. The hour beginning at midnight is hour
234234+ zero. - [skipDays]: A hint for aggregators telling them which days they can
235235+ skip. This element contains up to seven <day> sub-elements whose value is
236236+ Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or Sunday.
237237+ Aggregators may not read the channel during days listed in the <skipDays>
238238+ element.
239239+240240+ {{: http://www.rssboard.org/rss-specification#requiredChannelElements} See
241241+ RSS 2.0 about <channel>} *)
242242+type channel =
243243+ { title: string
244244+ ; link: Uri.t
245245+ ; description: string
246246+ ; language: string option
247247+ ; copyright: string option
248248+ ; managingEditor: string option
249249+ ; webMaster: string option
250250+ ; pubDate: Syndic_date.t option
251251+ ; lastBuildDate: Syndic_date.t option
252252+ ; category: string option
253253+ ; generator: string option
254254+ ; docs: Uri.t option
255255+ ; cloud: cloud option
256256+ ; ttl: int option
257257+ (** {{:
258258+ http://www.rssboard.org/rss-specification#ltcloudgtSubelementOfLtchannelgt}
259259+ See RSS 2.0 about <ttl> } *)
260260+ ; image: image option
261261+ ; rating: int option
262262+ ; (* lol *)
263263+ textInput: textinput option
264264+ ; skipHours: int option
265265+ ; skipDays: int option
266266+ ; items: item list }
267267+268268+val parse : ?xmlbase:Uri.t -> Xmlm.input -> channel
269269+(** [parse xml] returns the channel corresponding to [xml].
270270+271271+ Raise [Error.Expected], [Error.Size_Exceeded] or [Error.Item_expectation]
272272+ if [xml] is not a valid RSS2 document. *)
273273+274274+val read : ?xmlbase:Uri.t -> string -> channel
275275+(** [read fname] reads the file name [fname] and parses it. For the optional
276276+ parameters, see {!parse}. *)
277277+278278+val to_atom : ?self:Uri.t -> channel -> Syndic_atom.feed
279279+(** [to_atom ch] returns an Atom feed that (mostly) contains the same
280280+ information.
281281+282282+ @param self the URI from where the current feed was retrieved. Contrarily
283283+ to Atom, RSS2 has no provision to store the URI of the feed itself. Giving
284284+ this information will add an entry to the [links] field of Atom feed with
285285+ [rel = Self]. *)
286286+287287+(**/**)
288288+289289+(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None],
290290+ gives the base URI against which [uri] must be resolved if it is relative. *)
291291+type uri = Uri.t option * string
292292+293293+val unsafe :
294294+ ?xmlbase:Uri.t
295295+ -> Xmlm.input
296296+ -> [> `Channel of [> `Category of string
297297+ | `Cloud of [> `Domain of string
298298+ | `Path of string
299299+ | `Port of string
300300+ | `Protocol of string
301301+ | `RegisterProcedure of string ]
302302+ list
303303+ | `Copyright of string
304304+ | `Description of string
305305+ | `Docs of string
306306+ | `Generator of string
307307+ | `Image of [> `Description of string
308308+ | `Height of string
309309+ | `Link of uri
310310+ | `Title of string
311311+ | `URL of uri
312312+ | `Width of string ]
313313+ list
314314+ | `Item of [> `Author of string
315315+ | `Category of [> `Data of string
316316+ | `Domain of string ]
317317+ list
318318+ | `Comments of string
319319+ | `Description of string
320320+ | `Content of string
321321+ | `Enclosure of [> `Length of string
322322+ | `Mime of string
323323+ | `URL of uri ]
324324+ list
325325+ | `Guid of [> `Data of uri
326326+ | `Permalink of string ]
327327+ list
328328+ | `Link of uri
329329+ | `PubDate of string
330330+ | `Source of [> `Data of string | `URL of uri]
331331+ list
332332+ | `Title of string ]
333333+ list
334334+ | `Language of string
335335+ | `LastBuildDate of string
336336+ | `Link of uri
337337+ | `ManagingEditor of string
338338+ | `PubDate of string
339339+ | `Rating of string
340340+ | `SkipDays of string
341341+ | `SkipHours of string
342342+ | `TTL of string
343343+ | `TextInput of [> `Description of string
344344+ | `Link of uri
345345+ | `Name of string
346346+ | `Title of string ]
347347+ list
348348+ | `Title of string
349349+ | `WebMaster of string ]
350350+ list ]
351351+(** Analysis without verification, enjoy ! *)
+126
stack/syndic/lib/syndic_w3c.ml
···11+open Syndic_common.XML
22+open Syndic_common.Util
33+module XML = Syndic_xml
44+module Error = Syndic_error
55+66+type error' =
77+ [ `Line of string
88+ | `Column of string
99+ | `Text of string
1010+ | `Element of string
1111+ | `Parent of string
1212+ | `Value of string ]
1313+1414+type error
1515+type warning
1616+type 'a kind = Error | Warning
1717+1818+let error = Error
1919+let warning = Warning
2020+2121+type 'a t =
2222+ { kind: 'a kind (** Error or warning. *)
2323+ ; line: int
2424+ (** Within the source code of the validated document, refers to the
2525+ line where the error was detected. *)
2626+ ; column: int
2727+ (** Within the source code of the validated document, refers to the
2828+ line where the column was detected. *)
2929+ ; text: string (** The actual error message. *)
3030+ ; element: string
3131+ (** Element in the feed where the message was triggered. *)
3232+ ; parent: string (** In the feed, parent of the element. *)
3333+ ; value: string
3434+ (** If applicable the value of the element, attribute or content which
3535+ triggered the message. *) }
3636+3737+let feed_url = Uri.of_string "http://validator.w3.org/feed/check.cgi"
3838+3939+let url d =
4040+ let q = [("output", ["soap12"])] in
4141+ let q =
4242+ match d with
4343+ | `Data data -> ("rawdata", [data]) :: q
4444+ | `Uri uri -> [("url", [Uri.to_string uri])]
4545+ in
4646+ Uri.with_query feed_url q
4747+4848+let make_error ~kind ~pos:_ (l : [< error'] list) =
4949+ let line =
5050+ match find (function `Line _ -> true | _ -> false) l with
5151+ | Some (`Line line) -> ( try int_of_string line with _ -> 0 )
5252+ | _ -> 0
5353+ in
5454+ let column =
5555+ match find (function `Column _ -> true | _ -> false) l with
5656+ | Some (`Column column) -> ( try int_of_string column with _ -> 0 )
5757+ | _ -> 0
5858+ in
5959+ let text =
6060+ match find (function `Text _ -> true | _ -> false) l with
6161+ | Some (`Text text) -> text
6262+ | _ -> ""
6363+ in
6464+ let element =
6565+ match find (function `Element _ -> true | _ -> false) l with
6666+ | Some (`Element element) -> element
6767+ | _ -> ""
6868+ in
6969+ let parent =
7070+ match find (function `Parent _ -> true | _ -> false) l with
7171+ | Some (`Parent parent) -> parent
7272+ | _ -> ""
7373+ in
7474+ let value =
7575+ match find (function `Value _ -> true | _ -> false) l with
7676+ | Some (`Value value) -> value
7777+ | _ -> ""
7878+ in
7979+ ({kind; line; column; text; element; parent; value} : _ t)
8080+8181+let error_data_producer =
8282+ [ ("line", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Line a))
8383+ ; ("column", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Column a))
8484+ ; ("text", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Text a))
8585+ ; ("element", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Element a))
8686+ ; ("parent", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Parent a))
8787+ ; ("value", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Value a)) ]
8888+8989+let error_of_xml ~kind =
9090+ generate_catcher ~data_producer:error_data_producer (make_error ~kind)
9191+9292+let make_errorlist ~pos:_ (l : _ t list) = l
9393+9494+let errorlist_of_xml =
9595+ let data_producer = [("error", error_of_xml ~kind:Error)] in
9696+ generate_catcher ~data_producer ~xmlbase:None make_errorlist
9797+9898+let warninglist_of_xml =
9999+ let data_producer = [("warning", error_of_xml ~kind:Warning)] in
100100+ generate_catcher ~data_producer ~xmlbase:None make_errorlist
101101+102102+let find_errorlist l =
103103+ recursive_find
104104+ (function XML.Node (_, t, _) -> tag_is t "errorlist" | _ -> false)
105105+ l
106106+107107+let find_warninglist l =
108108+ recursive_find
109109+ (function XML.Node (_, t, _) -> tag_is t "warninglist" | _ -> false)
110110+ l
111111+112112+let to_error {line; column; text; _} = ((line, column), text)
113113+114114+let parse input =
115115+ let _, xml = XML.of_xmlm input in
116116+ let err =
117117+ match find_errorlist xml with
118118+ | Some (XML.Node (p, t, d)) -> errorlist_of_xml (p, t, d)
119119+ | _ -> []
120120+ in
121121+ let warn =
122122+ match find_warninglist xml with
123123+ | Some (XML.Node (p, t, d)) -> warninglist_of_xml (p, t, d)
124124+ | _ -> []
125125+ in
126126+ (err, warn)
+38
stack/syndic/lib/syndic_w3c.mli
···11+(** [Syndic.W3C]: invoke and parse the result of the W3C validator. *)
22+33+module Error : module type of Syndic_error
44+55+type error
66+type warning
77+88+(** Distinguishes an error from a warning. *)
99+type 'a kind
1010+1111+val error : error kind
1212+val warning : warning kind
1313+1414+type 'a t =
1515+ { kind: 'a kind (** Error or warning. *)
1616+ ; line: int
1717+ (** Within the source code of the validated document, refers to the
1818+ line where the error was detected. *)
1919+ ; column: int
2020+ (** Within the source code of the validated document, refers to the
2121+ line where the column was detected. *)
2222+ ; text: string (** The actual error message. *)
2323+ ; element: string
2424+ (** Element in the feed where the message was triggered. *)
2525+ ; parent: string (** In the feed, parent of the element. *)
2626+ ; value: string
2727+ (** If applicable the value of the element, attribute or content which
2828+ triggered the message. *) }
2929+3030+val url : [< `Data of string | `Uri of Uri.t] -> Uri.t
3131+(** Generate url for the W3C Feed Validator API returning a SOAP 12 output.
3232+ Thus URL is supposed to be used with GET. *)
3333+3434+val to_error : _ t -> Error.t
3535+3636+val parse : Xmlm.input -> error t list * warning t list
3737+(** [parse i] takes [i] and returns a list of error, result of
3838+ {{:http://validator.w3.org/feed/docs/soap} W3C Feed Validator}. *)
+52
stack/syndic/lib/syndic_xml.ml
···11+type dtd = string option
22+33+module Error = Syndic_error
44+55+type pos = Xmlm.pos
66+type tag = Xmlm.tag
77+type t = Node of pos * tag * t list | Data of pos * string
88+99+let resolve ~xmlbase uri =
1010+ match xmlbase with None -> uri | Some b -> Uri.resolve "" b uri
1111+1212+(* Specialized version of the Xmlm.make_input one. *)
1313+let input_of_channel fh =
1414+ (* Xmlm.make_input does not raise any exception. *)
1515+ Xmlm.make_input (`Channel fh)
1616+1717+let of_xmlm input =
1818+ let el tag datas = Node (Xmlm.pos input, tag, datas) in
1919+ let data data = Data (Xmlm.pos input, data) in
2020+ try Xmlm.input_doc_tree ~el ~data input with Xmlm.Error (pos, e) ->
2121+ raise (Error.Error (pos, Xmlm.error_message e))
2222+2323+let get_position = function Node (pos, _, _) -> pos | Data (pos, _) -> pos
2424+2525+let rec t_to_xmlm t output =
2626+ match t with
2727+ | Data (_pos, d) -> (
2828+ try Xmlm.output output (`Data d) with Xmlm.Error (pos, e) ->
2929+ raise (Error.Error (pos, Xmlm.error_message e)) )
3030+ | Node (_pos, tag, t_sub) -> (
3131+ Xmlm.output output (`El_start tag) ;
3232+ List.iter (fun t -> t_to_xmlm t output) t_sub ;
3333+ try Xmlm.output output `El_end with Xmlm.Error (pos, e) ->
3434+ raise (Error.Error (pos, Xmlm.error_message e)) )
3535+3636+(* Specialized version of the Xmlm one. *)
3737+let make_output ?ns_prefix dest =
3838+ (* Xmlm.make_output does not raise any exception. *)
3939+ Xmlm.make_output dest ~decl:true ?ns_prefix
4040+4141+let to_xmlm ?dtd t output =
4242+ ( try Xmlm.output output (`Dtd dtd) with Xmlm.Error (pos, e) ->
4343+ raise (Error.Error (pos, Xmlm.error_message e)) ) ;
4444+ t_to_xmlm t output
4545+4646+let to_buffer ?ns_prefix t b =
4747+ let output = Xmlm.make_output ~decl:false (`Buffer b) ?ns_prefix in
4848+ to_xmlm t output
4949+5050+let to_string ?ns_prefix t =
5151+ let b = Buffer.create 4096 in
5252+ to_buffer ?ns_prefix t b ; Buffer.contents b
+27
stack/syndic/lib/syndic_xml.mli
···11+(** Common module for XML parsing. *)
22+33+(** The type for the optional {{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD}. *)
44+type dtd = string option
55+66+type pos = Xmlm.pos
77+type tag = Xmlm.tag
88+99+(** A XML tree. *)
1010+type t = Node of pos * tag * t list | Data of pos * string
1111+1212+val resolve : xmlbase:Uri.t option -> Uri.t -> Uri.t
1313+(** [resolve base uri] resolve the [uri] against the possible base. *)
1414+1515+val get_position : t -> pos
1616+val input_of_channel : in_channel -> Xmlm.input
1717+1818+val of_xmlm : Xmlm.input -> dtd * t
1919+(** [of_xmlm doc] converts an XML document [doc] into a DTD and a tree
2020+ representing the document. *)
2121+2222+val make_output :
2323+ ?ns_prefix:(string -> string option) -> Xmlm.dest -> Xmlm.output
2424+2525+val to_xmlm : ?dtd:string -> t -> Xmlm.output -> unit
2626+val to_string : ?ns_prefix:(string -> string option) -> t -> string
2727+val to_buffer : ?ns_prefix:(string -> string option) -> t -> Buffer.t -> unit