syndic · anil.recoil.org/slop@c5ceabd

+11

stack/syndic/.gitignore

··· 1 + _build 2 + setup.data 3 + setup.log 4 + doc/*.html 5 + *.native 6 + *.byte 7 + *.docdir 8 + lib/syndic_conf.ml 9 + *.tar.gz 10 + .merlin 11 + *.install

+6

stack/syndic/.ocamlformat

··· 1 + module-item-spacing=compact 2 + break-struct=natural 3 + break-infix=fit-or-vertical 4 + parens-tuple=always 5 + wrap-comments=true 6 + break-collection-expressions=wrap

+41

stack/syndic/.travis-ci.sh

··· 1 + echo "yes" | sudo add-apt-repository ppa:avsm/ppa 2 + sudo apt-get update -qq 3 + sudo apt-get install ocaml ocaml-native-compilers camlp4-extra opam opam 4 + 5 + export OPAMYES=1 6 + opam init 7 + opam update 8 + opam install oasis ocamlfind calendar xmlm uri lwt cohttp 9 + eval `opam config env` 10 + 11 + ./configure --enable-tests 12 + make 13 + make test 14 + 15 + if [ "$TRAVIS_REPO_SLUG" == "Cumulus/Syndic" ] \ 16 + && [ "$TRAVIS_PULL_REQUEST" == "false" ] \ 17 + && [ "$TRAVIS_BRANCH" == "master" ]; then 18 + 19 + echo -e "Publishing ocamldoc...\n" 20 + 21 + git config --global user.email "travis@travis-ci.org" 22 + git config --global user.name "travis-ci" 23 + git clone https://${GH_TOKEN}@github.com/Cumulus/Syndic .documentation 24 + 25 + cd .documentation 26 + git fetch 27 + git checkout gh-pages 28 + git merge master --commit -m "Merge master into gh-pages" 29 + 30 + ./configure 31 + make doc 32 + 33 + git add -f doc/ 34 + 35 + if [ -n "$(git status --untracked-files=no --porcelain)" ]; then 36 + git commit -m "Update documentation $TRAVIS_BUILD_NUMBER" 37 + git push -fq origin gh-pages 38 + fi 39 + 40 + echo -e "Published ocamldoc to gh-pages.\n" 41 + fi

+13

stack/syndic/.travis.yml

··· 1 + language: c 2 + install: 3 + - wget https://raw.githubusercontent.com/ocaml/ocaml-travisci-skeleton/master/.travis-opam.sh 4 + script: bash -ex .travis-opam.sh 5 + sudo: true 6 + env: 7 + matrix: 8 + - PACKAGE="syndic" OCAML_VERSION=4.04 TESTS=true 9 + - PACKAGE="syndic" OCAML_VERSION=4.05 TESTS=true 10 + - PACKAGE="syndic" OCAML_VERSION=4.06 TESTS=true 11 + - PACKAGE="syndic" OCAML_VERSION=4.07 TESTS=true 12 + global: 13 + secure: P2npPkd5gMklBsWxF9fG22BapaeOxvQK/W2IVcjgtv9mYqp66a3qhKNks6vEgc57AFafNV0kwlmwv+DgA0KOOiC0fQwgR7rPYsYje9J1FJ+0K+SFqJsQweTSWCscEweh0dthNtchEOXyf0A58p9du67y4yA+1la1NYAl+Je7P5s=

+22

stack/syndic/CHANGES.md

··· 1 + v1.7.0 05-11-2025 Paris (France) 2 + -------------------------------- 3 + 4 + * Fix categories on RSS2 (@yawaramin, @dinosaure, #87) 5 + 6 + v1.6.1 05-20-2019 Mons (Belgium) 7 + -------------------------------- 8 + 9 + * `Syndic.Date.of_rfc822` accepts dates such as “May 15th, 2019”. 10 + 11 + v1.6.0 01-21-2019 Paris (France) 12 + -------------------------------- 13 + 14 + * Lost support of OCaml 4.03.0 15 + * Support of OCaml 4.07.0 16 + * Fix tests 17 + * Move to `ocurl` 18 + * Add materials to test in distribution 19 + * Accept entries with empty author (@thomas-huet) 20 + * Dunify project 21 + * Added Z timezone to comply with RFC 822 and reality (@wictory) 22 + * Accept empty CDATA in copyright field (@sgrove, @dinosaure)

+20

stack/syndic/LICENSE

··· 1 + The MIT License (MIT) 2 + 3 + Copyright (c) 2014 Cumulus 4 + 5 + Permission is hereby granted, free of charge, to any person obtaining a copy of 6 + this software and associated documentation files (the "Software"), to deal in 7 + the Software without restriction, including without limitation the rights to 8 + use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 + the Software, and to permit persons to whom the Software is furnished to do so, 10 + subject to the following conditions: 11 + 12 + The above copyright notice and this permission notice shall be included in all 13 + copies or substantial portions of the Software. 14 + 15 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 + FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 + COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+19

stack/syndic/README.md

··· 1 + Syndic 2 + ====== 3 + 4 + RSS and Atom feed parsing 5 + 6 + Documentation 7 + ============= 8 + 9 + You can find it [here](http://cumulus.github.io/Syndic/). 10 + 11 + Build Requirements 12 + ================== 13 + 14 + * OCaml >= 4.01.0 15 + * Ptime >= 0.8.0 16 + * Xmlm >= 1.2.0 17 + * Uri >= 1.3.1 18 + 19 + [![Build Status](https://travis-ci.org/Cumulus/Syndic.svg?branch=master)](https://travis-ci.org/Cumulus/Syndic)

+3

stack/syndic/bin/dune

··· 1 + (executable 2 + (name main) 3 + (libraries syndic))

+12

stack/syndic/bin/main.ml

··· 1 + open Syndic 2 + 3 + let () = 4 + Printexc.record_backtrace true ; 5 + try 6 + let lst, _ = W3C.parse (Xmlm.make_input (`Channel stdin)) in 7 + List.iter 8 + (fun (_, err) -> Printf.printf "E: %s\n%!" err) 9 + (List.map Syndic.W3C.to_error lst) 10 + with 11 + | W3C.Error.Error ((l, c), err) -> Printf.printf "[%d;%d]: %s\n%!" l c err 12 + | _ -> Printexc.print_backtrace stderr

+109

stack/syndic/doc/style.css

··· 1 + /* A style for ocamldoc. Daniel C. Buenzli */ 2 + 3 + /* Reset a few things. */ 4 + html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre, 5 + a,abbr,acronym,address,big,cite,code,del,dfn,em,font,img,ins,kbd,q,s,samp, 6 + small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset, 7 + form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td 8 + { margin: 0; padding: 0; border: 0 none; outline: 0; font-size: 100%; 9 + font-weight: inherit; font-style:inherit; font-family:inherit; 10 + line-height: inherit; vertical-align: baseline; text-align:inherit; 11 + color:inherit; background: transparent; } 12 + 13 + table { border-collapse: collapse; border-spacing: 0; } 14 + 15 + /* Basic page layout */ 16 + 17 + body { font: normal 10pt/1.375em helvetica, arial, sans-serif; text-align:left; 18 + margin: 1.375em 10%; min-width: 40ex; max-width: 72ex; 19 + color: black; background: white /* url(line-height-22.gif) */; } 20 + 21 + b { font-weight: bold } 22 + em { font-style: italic } 23 + 24 + tt, code, pre { font-family: WorkAroundWebKitAndMozilla, monospace; 25 + font-size: 1em; } 26 + pre code { font-size : inherit; } 27 + .codepre { margin-bottom:1.375em /* after code example we introduce space. */ } 28 + 29 + .superscript,.subscript 30 + { font-size : 0.813em; line-height:0; margin-left:0.4ex;} 31 + .superscript { vertical-align: super; } 32 + .subscript { vertical-align: sub; } 33 + 34 + /* ocamldoc markup workaround hacks */ 35 + 36 + 37 + 38 + hr, hr + br, div + br, center + br, span + br, ul + br, ol + br, pre + br 39 + { display: none } /* annoying */ 40 + 41 + div.info + br { display:block} 42 + 43 + .codepre br + br { display: none } 44 + h1 + pre { margin-bottom:1.375em} /* Toplevel module description */ 45 + 46 + /* Sections and document divisions */ 47 + 48 + /* .navbar { margin-bottom: -1.375em } */ 49 + h1 { font-weight: bold; font-size: 1.5em; /* margin-top:1.833em; */ 50 + margin-top:0.917em; padding-top:0.875em; 51 + border-top-style:solid; border-width:1px; border-color:#AAA; } 52 + h2 { font-weight: bold; font-size: 1.313em; margin-top: 1.048em } 53 + h3 { font-weight: bold; font-size: 1.125em; margin-top: 1.222em } 54 + h3 { font-weight: bold; font-size: 1em; margin-top: 1.375em} 55 + h4 { font-style: italic; } 56 + 57 + /* Used by OCaml's own library documentation. */ 58 + h6 { font-weight: bold; font-size: 1.125em; margin-top: 1.222em } 59 + .h7 { font-weight: bold; font-size: 1em; margin-top: 1.375em } 60 + 61 + p { margin-top: 1.375em } 62 + pre { margin-top: 1.375em } 63 + .info { margin: 0.458em 0em -0.458em 2em;}/* Description of types values etc. */ 64 + td .info { margin:0; padding:0; margin-left: 2em;} /* Description in indexes */ 65 + 66 + ul, ol { margin-top:0.688em; padding-bottom:0.687em; 67 + list-style-position:outside} 68 + ul + p, ol + p { margin-top: 0em } 69 + ul { list-style-type: square } 70 + 71 + 72 + /* h2 + ul, h3 + ul, p + ul { } */ 73 + ul > li { margin-left: 1.375em; } 74 + ol > li { margin-left: 1.7em; } 75 + /* Links */ 76 + 77 + a, a:link, a:visited, a:active, a:hover { color : #00B; text-decoration: none } 78 + a:hover { text-decoration : underline } 79 + *:target {background-color: #FFFF99;} /* anchor highlight */ 80 + 81 + /* Code */ 82 + 83 + .keyword { font-weight: bold; } 84 + .comment { color : red } 85 + .constructor { color : green } 86 + .string { color : brown } 87 + .warning { color : red ; font-weight : bold } 88 + 89 + /* Functors */ 90 + 91 + .paramstable { border-style : hidden ; padding-bottom:1.375em} 92 + .paramstable code { margin-left: 1ex; margin-right: 1ex } 93 + .sig_block {margin-left: 1em} 94 + 95 + /* Images */ 96 + 97 + img { margin-top: 1.375em; display:block } 98 + li img { margin-top: 0em; } 99 + 100 + 101 + 102 + 103 + 104 + 105 + 106 + 107 + 108 + 109 +

+3

stack/syndic/dune-project

··· 1 + (lang dune 1.0) 2 + (name syndic) 3 + (version v1.7.0)

+26

stack/syndic/examples/minimal.xml

··· 1 + <?xml version="1.0" encoding="utf-8"?> 2 + <feed xmlns="http://www.w3.org/2005/Atom" xml:base="http://example.org/"> 3 + 4 + <title>Example Feed</title> 5 + <link href="/"/> 6 + <updated>2003-12-13T18:30:02Z</updated> 7 + <author> 8 + <name>John Doe</name> 9 + </author> 10 + <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id> 11 + 12 + <entry xml:base="http://nowhere.org/"> 13 + <title>Atom-Powered Robots Run Amok</title> 14 + <link href="2003/12/13/atom03"/> 15 + <id>nowhere.org/2003/12/13/atom03</id> 16 + <updated>2003-12-13T18:30:02Z</updated> 17 + <summary>Some text.</summary> 18 + </entry> 19 + 20 + <entry> 21 + <title>A B C</title> 22 + <link xml:base="http://a.b.c/" href="2009/"/> 23 + <id>A.B.C.2009</id> 24 + <updated>2003-12-13T18:30:02Z</updated> 25 + </entry> 26 + </feed>

+13

stack/syndic/lib/conf/conf.ml

··· 1 + let () = 2 + let version, homepage, output = 3 + match Sys.argv with 4 + | [|_; "--version"; version; "--homepage"; homepage; "-o"; output|] -> 5 + (version, homepage, output) 6 + | _ -> 7 + invalid_arg 8 + "%s --version ${VERSION} --homepage ${HOMEPAGE} -o <output>" 9 + in 10 + let oc = open_out output in 11 + Printf.fprintf oc "let version = \"%s\" and homepage = Uri.of_string \"%s\"" 12 + version homepage ; 13 + close_out oc

+9

stack/syndic/lib/dune

··· 1 + (library 2 + (name syndic) 3 + (public_name syndic) 4 + (libraries xmlm uri ptime)) 5 + 6 + (rule 7 + (targets syndic_conf.ml) 8 + (deps (:conf conf/conf.ml)) 9 + (action (run %{ocaml} %{conf} --version ${version:syndic} --homepage https://ocaml.org/ -o %{targets})))

+7

stack/syndic/lib/syndic.ml

··· 1 + module Rss1 = Syndic_rss1 2 + module Rss2 = Syndic_rss2 3 + module Atom = Syndic_atom 4 + module Opml1 = Syndic_opml1 5 + module W3C = Syndic_w3c 6 + module Date = Syndic_date 7 + module XML = Syndic_xml

+1605

stack/syndic/lib/syndic_atom.ml

··· 1 + open Syndic_common.XML 2 + open Syndic_common.Util 3 + module XML = Syndic_xml 4 + module Error = Syndic_error 5 + module Date = Syndic_date 6 + 7 + let atom_ns = "http://www.w3.org/2005/Atom" 8 + let xhtml_ns = "http://www.w3.org/1999/xhtml" 9 + let namespaces = [atom_ns] 10 + 11 + type rel = Alternate | Related | Self | Enclosure | Via | Link of Uri.t 12 + 13 + type link = 14 + { href: Uri.t 15 + ; rel: rel 16 + ; type_media: string option 17 + ; hreflang: string option 18 + ; title: string 19 + ; length: int option } 20 + 21 + let link ?type_media ?hreflang ?(title = "") ?length ?(rel = Alternate) href = 22 + {href; rel; type_media; hreflang; title; length} 23 + 24 + type link' = 25 + [ `HREF of Uri.t 26 + | `Rel of string 27 + | `Type of string 28 + | `HREFLang of string 29 + | `Title of string 30 + | `Length of string ] 31 + 32 + (* The actual XML content is supposed to be inside a <div> which is NOT part of 33 + the content. *) 34 + let rec get_xml_content xml0 = function 35 + | XML.Data (_, s) :: tl -> 36 + if only_whitespace s then get_xml_content xml0 tl 37 + else xml0 (* unexpected *) 38 + | XML.Node (_pos, tag, data) :: tl when tag_is tag "div" -> 39 + let is_space = 40 + List.for_all 41 + (function XML.Data (_, s) -> only_whitespace s | _ -> false) 42 + tl 43 + in 44 + if is_space then data else xml0 45 + | _ -> xml0 46 + 47 + let no_namespace = Some "" 48 + let rm_namespace _ = no_namespace 49 + 50 + (* For HTML, the spec says the whole content needs to be escaped 51 + http://tools.ietf.org/html/rfc4287#section-3.1.1.2 (some feeds use <![CDATA[ 52 + ]]>) so a single data item should be present. If not, assume the HTML was 53 + properly parsed and convert it back to a string as it should. *) 54 + let get_html_content html = 55 + match html with 56 + | [XML.Data (_, d)] -> d 57 + | h -> 58 + (* It is likely that, when the HTML was parsed, the Atom namespace was 59 + applied. Remove it. *) 60 + String.concat "" (List.map (XML.to_string ~ns_prefix:rm_namespace) h) 61 + 62 + type text_construct = 63 + | Text of string 64 + | Html of Uri.t option * string 65 + | Xhtml of Uri.t option * XML.t list 66 + 67 + let text_construct_of_xml ~xmlbase 68 + ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) = 69 + let xmlbase = xmlbase_of_attr ~xmlbase attr in 70 + match find (fun a -> attr_is a "type") attr with 71 + | Some (_, "html") -> Html (xmlbase, get_html_content data) 72 + | Some (_, "application/xhtml+xml") | Some (_, "xhtml") -> 73 + Xhtml (xmlbase, get_xml_content data data) 74 + | _ -> Text (get_leaf data) 75 + 76 + type author = {name: string; uri: Uri.t option; email: string option} 77 + 78 + let empty_author = {name= ""; uri= None; email= None} 79 + let not_empty_author a = a.name <> "" || a.uri <> None || a.email <> None 80 + let author ?uri ?email name = {uri; email; name} 81 + 82 + type person' = [`Name of string | `URI of Uri.t | `Email of string] 83 + 84 + let make_person datas ~pos:_ (l : [< person'] list) = 85 + (* element atom:name { text } *) 86 + let name = 87 + match find (function `Name _ -> true | _ -> false) l with 88 + | Some (`Name s) -> s 89 + | _ -> 90 + (* The spec mandates that <author><name>name</name></author> but 91 + several feeds just do <author>name</author> *) 92 + get_leaf datas 93 + in 94 + (* element atom:uri { atomUri }? *) 95 + let uri = 96 + match find (function `URI _ -> true | _ -> false) l with 97 + | Some (`URI u) -> Some u 98 + | _ -> None 99 + in 100 + (* element atom:email { atomEmailAddress }? *) 101 + let email = 102 + match find (function `Email _ -> true | _ -> false) l with 103 + | Some (`Email e) -> Some e 104 + | _ -> None 105 + in 106 + ({name; uri; email} : author) 107 + 108 + let make_author datas ~pos a = `Author (make_person datas ~pos a) 109 + 110 + let person_name_of_xml ~xmlbase:_ (_pos, _tag, datas) = 111 + `Name (try get_leaf datas with Not_found -> "") 112 + 113 + (* mandatory ? *) 114 + 115 + let person_uri_of_xml ~xmlbase (pos, _tag, datas) = 116 + try `URI (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 117 + with Not_found -> 118 + raise 119 + (Error.Error (pos, "The content of <uri> MUST be a non-empty string")) 120 + 121 + let person_email_of_xml ~xmlbase:_ (_pos, _tag, datas) = 122 + `Email (try get_leaf datas with Not_found -> "") 123 + 124 + (* mandatory ? *) 125 + 126 + (* {[ atomAuthor = element atom:author { atomPersonConstruct } ]} where 127 + 128 + atomPersonConstruct = atomCommonAttributes, (element atom:name { text } & 129 + element atom:uri { atomUri }? & element atom:email { atomEmailAddress }? & 130 + extensionElement * ) 131 + 132 + This specification assigns no significance to the order of appearance of the 133 + child elements in a Person construct. *) 134 + let person_data_producer = 135 + [ ("name", person_name_of_xml) 136 + ; ("uri", person_uri_of_xml) 137 + ; ("email", person_email_of_xml) ] 138 + 139 + let author_of_xml ~xmlbase ((_, _, datas) as xml) = 140 + generate_catcher ~namespaces ~data_producer:person_data_producer 141 + (make_author datas) ~xmlbase xml 142 + 143 + type uri = Uri.t option * string 144 + type person = [`Email of string | `Name of string | `URI of uri] list 145 + 146 + let person_data_producer' = 147 + [ ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a)) 148 + ; ("uri", dummy_of_xml ~ctor:(fun ~xmlbase a -> `URI (xmlbase, a))) 149 + ; ("email", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Email a)) ] 150 + 151 + let author_of_xml' = 152 + generate_catcher ~namespaces ~data_producer:person_data_producer' 153 + (fun ~pos:_ x -> `Author x ) 154 + 155 + type category = {term: string; scheme: Uri.t option; label: string option} 156 + 157 + let category ?scheme ?label term = {scheme; label; term} 158 + 159 + type category' = [`Term of string | `Scheme of Uri.t | `Label of string] 160 + 161 + let make_category ~pos (l : [< category'] list) = 162 + (* attribute term { text } *) 163 + let term = 164 + match find (function `Term _ -> true | _ -> false) l with 165 + | Some (`Term t) -> t 166 + | _ -> 167 + raise 168 + (Error.Error (pos, "Category elements MUST have a 'term' attribute")) 169 + in 170 + (* attribute scheme { atomUri }? *) 171 + let scheme = 172 + match find (function `Scheme _ -> true | _ -> false) l with 173 + | Some (`Scheme u) -> Some u 174 + | _ -> None 175 + in 176 + (* attribute label { text }? *) 177 + let label = 178 + match find (function `Label _ -> true | _ -> false) l with 179 + | Some (`Label l) -> Some l 180 + | _ -> None 181 + in 182 + `Category ({term; scheme; label} : category) 183 + 184 + let scheme_of_xml ~xmlbase a = `Scheme (XML.resolve ~xmlbase (Uri.of_string a)) 185 + 186 + (* atomCategory = element atom:category { atomCommonAttributes, attribute term 187 + { text }, attribute scheme { atomUri }?, attribute label { text }?, 188 + undefinedContent } *) 189 + let category_attr_producer = 190 + [ ("term", fun ~xmlbase:_ a -> `Term a) 191 + ; ("label", fun ~xmlbase:_ a -> `Label a) ] 192 + 193 + let category_of_xml = 194 + let attr_producer = ("scheme", scheme_of_xml) :: category_attr_producer in 195 + generate_catcher ~attr_producer make_category 196 + 197 + let category_of_xml' = 198 + let attr_producer = 199 + ("scheme", fun ~xmlbase:_ a -> `Scheme a) :: category_attr_producer 200 + in 201 + generate_catcher ~attr_producer (fun ~pos:_ x -> `Category x) 202 + 203 + let make_contributor datas ~pos a = `Contributor (make_person datas ~pos a) 204 + 205 + let contributor_of_xml ~xmlbase ((_, _, datas) as xml) = 206 + generate_catcher ~namespaces ~data_producer:person_data_producer 207 + (make_contributor datas) ~xmlbase xml 208 + 209 + let contributor_of_xml' = 210 + generate_catcher ~namespaces ~data_producer:person_data_producer' 211 + (fun ~pos:_ x -> `Contributor x ) 212 + 213 + type generator = {version: string option; uri: Uri.t option; content: string} 214 + 215 + let generator ?uri ?version content = {uri; version; content} 216 + 217 + type generator' = [`URI of Uri.t | `Version of string | `Content of string] 218 + 219 + let make_generator ~pos (l : [< generator'] list) = 220 + (* text *) 221 + let content = 222 + match find (function `Content _ -> true | _ -> false) l with 223 + | Some (`Content c) -> c 224 + | _ -> 225 + raise 226 + (Error.Error 227 + (pos, "The content of <generator> MUST be a non-empty string")) 228 + in 229 + (* attribute version { text }? *) 230 + let version = 231 + match find (function `Version _ -> true | _ -> false) l with 232 + | Some (`Version v) -> Some v 233 + | _ -> None 234 + in 235 + (* attribute uri { atomUri }? *) 236 + let uri = 237 + match find (function `URI _ -> true | _ -> false) l with 238 + | Some (`URI u) -> Some u 239 + | _ -> None 240 + in 241 + `Generator ({version; uri; content} : generator) 242 + 243 + (* URI, if present, MUST be an IRI reference [RFC3987]. The definition of "IRI" 244 + excludes relative references but we resolve it anyway in case this is not 245 + respected by the generator. *) 246 + let generator_uri_of_xml ~xmlbase a = 247 + `URI (XML.resolve ~xmlbase (Uri.of_string a)) 248 + 249 + (* atomGenerator = element atom:generator { atomCommonAttributes, attribute uri 250 + { atomUri }?, attribute version { text }?, text } *) 251 + let generator_of_xml = 252 + let attr_producer = 253 + [("version", fun ~xmlbase:_ a -> `Version a); ("uri", generator_uri_of_xml)] 254 + in 255 + let leaf_producer ~xmlbase:_ _pos data = `Content data in 256 + generate_catcher ~attr_producer ~leaf_producer make_generator 257 + 258 + let generator_of_xml' = 259 + let attr_producer = 260 + [ ("version", fun ~xmlbase:_ a -> `Version a) 261 + ; ("uri", fun ~xmlbase a -> `URI (xmlbase, a)) ] 262 + in 263 + let leaf_producer ~xmlbase:_ _pos data = `Content data in 264 + generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Generator x) 265 + 266 + type icon = Uri.t 267 + 268 + let make_icon ~pos (l : Uri.t list) = 269 + (* (atomUri) *) 270 + let uri = 271 + match l with 272 + | u :: _ -> u 273 + | [] -> 274 + raise 275 + (Error.Error (pos, "The content of <icon> MUST be a non-empty string")) 276 + in 277 + `Icon uri 278 + 279 + (* atomIcon = element atom:icon { atomCommonAttributes, } *) 280 + let icon_of_xml = 281 + let leaf_producer ~xmlbase _pos data = 282 + XML.resolve ~xmlbase (Uri.of_string data) 283 + in 284 + generate_catcher ~leaf_producer make_icon 285 + 286 + let icon_of_xml' = 287 + let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in 288 + generate_catcher ~leaf_producer (fun ~pos:_ x -> `Icon x) 289 + 290 + type id = Uri.t 291 + 292 + let make_id ~pos (l : string list) = 293 + (* (atomUri) *) 294 + let id = 295 + match l with 296 + | u :: _ -> Uri.of_string u 297 + | [] -> 298 + raise 299 + (Error.Error (pos, "The content of <id> MUST be a non-empty string")) 300 + in 301 + `ID id 302 + 303 + (* atomId = element atom:id { atomCommonAttributes, (atomUri) } *) 304 + let id_of_xml, id_of_xml' = 305 + let leaf_producer ~xmlbase:_ _pos data = data in 306 + ( generate_catcher ~leaf_producer make_id 307 + , generate_catcher ~leaf_producer (fun ~pos:_ x -> `ID x) ) 308 + 309 + let rel_of_string s = 310 + match String.lowercase_ascii (String.trim s) with 311 + | "alternate" -> Alternate 312 + | "related" -> Related 313 + | "self" -> Self 314 + | "enclosure" -> Enclosure 315 + | "via" -> Via 316 + | uri -> 317 + (* RFC 4287 § 4.2.7.2: the use of a relative reference other than a 318 + simple name is not allowed. Thus no need to resolve against xml:base. *) 319 + Link (Uri.of_string uri) 320 + 321 + let make_link ~pos (l : [< link'] list) = 322 + (* attribute href { atomUri } *) 323 + let href = 324 + match find (function `HREF _ -> true | _ -> false) l with 325 + | Some (`HREF u) -> u 326 + | _ -> 327 + raise (Error.Error (pos, "Link elements MUST have a 'href' attribute")) 328 + in 329 + (* attribute rel { atomNCName | atomUri }? *) 330 + let rel = 331 + match find (function `Rel _ -> true | _ -> false) l with 332 + | Some (`Rel r) -> rel_of_string r 333 + | _ -> Alternate 334 + (* cf. RFC 4287 § 4.2.7.2 *) 335 + in 336 + (* attribute type { atomMediaType }? *) 337 + let type_media = 338 + match find (function `Type _ -> true | _ -> false) l with 339 + | Some (`Type t) -> Some t 340 + | _ -> None 341 + in 342 + (* attribute hreflang { atomLanguageTag }? *) 343 + let hreflang = 344 + match find (function `HREFLang _ -> true | _ -> false) l with 345 + | Some (`HREFLang l) -> Some l 346 + | _ -> None 347 + in 348 + (* attribute title { text }? *) 349 + let title = 350 + match find (function `Title _ -> true | _ -> false) l with 351 + | Some (`Title s) -> s 352 + | _ -> "" 353 + in 354 + (* attribute length { text }? *) 355 + let length = 356 + match find (function `Length _ -> true | _ -> false) l with 357 + | Some (`Length i) -> Some (int_of_string i) 358 + | _ -> None 359 + in 360 + `Link ({href; rel; type_media; hreflang; title; length} : link) 361 + 362 + let link_href_of_xml ~xmlbase a = 363 + `HREF (XML.resolve ~xmlbase (Uri.of_string a)) 364 + 365 + (* atomLink = element atom:link { atomCommonAttributes, attribute href { 366 + atomUri }, attribute rel { atomNCName | atomUri }?, attribute type { 367 + atomMediaType }?, attribute hreflang { atomLanguageTag }?, attribute title { 368 + text }?, attribute length { text }?, undefinedContent } *) 369 + let link_attr_producer = 370 + [ ("rel", fun ~xmlbase:_ a -> `Rel a) 371 + ; ("type", fun ~xmlbase:_ a -> `Type a) 372 + ; ("hreflang", fun ~xmlbase:_ a -> `HREFLang a) 373 + ; ("title", fun ~xmlbase:_ a -> `Title a) 374 + ; ("length", fun ~xmlbase:_ a -> `Length a) ] 375 + 376 + let link_of_xml = 377 + let attr_producer = ("href", link_href_of_xml) :: link_attr_producer in 378 + generate_catcher ~attr_producer make_link 379 + 380 + let link_of_xml' = 381 + let attr_producer = 382 + ("href", fun ~xmlbase:_ a -> `HREF a) :: link_attr_producer 383 + in 384 + generate_catcher ~attr_producer (fun ~pos:_ x -> `Link x) 385 + 386 + type logo = Uri.t 387 + 388 + let make_logo ~pos (l : Uri.t list) = 389 + (* (atomUri) *) 390 + let uri = 391 + match l with 392 + | u :: _ -> u 393 + | [] -> 394 + raise 395 + (Error.Error (pos, "The content of <logo> MUST be a non-empty string")) 396 + in 397 + `Logo uri 398 + 399 + (* atomLogo = element atom:logo { atomCommonAttributes, (atomUri) } *) 400 + let logo_of_xml = 401 + let leaf_producer ~xmlbase _pos data = 402 + XML.resolve ~xmlbase (Uri.of_string data) 403 + in 404 + generate_catcher ~leaf_producer make_logo 405 + 406 + let logo_of_xml' = 407 + let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in 408 + generate_catcher ~leaf_producer (fun ~pos:_ x -> `Logo x) 409 + 410 + type published = Date.t 411 + type published' = [`Date of string] 412 + 413 + let make_published ~pos (l : [< published'] list) = 414 + (* atom:published { atomDateConstruct } *) 415 + let date = 416 + match find (fun (`Date _) -> true) l with 417 + | Some (`Date d) -> Date.of_rfc3339 d 418 + | _ -> 419 + raise 420 + (Error.Error 421 + (pos, "The content of <published> MUST be a non-empty string")) 422 + in 423 + `Published date 424 + 425 + (* atomPublished = element atom:published { atomDateConstruct } *) 426 + let published_of_xml, published_of_xml' = 427 + let leaf_producer ~xmlbase:_ _pos data = `Date data in 428 + ( generate_catcher ~leaf_producer make_published 429 + , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Published x) ) 430 + 431 + type rights = text_construct 432 + 433 + let rights_of_xml ~xmlbase a = `Rights (text_construct_of_xml ~xmlbase a) 434 + 435 + (* atomRights = element atom:rights { atomTextConstruct } *) 436 + let rights_of_xml' ~xmlbase:_ 437 + ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) = 438 + `Rights data 439 + 440 + type title = text_construct 441 + 442 + let title_of_xml ~xmlbase a = `Title (text_construct_of_xml ~xmlbase a) 443 + 444 + (* atomTitle = element atom:title { atomTextConstruct } *) 445 + let title_of_xml' ~xmlbase:_ 446 + ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) = 447 + `Title data 448 + 449 + type subtitle = text_construct 450 + 451 + let subtitle_of_xml ~xmlbase a = `Subtitle (text_construct_of_xml ~xmlbase a) 452 + 453 + (* atomSubtitle = element atom:subtitle { atomTextConstruct } *) 454 + let subtitle_of_xml' ~xmlbase:_ 455 + ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) = 456 + `Subtitle data 457 + 458 + type updated = Date.t 459 + type updated' = [`Date of string] 460 + 461 + let make_updated ~pos (l : [< updated'] list) = 462 + (* atom:updated { atomDateConstruct } *) 463 + let updated = 464 + match find (fun (`Date _) -> true) l with 465 + | Some (`Date d) -> Date.of_rfc3339 d 466 + | _ -> 467 + raise 468 + (Error.Error 469 + (pos, "The content of <updated> MUST be a non-empty string")) 470 + in 471 + `Updated updated 472 + 473 + (* atomUpdated = element atom:updated { atomDateConstruct } *) 474 + let updated_of_xml, updated_of_xml' = 475 + let leaf_producer ~xmlbase:_ _pos data = `Date data in 476 + ( generate_catcher ~leaf_producer make_updated 477 + , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Updated x) ) 478 + 479 + type source = 480 + { authors: author list 481 + ; categories: category list 482 + ; contributors: author list 483 + ; generator: generator option 484 + ; icon: icon option 485 + ; id: id 486 + ; links: link list 487 + ; logo: logo option 488 + ; rights: rights option 489 + ; subtitle: subtitle option 490 + ; title: title 491 + ; updated: updated option } 492 + 493 + let source ?(categories = []) ?(contributors = []) ?generator ?icon 494 + ?(links = []) ?logo ?rights ?subtitle ?updated ~authors ~id title = 495 + { authors 496 + ; categories 497 + ; contributors 498 + ; generator 499 + ; icon 500 + ; id 501 + ; links 502 + ; logo 503 + ; rights 504 + ; subtitle 505 + ; title 506 + ; updated } 507 + 508 + type source' = 509 + [ `Author of author 510 + | `Category of category 511 + | `Contributor of author 512 + | `Generator of generator 513 + | `Icon of icon 514 + | `ID of id 515 + | `Link of link 516 + | `Logo of logo 517 + | `Subtitle of subtitle 518 + | `Title of title 519 + | `Rights of rights 520 + | `Updated of updated ] 521 + 522 + let make_source ~pos (l : [< source'] list) = 523 + (* atomAuthor* *) 524 + let authors = 525 + List.fold_left 526 + (fun acc -> function `Author x -> x :: acc | _ -> acc) 527 + [] l 528 + in 529 + (* atomCategory* *) 530 + let categories = 531 + List.fold_left 532 + (fun acc -> function `Category x -> x :: acc | _ -> acc) 533 + [] l 534 + in 535 + (* atomContributor* *) 536 + let contributors = 537 + List.fold_left 538 + (fun acc -> function `Contributor x -> x :: acc | _ -> acc) 539 + [] l 540 + in 541 + (* atomGenerator? *) 542 + let generator = 543 + match find (function `Generator _ -> true | _ -> false) l with 544 + | Some (`Generator g) -> Some g 545 + | _ -> None 546 + in 547 + (* atomIcon? *) 548 + let icon = 549 + match find (function `Icon _ -> true | _ -> false) l with 550 + | Some (`Icon u) -> Some u 551 + | _ -> None 552 + in 553 + (* atomId? *) 554 + let id = 555 + match find (function `ID _ -> true | _ -> false) l with 556 + | Some (`ID i) -> i 557 + | _ -> 558 + raise 559 + (Error.Error 560 + (pos, "<source> elements MUST contains exactly one <id> elements")) 561 + in 562 + (* atomLink* *) 563 + let links = 564 + List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l 565 + in 566 + (* atomLogo? *) 567 + let logo = 568 + match find (function `Logo _ -> true | _ -> false) l with 569 + | Some (`Logo u) -> Some u 570 + | _ -> None 571 + in 572 + (* atomRights? *) 573 + let rights = 574 + match find (function `Rights _ -> true | _ -> false) l with 575 + | Some (`Rights r) -> Some r 576 + | _ -> None 577 + in 578 + (* atomSubtitle? *) 579 + let subtitle = 580 + match find (function `Subtitle _ -> true | _ -> false) l with 581 + | Some (`Subtitle s) -> Some s 582 + | _ -> None 583 + in 584 + (* atomTitle? *) 585 + let title = 586 + match find (function `Title _ -> true | _ -> false) l with 587 + | Some (`Title s) -> s 588 + | _ -> 589 + raise 590 + (Error.Error 591 + ( pos 592 + , "<source> elements MUST contains exactly one <title> elements" 593 + )) 594 + in 595 + (* atomUpdated? *) 596 + let updated = 597 + match find (function `Updated _ -> true | _ -> false) l with 598 + | Some (`Updated d) -> Some d 599 + | _ -> None 600 + in 601 + `Source 602 + ( { authors 603 + ; categories 604 + ; contributors 605 + ; generator 606 + ; icon 607 + ; id 608 + ; links 609 + ; logo 610 + ; rights 611 + ; subtitle 612 + ; title 613 + ; updated } 614 + : source ) 615 + 616 + (* atomSource = element atom:source { atomCommonAttributes, (atomAuthor* & 617 + atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId? & 618 + atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle? & 619 + atomUpdated? & extensionElement * ) } *) 620 + let source_of_xml = 621 + let data_producer = 622 + [ ("author", author_of_xml) 623 + ; ("category", category_of_xml) 624 + ; ("contributor", contributor_of_xml) 625 + ; ("generator", generator_of_xml) 626 + ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml) 627 + ; ("logo", logo_of_xml); ("rights", rights_of_xml) 628 + ; ("subtitle", subtitle_of_xml) 629 + ; ("title", title_of_xml) 630 + ; ("updated", updated_of_xml) ] 631 + in 632 + generate_catcher ~namespaces ~data_producer make_source 633 + 634 + let source_of_xml' = 635 + let data_producer = 636 + [ ("author", author_of_xml') 637 + ; ("category", category_of_xml') 638 + ; ("contributor", contributor_of_xml') 639 + ; ("generator", generator_of_xml') 640 + ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml') 641 + ; ("logo", logo_of_xml'); ("rights", rights_of_xml') 642 + ; ("subtitle", subtitle_of_xml') 643 + ; ("title", title_of_xml') 644 + ; ("updated", updated_of_xml') ] 645 + in 646 + generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Source x) 647 + 648 + type mime = string 649 + 650 + type content = 651 + | Text of string 652 + | Html of Uri.t option * string 653 + | Xhtml of Uri.t option * Syndic_xml.t list 654 + | Mime of mime * string 655 + | Src of mime option * Uri.t 656 + 657 + [@@@warning "-34"] 658 + 659 + type content' = [`Type of string | `SRC of string | `Data of Syndic_xml.t list] 660 + 661 + (* atomInlineTextContent = element atom:content { atomCommonAttributes, 662 + attribute type { "text" | "html" }?, (text)* } 663 + 664 + atomInlineXHTMLContent = element atom:content { atomCommonAttributes, 665 + attribute type { "xhtml" }, xhtmlDiv } 666 + 667 + atomInlineOtherContent = element atom:content { atomCommonAttributes, 668 + attribute type { atomMediaType }?, (text|anyElement)* } 669 + 670 + atomOutOfLineContent = element atom:content { atomCommonAttributes, 671 + attribute type { atomMediaType }?, attribute src { atomUri }, empty } 672 + 673 + atomContent = atomInlineTextContent | atomInlineXHTMLContent | 674 + atomInlineOtherContent | atomOutOfLineContent *) 675 + let content_of_xml ~xmlbase 676 + ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) = 677 + (* MIME ::= attribute type { "text" | "html" }? | attribute type { "xhtml" } 678 + | attribute type { atomMediaType }? *) 679 + (* attribute src { atomUri } | none If src s present, [data] MUST be empty. *) 680 + match find (fun a -> attr_is a "src") attr with 681 + | Some (_, src) -> 682 + let mime = 683 + match find (fun a -> attr_is a "type") attr with 684 + | Some (_, ty) -> Some ty 685 + | None -> None 686 + in 687 + `Content (Src (mime, XML.resolve ~xmlbase (Uri.of_string src))) 688 + | None -> 689 + (* (text)* 690 + * | xhtmlDiv 691 + * | (text|anyElement)* 692 + * | none *) 693 + `Content 694 + ( match find (fun a -> attr_is a "type") attr with 695 + | Some (_, "text") | None -> Text (get_leaf data) 696 + | Some (_, "html") -> Html (xmlbase, get_html_content data) 697 + | Some (_, "xhtml") -> Xhtml (xmlbase, get_xml_content data data) 698 + | Some (_, mime) -> Mime (mime, get_leaf data) ) 699 + 700 + let content_of_xml' ~xmlbase:_ 701 + ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) = 702 + let l = 703 + match find (fun a -> attr_is a "src") attr with 704 + | Some (_, src) -> [`SRC src] 705 + | None -> [] 706 + in 707 + let l = 708 + match find (fun a -> attr_is a "type") attr with 709 + | Some (_, ty) -> `Type ty :: l 710 + | None -> l 711 + in 712 + `Content (`Data data :: l) 713 + 714 + type summary = text_construct 715 + 716 + (* atomSummary = element atom:summary { atomTextConstruct } *) 717 + let summary_of_xml ~xmlbase a = `Summary (text_construct_of_xml ~xmlbase a) 718 + 719 + let summary_of_xml' ~xmlbase:_ ((_, (_, _), data) : XML.pos * XML.tag * t list) 720 + = 721 + `Summary data 722 + 723 + type entry = 724 + { authors: author * author list 725 + ; categories: category list 726 + ; content: content option 727 + ; contributors: author list 728 + ; id: id 729 + ; links: link list 730 + ; published: published option 731 + ; rights: rights option 732 + ; source: source option 733 + ; summary: summary option 734 + ; title: title 735 + ; updated: updated } 736 + 737 + let entry ?(categories = []) ?content ?(contributors = []) ?(links = []) 738 + ?published ?rights ?source ?summary ~id ~authors ~title ~updated () = 739 + { authors 740 + ; categories 741 + ; content 742 + ; contributors 743 + ; id 744 + ; links 745 + ; published 746 + ; rights 747 + ; source 748 + ; summary 749 + ; title 750 + ; updated } 751 + 752 + type entry' = 753 + [ `Author of author 754 + | `Category of category 755 + | `Contributor of author 756 + | `ID of id 757 + | `Link of link 758 + | `Published of published 759 + | `Rights of rights 760 + | `Source of source 761 + | `Content of content 762 + | `Summary of summary 763 + | `Title of title 764 + | `Updated of updated ] 765 + 766 + module LinkOrder : Set.OrderedType with type t = string * string = struct 767 + type t = string * string 768 + 769 + let compare (a : t) (b : t) = 770 + match compare (fst a) (fst b) with 0 -> compare (snd a) (snd b) | n -> n 771 + end 772 + 773 + module LinkSet = Set.Make (LinkOrder) 774 + 775 + let uniq_link_alternate ~pos (l : link list) = 776 + let string_of_duplicate_link {href; type_media; hreflang; _} 777 + (type_media', hreflang') = 778 + let ty = (function Some a -> a | None -> "(none)") type_media in 779 + let hl = (function Some a -> a | None -> "(none)") hreflang in 780 + let ty' = (function "" -> "(none)" | s -> s) type_media' in 781 + let hl' = (function "" -> "(none)" | s -> s) hreflang' in 782 + Printf.sprintf 783 + "Duplicate link between <link href=\"%s\" hreflang=\"%s\" type=\"%s\" \ 784 + ..> and <link hreflang=\"%s\" type=\"%s\" ..>" 785 + (Uri.to_string href) hl ty hl' ty' 786 + in 787 + let raise_error link link' = 788 + raise (Error.Error (pos, string_of_duplicate_link link link')) 789 + in 790 + let rec aux acc = function 791 + | [] -> l 792 + | ({rel; type_media= Some ty; hreflang= Some hl; _} as x) :: r 793 + when rel = Alternate -> 794 + if LinkSet.mem (ty, hl) acc then 795 + raise_error x (LinkSet.find (ty, hl) acc) 796 + else aux (LinkSet.add (ty, hl) acc) r 797 + | ({rel; type_media= None; hreflang= Some hl; _} as x) :: r 798 + when rel = Alternate -> 799 + if LinkSet.mem ("", hl) acc then 800 + raise_error x (LinkSet.find ("", hl) acc) 801 + else aux (LinkSet.add ("", hl) acc) r 802 + | ({rel; type_media= Some ty; hreflang= None; _} as x) :: r 803 + when rel = Alternate -> 804 + if LinkSet.mem (ty, "") acc then 805 + raise_error x (LinkSet.find (ty, "") acc) 806 + else aux (LinkSet.add (ty, "") acc) r 807 + | ({rel; type_media= None; hreflang= None; _} as x) :: r 808 + when rel = Alternate -> 809 + if LinkSet.mem ("", "") acc then 810 + raise_error x (LinkSet.find ("", "") acc) 811 + else aux (LinkSet.add ("", "") acc) r 812 + | _ :: r -> aux acc r 813 + in 814 + aux LinkSet.empty l 815 + 816 + type feed' = 817 + [ `Author of author 818 + | `Category of category 819 + | `Contributor of author 820 + | `Generator of generator 821 + | `Icon of icon 822 + | `ID of id 823 + | `Link of link 824 + | `Logo of logo 825 + | `Rights of rights 826 + | `Subtitle of subtitle 827 + | `Title of title 828 + | `Updated of updated 829 + | `Entry of entry ] 830 + 831 + let dummy_name = "\000" 832 + 833 + let make_entry ~pos l = 834 + let authors = 835 + List.fold_left 836 + (fun acc -> function `Author x -> x :: acc | _ -> acc) 837 + [] l 838 + in 839 + (* atomSource? *) 840 + let sources = 841 + List.fold_left 842 + (fun acc -> function `Source x -> x :: acc | _ -> acc) 843 + [] l 844 + in 845 + let source = 846 + match sources with 847 + | [] -> None 848 + | [s] -> Some s 849 + | _ -> 850 + (* RFC 4287 § 4.1.2 *) 851 + let msg = 852 + "<entry> elements MUST NOT contain more than one <source> element." 853 + in 854 + raise (Error.Error (pos, msg)) 855 + in 856 + let authors = 857 + match (authors, source) with 858 + | a0 :: a, _ -> (a0, a) 859 + | [], Some (s : source) -> ( 860 + (* If an atom:entry element does not contain atom:author elements, then 861 + the atom:author elements of the contained atom:source element are 862 + considered to apply. http://tools.ietf.org/html/rfc4287#section-4.2.1 *) 863 + match s.authors with 864 + | a0 :: a -> (a0, a) 865 + | [] -> 866 + let msg = 867 + "<entry> does not contain an <author> and its <source> neither does" 868 + in 869 + raise (Error.Error (pos, msg)) ) 870 + | [], None -> ({name= dummy_name; uri= None; email= None}, []) 871 + (* unacceptable value, see fix_author below *) 872 + (* atomCategory* *) 873 + in 874 + let categories = 875 + List.fold_left 876 + (fun acc -> function `Category x -> x :: acc | _ -> acc) 877 + [] l 878 + (* atomContributor* *) 879 + in 880 + let contributors = 881 + List.fold_left 882 + (fun acc -> function `Contributor x -> x :: acc | _ -> acc) 883 + [] l 884 + in 885 + (* atomId *) 886 + let id = 887 + match find (function `ID _ -> true | _ -> false) l with 888 + | Some (`ID i) -> i 889 + | _ -> 890 + raise 891 + (Error.Error 892 + (pos, "<entry> elements MUST contains exactly one <id> elements")) 893 + (* atomLink* *) 894 + in 895 + let links = 896 + List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l 897 + in 898 + (* atomPublished? *) 899 + let published = 900 + match find (function `Published _ -> true | _ -> false) l with 901 + | Some (`Published s) -> Some s 902 + | _ -> None 903 + in 904 + (* atomRights? *) 905 + let rights = 906 + match find (function `Rights _ -> true | _ -> false) l with 907 + | Some (`Rights r) -> Some r 908 + | _ -> None 909 + in 910 + (* atomContent? *) 911 + let content = 912 + match find (function `Content _ -> true | _ -> false) l with 913 + | Some (`Content c) -> Some c 914 + | _ -> None 915 + in 916 + (* atomSummary? *) 917 + let summary = 918 + match find (function `Summary _ -> true | _ -> false) l with 919 + | Some (`Summary s) -> Some s 920 + | _ -> None 921 + in 922 + (* atomTitle *) 923 + let title = 924 + match find (function `Title _ -> true | _ -> false) l with 925 + | Some (`Title t) -> t 926 + | _ -> 927 + raise 928 + (Error.Error 929 + ( pos 930 + , "<entry> elements MUST contains exactly one <title> elements" )) 931 + in 932 + (* atomUpdated *) 933 + let updated = 934 + match find (function `Updated _ -> true | _ -> false) l with 935 + | Some (`Updated u) -> u 936 + | _ -> 937 + raise 938 + (Error.Error 939 + ( pos 940 + , "<entry> elements MUST contains exactly one <updated> elements" 941 + )) 942 + in 943 + `Entry 944 + ( pos 945 + , ( { authors 946 + ; categories 947 + ; content 948 + ; contributors 949 + ; id 950 + ; links= uniq_link_alternate ~pos links 951 + ; published 952 + ; rights 953 + ; source 954 + ; summary 955 + ; title 956 + ; updated } 957 + : entry ) ) 958 + 959 + (* atomEntry = element atom:entry { atomCommonAttributes, (atomAuthor* & 960 + atomCategory* & atomContent? & atomContributor* & atomId & atomLink* & 961 + atomPublished? & atomRights? & atomSource? & atomSummary? & atomTitle & 962 + atomUpdated & extensionElement * ) } *) 963 + let entry_of_xml = 964 + let data_producer = 965 + [ ("author", author_of_xml) 966 + ; ("category", category_of_xml) 967 + ; ("contributor", contributor_of_xml) 968 + ; ("id", id_of_xml); ("link", link_of_xml) 969 + ; ("published", published_of_xml) 970 + ; ("rights", rights_of_xml); ("source", source_of_xml) 971 + ; ("content", content_of_xml) 972 + ; ("summary", summary_of_xml) 973 + ; ("title", title_of_xml) 974 + ; ("updated", updated_of_xml) ] 975 + in 976 + generate_catcher ~namespaces ~data_producer make_entry 977 + 978 + let entry_of_xml' = 979 + let data_producer = 980 + [ ("author", author_of_xml') 981 + ; ("category", category_of_xml') 982 + ; ("contributor", contributor_of_xml') 983 + ; ("id", id_of_xml'); ("link", link_of_xml') 984 + ; ("published", published_of_xml') 985 + ; ("rights", rights_of_xml'); ("source", source_of_xml') 986 + ; ("content", content_of_xml') 987 + ; ("summary", summary_of_xml') 988 + ; ("title", title_of_xml') 989 + ; ("updated", updated_of_xml') ] 990 + in 991 + generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Entry x) 992 + 993 + type feed = 994 + { authors: author list 995 + ; categories: category list 996 + ; contributors: author list 997 + ; generator: generator option 998 + ; icon: icon option 999 + ; id: id 1000 + ; links: link list 1001 + ; logo: logo option 1002 + ; rights: rights option 1003 + ; subtitle: subtitle option 1004 + ; title: title 1005 + ; updated: updated 1006 + ; entries: entry list } 1007 + 1008 + let feed ?(authors = []) ?(categories = []) ?(contributors = []) ?generator 1009 + ?icon ?(links = []) ?logo ?rights ?subtitle ~id ~title ~updated entries = 1010 + { authors 1011 + ; categories 1012 + ; contributors 1013 + ; generator 1014 + ; icon 1015 + ; id 1016 + ; links 1017 + ; logo 1018 + ; rights 1019 + ; subtitle 1020 + ; title 1021 + ; updated 1022 + ; entries } 1023 + 1024 + let make_feed ~pos (l : _ list) = 1025 + (* atomAuthor* *) 1026 + let authors = 1027 + List.fold_left 1028 + (fun acc -> function `Author x -> x :: acc | _ -> acc) 1029 + [] l 1030 + in 1031 + (* atomCategory* *) 1032 + let categories = 1033 + List.fold_left 1034 + (fun acc -> function `Category x -> x :: acc | _ -> acc) 1035 + [] l 1036 + in 1037 + (* atomContributor* *) 1038 + let contributors = 1039 + List.fold_left 1040 + (fun acc -> function `Contributor x -> x :: acc | _ -> acc) 1041 + [] l 1042 + in 1043 + (* atomLink* *) 1044 + let links = 1045 + List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l 1046 + in 1047 + (* atomGenerator? *) 1048 + let generator = 1049 + match find (function `Generator _ -> true | _ -> false) l with 1050 + | Some (`Generator g) -> Some g 1051 + | _ -> None 1052 + in 1053 + (* atomIcon? *) 1054 + let icon = 1055 + match find (function `Icon _ -> true | _ -> false) l with 1056 + | Some (`Icon i) -> Some i 1057 + | _ -> None 1058 + in 1059 + (* atomId *) 1060 + let id = 1061 + match find (function `ID _ -> true | _ -> false) l with 1062 + | Some (`ID i) -> i 1063 + | _ -> 1064 + raise 1065 + (Error.Error 1066 + (pos, "<feed> elements MUST contains exactly one <id> elements")) 1067 + in 1068 + (* atomLogo? *) 1069 + let logo = 1070 + match find (function `Logo _ -> true | _ -> false) l with 1071 + | Some (`Logo l) -> Some l 1072 + | _ -> None 1073 + in 1074 + (* atomRights? *) 1075 + let rights = 1076 + match find (function `Rights _ -> true | _ -> false) l with 1077 + | Some (`Rights r) -> Some r 1078 + | _ -> None 1079 + in 1080 + (* atomSubtitle? *) 1081 + let subtitle = 1082 + match find (function `Subtitle _ -> true | _ -> false) l with 1083 + | Some (`Subtitle s) -> Some s 1084 + | _ -> None 1085 + in 1086 + (* atomTitle *) 1087 + let title = 1088 + match find (function `Title _ -> true | _ -> false) l with 1089 + | Some (`Title t) -> t 1090 + | _ -> 1091 + raise 1092 + (Error.Error 1093 + (pos, "<feed> elements MUST contains exactly one <title> elements")) 1094 + in 1095 + (* atomUpdated *) 1096 + let updated = 1097 + match find (function `Updated _ -> true | _ -> false) l with 1098 + | Some (`Updated u) -> u 1099 + | _ -> 1100 + raise 1101 + (Error.Error 1102 + ( pos 1103 + , "<feed> elements MUST contains exactly one <updated> elements" 1104 + )) 1105 + in 1106 + (* atomEntry* *) 1107 + let fix_author pos (e : entry) = 1108 + match e.authors with 1109 + | a, [] when a.name = dummy_name -> ( 1110 + (* In an Atom Feed Document, the atom:author elements of the containing 1111 + atom:feed element are considered to apply to the entry if there are no 1112 + atom:author elements in the locations described above. 1113 + http://tools.ietf.org/html/rfc4287#section-4.2.1 *) 1114 + match authors with 1115 + | a0 :: a -> {e with authors= (a0, a)} 1116 + | [] -> 1117 + let msg = 1118 + "<entry> elements MUST contains at least an <author> element or \ 1119 + <feed> element MUST contains one or more <author> elements" 1120 + in 1121 + raise (Error.Error (pos, msg)) ) 1122 + | _ -> e 1123 + in 1124 + let entries = 1125 + List.fold_left 1126 + (fun acc -> function `Entry (pos, e) -> fix_author pos e :: acc 1127 + | _ -> acc ) 1128 + [] l 1129 + in 1130 + ( { authors 1131 + ; categories 1132 + ; contributors 1133 + ; generator 1134 + ; icon 1135 + ; id 1136 + ; links 1137 + ; logo 1138 + ; rights 1139 + ; subtitle 1140 + ; title 1141 + ; updated 1142 + ; entries } 1143 + : feed ) 1144 + 1145 + (* atomFeed = element atom:feed { atomCommonAttributes, (atomAuthor* & 1146 + atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId & 1147 + atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle & 1148 + atomUpdated & extensionElement * ), atomEntry* } *) 1149 + 1150 + let feed_of_xml = 1151 + let data_producer = 1152 + [ ("author", author_of_xml) 1153 + ; ("category", category_of_xml) 1154 + ; ("contributor", contributor_of_xml) 1155 + ; ("generator", generator_of_xml) 1156 + ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml) 1157 + ; ("logo", logo_of_xml); ("rights", rights_of_xml) 1158 + ; ("subtitle", subtitle_of_xml) 1159 + ; ("title", title_of_xml) 1160 + ; ("updated", updated_of_xml) 1161 + ; ("entry", entry_of_xml) ] 1162 + in 1163 + generate_catcher ~namespaces ~data_producer make_feed 1164 + 1165 + let feed_of_xml' = 1166 + let data_producer = 1167 + [ ("author", author_of_xml') 1168 + ; ("category", category_of_xml') 1169 + ; ("contributor", contributor_of_xml') 1170 + ; ("generator", generator_of_xml') 1171 + ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml') 1172 + ; ("logo", logo_of_xml'); ("rights", rights_of_xml') 1173 + ; ("subtitle", subtitle_of_xml') 1174 + ; ("title", title_of_xml') 1175 + ; ("updated", updated_of_xml') 1176 + ; ("entry", entry_of_xml') ] 1177 + in 1178 + generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> x) 1179 + 1180 + (* Remove all tags *) 1181 + let rec add_to_buffer buf = function 1182 + | XML.Node (_, _, subs) -> List.iter (add_to_buffer buf) subs 1183 + | XML.Data (_, d) -> Buffer.add_string buf d 1184 + 1185 + let xhtml_to_string xhtml = 1186 + let buf = Buffer.create 128 in 1187 + List.iter (add_to_buffer buf) xhtml ; 1188 + Buffer.contents buf 1189 + 1190 + let string_of_text_construct = function 1191 + (* FIXME: Once we use a proper HTML library, we probably would like to parse 1192 + the HTML and remove the tags *) 1193 + | (Text s : text_construct) | Html (_, s) -> s 1194 + | Xhtml (_, x) -> xhtml_to_string x 1195 + 1196 + let parse ?self ?xmlbase input = 1197 + let feed = 1198 + match XML.of_xmlm input |> snd with 1199 + | XML.Node (pos, tag, datas) when tag_is tag "feed" -> 1200 + feed_of_xml ~xmlbase (pos, tag, datas) 1201 + | _ -> 1202 + raise 1203 + (Error.Error 1204 + ((0, 0), "document MUST contains exactly one <feed> element")) 1205 + in 1206 + (* FIXME: the spec says that an entry can appear as the top-level element *) 1207 + match self with 1208 + | None -> feed 1209 + | Some self -> 1210 + if List.exists (fun l -> l.rel = Self) feed.links then feed 1211 + else 1212 + let links = 1213 + { href= self 1214 + ; rel= Self 1215 + ; type_media= Some "application/atom+xml" 1216 + ; hreflang= None 1217 + ; title= string_of_text_construct feed.title 1218 + ; length= None } 1219 + :: feed.links 1220 + in 1221 + {feed with links} 1222 + 1223 + let read ?self ?xmlbase fname = 1224 + let fh = open_in fname in 1225 + try 1226 + let x = parse ?self ?xmlbase (XML.input_of_channel fh) in 1227 + close_in fh ; x 1228 + with e -> close_in fh ; raise e 1229 + 1230 + let set_self_link feed ?hreflang ?length url = 1231 + match List.partition (fun l -> l.rel = Self) feed.links with 1232 + | l :: _, links -> 1233 + let hreflang = 1234 + match hreflang with None -> l.hreflang | Some _ -> hreflang 1235 + in 1236 + let length = match length with None -> l.length | Some _ -> length in 1237 + let self = {l with href= url; hreflang; length} in 1238 + {feed with links= self :: links} 1239 + | [], links -> 1240 + let links = 1241 + { href= url 1242 + ; rel= Self 1243 + ; type_media= Some "application/atom+xml" 1244 + ; hreflang 1245 + ; title= string_of_text_construct feed.title 1246 + ; length } 1247 + :: links 1248 + in 1249 + {feed with links} 1250 + 1251 + let get_self_link feed = 1252 + try Some (List.find (fun l -> l.rel = Self) feed.links) with Not_found -> 1253 + None 1254 + 1255 + let unsafe ?xmlbase input = 1256 + match XML.of_xmlm input |> snd with 1257 + | XML.Node (pos, tag, datas) when tag_is tag "feed" -> 1258 + `Feed (feed_of_xml' ~xmlbase (pos, tag, datas)) 1259 + | _ -> `Feed [] 1260 + 1261 + let remove_empty_authors a = List.filter not_empty_author a 1262 + 1263 + (* [normalize_authors a authors] returns (a', authors') where [authors'] is 1264 + [authors] where the empty authors and the author [a] have been removed and 1265 + [a'] is [a] possibly completed with the information found for [a] in 1266 + [authors]. *) 1267 + let rec normalize_authors (a : author) = function 1268 + | [] -> (a, []) 1269 + | a0 :: tl -> 1270 + if not_empty_author a0 then 1271 + if a0.name = a.name then 1272 + (* Merge [a0] and [a]. *) 1273 + let uri = match a.uri with None -> a0.uri | Some _ -> a.uri in 1274 + let email = 1275 + match a.email with None -> a0.email | Some _ -> a.email 1276 + in 1277 + normalize_authors {name= a.name; uri; email} tl 1278 + else 1279 + let a', authors' = normalize_authors a tl in 1280 + (a', a0 :: authors') 1281 + else normalize_authors a tl 1282 + 1283 + (* drop the empty author *) 1284 + 1285 + let set_main_author_entry author (e : entry) = 1286 + (* If the entry has a source, then [author] should be ignored and the one 1287 + from the [source] should be used instead. *) 1288 + let author, author_ok, source = 1289 + match e.source with 1290 + | None -> (author, true, None) 1291 + | Some s -> ( 1292 + let s_authors = remove_empty_authors s.authors in 1293 + let s_contributors = remove_empty_authors s.contributors in 1294 + let s = 1295 + Some {s with authors= s_authors; contributors= s_contributors} 1296 + in 1297 + (* A source exists. If it contains no author, one should not change the 1298 + entry authors with [author] because that may wrongly attribute the 1299 + post. *) 1300 + match s_authors with 1301 + | [] -> (author, false, s) 1302 + | s_author :: _ -> (s_author, true, s) ) 1303 + in 1304 + let a0, a = e.authors in 1305 + let authors = 1306 + match remove_empty_authors (a0 :: a) with 1307 + | a0 :: a -> (a0, a) 1308 + | [] -> ((if author_ok then author else empty_author), []) 1309 + in 1310 + let contributors = remove_empty_authors e.contributors in 1311 + {e with authors; contributors; source} 1312 + 1313 + let set_main_author feed author = 1314 + let author, authors = normalize_authors author feed.authors in 1315 + let contributors = remove_empty_authors feed.contributors in 1316 + let entries = List.map (set_main_author_entry author) feed.entries in 1317 + {feed with authors= author :: authors; contributors; entries} 1318 + 1319 + (* Conversion to XML *) 1320 + 1321 + (* Tag with the Atom namespace *) 1322 + let atom name : XML.tag = ((atom_ns, name), []) 1323 + 1324 + let add_attr_xmlbase ~xmlbase attrs = 1325 + match xmlbase with 1326 + | Some u -> ((Xmlm.ns_xml, "base"), Uri.to_string u) :: attrs 1327 + | None -> attrs 1328 + 1329 + let text_construct_to_xml tag_name (t : text_construct) = 1330 + match t with 1331 + | Text t -> 1332 + XML.Node 1333 + ( dummy_pos 1334 + , ((atom_ns, tag_name), [(("", "type"), "text")]) 1335 + , [XML.Data (dummy_pos, t)] ) 1336 + | Html (xmlbase, t) -> 1337 + let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in 1338 + XML.Node 1339 + (dummy_pos, ((atom_ns, tag_name), attr), [XML.Data (dummy_pos, t)]) 1340 + | Xhtml (xmlbase, x) -> 1341 + let div = 1342 + XML.Node 1343 + (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x) 1344 + in 1345 + let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in 1346 + XML.Node (dummy_pos, ((atom_ns, tag_name), attr), [div]) 1347 + 1348 + let person_to_xml name (a : author) = 1349 + XML.Node 1350 + ( dummy_pos 1351 + , atom name 1352 + , [node_data (atom "name") a.name] 1353 + |> add_node_uri (atom "uri") a.uri 1354 + |> add_node_data (atom "email") a.email ) 1355 + 1356 + let author_to_xml a = person_to_xml "author" a 1357 + let contributor_to_xml a = person_to_xml "contributor" a 1358 + 1359 + let category_to_xml (c : category) = 1360 + let attrs = 1361 + [(("", "term"), c.term)] 1362 + |> add_attr_uri ("", "scheme") c.scheme 1363 + |> add_attr ("", "label") c.label 1364 + in 1365 + XML.Node (dummy_pos, ((atom_ns, "category"), attrs), []) 1366 + 1367 + let generator_to_xml (g : generator) = 1368 + let attr = 1369 + [] |> add_attr ("", "version") g.version |> add_attr_uri ("", "uri") g.uri 1370 + in 1371 + XML.Node 1372 + ( dummy_pos 1373 + , ((atom_ns, "generator"), attr) 1374 + , [XML.Data (dummy_pos, g.content)] ) 1375 + 1376 + let string_of_rel = function 1377 + | Alternate -> "alternate" 1378 + | Related -> "related" 1379 + | Self -> "self" 1380 + | Enclosure -> "enclosure" 1381 + | Via -> "via" 1382 + | Link l -> Uri.to_string l 1383 + 1384 + let link_to_xml (l : link) = 1385 + let attr = 1386 + [(("", "href"), Uri.to_string l.href); (("", "rel"), string_of_rel l.rel)] 1387 + |> add_attr ("", "type") l.type_media 1388 + |> add_attr ("", "hreflang") l.hreflang 1389 + in 1390 + let attr = if l.title = "" then attr else (("", "title"), l.title) :: attr in 1391 + let attr = 1392 + match l.length with 1393 + | Some len -> (("", "length"), string_of_int len) :: attr 1394 + | None -> attr 1395 + in 1396 + XML.Node (dummy_pos, ((atom_ns, "link"), attr), []) 1397 + 1398 + let add_node_date tag date nodes = 1399 + match date with 1400 + | None -> nodes 1401 + | Some d -> node_data tag (Date.to_rfc3339 d) :: nodes 1402 + 1403 + let source_to_xml (s : source) = 1404 + let nodes = 1405 + node_data (atom "id") (Uri.to_string s.id) 1406 + :: text_construct_to_xml "title" s.title 1407 + :: List.map author_to_xml s.authors 1408 + |> add_nodes_rev_map category_to_xml s.categories 1409 + |> add_nodes_rev_map contributor_to_xml s.contributors 1410 + |> add_node_option generator_to_xml s.generator 1411 + |> add_node_option (node_uri (atom "icon")) s.icon 1412 + |> add_nodes_rev_map link_to_xml s.links 1413 + |> add_node_option (node_uri (atom "logo")) s.logo 1414 + |> add_node_option (text_construct_to_xml "rights") s.rights 1415 + |> add_node_option (text_construct_to_xml "subtitle") s.subtitle 1416 + |> add_node_date (atom "updated") s.updated 1417 + in 1418 + XML.Node (dummy_pos, atom "source", nodes) 1419 + 1420 + let content_to_xml (c : content) = 1421 + match c with 1422 + | Text t -> 1423 + XML.Node 1424 + ( dummy_pos 1425 + , ((atom_ns, "content"), [(("", "type"), "text")]) 1426 + , [XML.Data (dummy_pos, t)] ) 1427 + | Html (xmlbase, t) -> 1428 + let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in 1429 + XML.Node 1430 + (dummy_pos, ((atom_ns, "content"), attrs), [XML.Data (dummy_pos, t)]) 1431 + | Xhtml (xmlbase, x) -> 1432 + let div = 1433 + XML.Node 1434 + (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x) 1435 + in 1436 + let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in 1437 + XML.Node (dummy_pos, ((atom_ns, "content"), attrs), [div]) 1438 + | Mime (mime, d) -> 1439 + XML.Node 1440 + ( dummy_pos 1441 + , ((atom_ns, "content"), [(("", "type"), mime)]) 1442 + , [XML.Data (dummy_pos, d)] ) 1443 + | Src (mime, uri) -> 1444 + let attr = 1445 + [(("", "src"), Uri.to_string uri)] |> add_attr ("", "type") mime 1446 + in 1447 + XML.Node (dummy_pos, ((atom_ns, "content"), attr), []) 1448 + 1449 + let entry_to_xml (e : entry) = 1450 + let a0, a = e.authors in 1451 + let nodes = 1452 + node_data (atom "id") (Uri.to_string e.id) 1453 + :: text_construct_to_xml "title" e.title 1454 + :: node_data (atom "updated") (Date.to_rfc3339 e.updated) 1455 + :: author_to_xml a0 1456 + :: List.map author_to_xml a 1457 + |> add_nodes_rev_map category_to_xml e.categories 1458 + |> add_node_option content_to_xml e.content 1459 + |> add_nodes_rev_map contributor_to_xml e.contributors 1460 + |> add_nodes_rev_map link_to_xml e.links 1461 + |> add_node_date (atom "published") e.published 1462 + |> add_node_option (text_construct_to_xml "rights") e.rights 1463 + |> add_node_option source_to_xml e.source 1464 + |> add_node_option (text_construct_to_xml "summary") e.summary 1465 + in 1466 + XML.Node (dummy_pos, atom "entry", nodes) 1467 + 1468 + let to_xml (f : feed) = 1469 + let nodes = 1470 + node_data (atom "id") (Uri.to_string f.id) 1471 + :: text_construct_to_xml "title" f.title 1472 + :: node_data (atom "updated") (Date.to_rfc3339 f.updated) 1473 + :: List.map entry_to_xml f.entries 1474 + |> add_nodes_rev_map author_to_xml (List.rev f.authors) 1475 + |> add_nodes_rev_map category_to_xml f.categories 1476 + |> add_nodes_rev_map contributor_to_xml f.contributors 1477 + |> add_node_option generator_to_xml f.generator 1478 + |> add_node_option (node_uri (atom "icon")) f.icon 1479 + |> add_nodes_rev_map link_to_xml f.links 1480 + |> add_node_option (node_uri (atom "logo")) f.logo 1481 + |> add_node_option (text_construct_to_xml "rights") f.rights 1482 + |> add_node_option (text_construct_to_xml "subtitle") f.subtitle 1483 + in 1484 + XML.Node (dummy_pos, ((atom_ns, "feed"), [(("", "xmlns"), atom_ns)]), nodes) 1485 + 1486 + (* Atom and XHTML have been declared well in the above XML representation. One 1487 + can remove them. *) 1488 + let output_ns_prefix s = if s = atom_ns || s = xhtml_ns then Some "" else None 1489 + 1490 + let output feed dest = 1491 + let o = XML.make_output dest ~ns_prefix:output_ns_prefix in 1492 + XML.to_xmlm (to_xml feed) o 1493 + 1494 + let write feed fname = 1495 + let fh = open_out fname in 1496 + try 1497 + output feed (`Channel fh) ; 1498 + close_out fh 1499 + with e -> close_out fh ; raise e 1500 + 1501 + (* Comparing entries *) 1502 + 1503 + let entry_date e = match e.published with Some d -> d | None -> e.updated 1504 + 1505 + let ascending (e1 : entry) (e2 : entry) = 1506 + Date.compare (entry_date e1) (entry_date e2) 1507 + 1508 + let descending (e1 : entry) (e2 : entry) = 1509 + Date.compare (entry_date e2) (entry_date e1) 1510 + 1511 + (* Feed aggregation *) 1512 + 1513 + let syndic_generator = 1514 + { version= Some Syndic_conf.version 1515 + ; uri= Some Syndic_conf.homepage 1516 + ; content= "OCaml Syndic.Atom feed aggregator" } 1517 + 1518 + let ocaml_icon = Uri.of_string "http://ocaml.org/img/colour-icon-170x148.png" 1519 + let default_title : text_construct = Text "Syndic.Atom aggregated feed" 1520 + 1521 + let[@warning "-32"] is_alternate_Atom (l : link) = 1522 + match l.type_media with 1523 + | None -> false 1524 + | Some ty -> ty = "application/atom+xml" && l.rel = Alternate 1525 + 1526 + let add_entries_of_feed entries feed : entry list = 1527 + let source_of_feed = 1528 + Some 1529 + { authors= feed.authors 1530 + ; categories= feed.categories 1531 + ; contributors= feed.contributors 1532 + ; generator= feed.generator 1533 + ; icon= feed.icon 1534 + ; id= feed.id 1535 + ; links= feed.links 1536 + ; logo= feed.logo 1537 + ; rights= feed.rights 1538 + ; subtitle= feed.subtitle 1539 + ; title= feed.title 1540 + ; updated= Some feed.updated } 1541 + in 1542 + let add_entry entries (e : entry) = 1543 + match e.source with 1544 + | Some _ -> e :: entries (* if a source is present, do not overwrite it. *) 1545 + | None -> {e with source= source_of_feed} :: entries 1546 + in 1547 + List.fold_left add_entry entries feed.entries 1548 + 1549 + let entries_of_feeds feeds = List.fold_left add_entries_of_feed [] feeds 1550 + 1551 + let more_recent d1 (e : entry) = 1552 + if Date.compare d1 e.updated >= 0 then d1 else e.updated 1553 + 1554 + let aggregate ?self ?id ?updated ?subtitle ?(title = default_title) 1555 + ?(sort = `Newest_first) ?n feeds : feed = 1556 + let entries = entries_of_feeds feeds in 1557 + let entries = 1558 + match sort with 1559 + | `Newest_first -> List.sort descending entries 1560 + | `Oldest_first -> List.sort ascending entries 1561 + | `None -> entries 1562 + in 1563 + let entries = match n with Some n -> take entries n | None -> entries in 1564 + let id = 1565 + match id with 1566 + | Some id -> id 1567 + | None -> 1568 + (* Collect all ids of the entries and "digest" them. *) 1569 + let b = Buffer.create 4096 in 1570 + let add_id (e : entry) = Buffer.add_string b (Uri.to_string e.id) in 1571 + List.iter add_id entries ; 1572 + let d = Digest.to_hex (Digest.string (Buffer.contents b)) in 1573 + (* FIXME: use urn:uuid *) 1574 + Uri.of_string ("urn:md5:" ^ d) 1575 + in 1576 + let links = 1577 + match self with 1578 + | Some u -> 1579 + [ link u 1580 + ~title:(string_of_text_construct title) 1581 + ~rel:Self ~type_media:"application/atom+xml" ] 1582 + | None -> [] 1583 + in 1584 + let updated = 1585 + match updated with 1586 + | Some d -> d 1587 + | None -> ( 1588 + (* Use the more recent date of the entries. *) 1589 + match entries with 1590 + | [] -> Date.epoch 1591 + | e0 :: el -> List.fold_left more_recent e0.updated el ) 1592 + in 1593 + { authors= [] 1594 + ; categories= [] 1595 + ; contributors= [] 1596 + ; generator= Some syndic_generator 1597 + ; icon= Some ocaml_icon 1598 + ; id 1599 + ; links 1600 + ; logo= None 1601 + ; rights= None 1602 + ; subtitle 1603 + ; title 1604 + ; updated 1605 + ; entries }

+597

stack/syndic/lib/syndic_atom.mli

··· 1 + (** [Syndic.Atom]: {{: http://tools.ietf.org/html/rfc4287} RFC 4287} compliant 2 + Atom parser. *) 3 + 4 + module Error : module type of Syndic_error 5 + 6 + (** {2 Structure of Atom document} *) 7 + 8 + (** A {{:http://tools.ietf.org/html/rfc4287#section-3.1}text construct}. It 9 + contains human-readable text, usually in small quantities. The content of 10 + Text constructs is Language-Sensitive. 11 + 12 + Since the constructors [Text], [Html] or [Xhtml] are shadowed by those of 13 + the same name in the definition of {!type:content}, you may need a type 14 + annotation to disambiguate the two. *) 15 + type text_construct = 16 + | Text of string (** [Text(content)] *) 17 + | Html of Uri.t option * string 18 + (** [Html(xmlbase, content)] where the content is left unparsed. *) 19 + | Xhtml of Uri.t option * Syndic_xml.t list (** [Xhtml(xmlbase, content)] *) 20 + 21 + (** Describes a person, corporation, or similar entity (hereafter, 'person') 22 + that indicates the author of the entry or feed. {{: 23 + http://tools.ietf.org/html/rfc4287#section-3.2} See RFC 4287 § 3.2}. 24 + Person constructs allow extension Metadata elements (see {{: 25 + http://tools.ietf.org/html/rfc4287#section-6.4}Section 6.4}). 26 + 27 + They are used for authors 28 + ({{:http://tools.ietf.org/html/rfc4287#section-4.2.1} See RFC 4287 29 + § 4.2.1}) and contributors 30 + ({{:http://tools.ietf.org/html/rfc4287#section-4.2.3} See RFC 4287 31 + § 4.2.3}) *) 32 + type author = {name: string; uri: Uri.t option; email: string option} 33 + 34 + val author : ?uri:Uri.t -> ?email:string -> string -> author 35 + 36 + (** The [category] element conveys information about a category associated with 37 + an entry or feed. This specification assigns no meaning to the content (if 38 + any) of this element. {{:http://tools.ietf.org/html/rfc4287#section-4.2.2} 39 + See RFC 4287 § 4.2.2}. 40 + 41 + - [term] is a string that identifies the category to which the entry or 42 + feed belongs. {{: http://tools.ietf.org/html/rfc4287#section-4.2.2.2} See 43 + RFC 4287 § 4.2.2.2} - [scheme], if present, is an IRI that identifies a 44 + categorization scheme. {{: 45 + http://tools.ietf.org/html/rfc4287#section-4.2.2.3} See RFC 4287 § 46 + 4.2.2.3} - [label], if present, is a human-readable label for display in 47 + end-user applications. The content of the "label" attribute is 48 + Language-Sensitive. {{: http://tools.ietf.org/html/rfc4287#section-4.2.2.1} 49 + See RFC 4287 § 4.2.2.1} *) 50 + type category = {term: string; scheme: Uri.t option; label: string option} 51 + 52 + val category : ?scheme:Uri.t -> ?label:string -> string -> category 53 + 54 + (** The [generator] element's content identifies the agent used to generate a 55 + feed, for debugging and other purposes. - [content] is a human-readable 56 + name for the generating agent. - [uri], if present, SHOULD produce a 57 + representation that is relevant to that agent. - [version], if present, 58 + indicates the version of the generating agent. 59 + 60 + See {{: http://tools.ietf.org/html/rfc4287#section-4.2.4}RFC 4287 § 61 + 4.2.4}. *) 62 + type generator = {version: string option; uri: Uri.t option; content: string} 63 + 64 + val generator : ?uri:Uri.t -> ?version:string -> string -> generator 65 + 66 + (** The [icon] element's content is an IRI reference [RFC3987] that identifies 67 + an image that provides iconic visual identification for a feed. 68 + 69 + The image SHOULD have an aspect ratio of one (horizontal) to one (vertical) 70 + and SHOULD be suitable for presentation at a small size. 71 + 72 + {{:http://tools.ietf.org/html/rfc4287#section-4.2.5} See RFC 4287 § 4.2.5} *) 73 + type icon = Uri.t 74 + 75 + (** The [id] element conveys a permanent, universally unique identifier for an 76 + entry or feed. 77 + 78 + Its content MUST be an IRI, as defined by [RFC3987]. Note that the 79 + definition of "IRI" excludes relative references. Though the IRI might use 80 + a dereferencable scheme, Atom Processors MUST NOT assume it can be 81 + dereferenced. 82 + 83 + There is more information in the RFC but they are not necessary here, at 84 + least, they can not be checked here. 85 + 86 + {{: http://tools.ietf.org/html/rfc4287#section-4.2.6} See RFC 4287 § 4.2.6 87 + } *) 88 + type id = Uri.t 89 + 90 + (** Indicates the link relation type. See {{: 91 + http://tools.ietf.org/html/rfc4287#section-4.2.7.2} RFC 4287 § 4.2.7.2}. *) 92 + type rel = 93 + | Alternate 94 + (** Signifies that the URI in the value of the link [href] field 95 + identifies an alternate version of the resource described by the 96 + containing element. *) 97 + | Related 98 + (** Signifies that the URI in the value of the link [href] field 99 + identifies a resource related to the resource described by the 100 + containing element. *) 101 + | Self 102 + (** Signifies that the URI in the value of the link [href] field 103 + identifies a resource equivalent to the containing element. *) 104 + | Enclosure 105 + (** Signifies that the IRI in the value of the link [href] field 106 + identifies a related resource that is potentially large in size and 107 + might require special handling. When [Enclosure] is specified, the 108 + length attribute SHOULD be provided. *) 109 + | Via 110 + (** Signifies that the IRI in the value of the link [href] field 111 + identifies a resource that is the source of the information provided 112 + in the containing element. *) 113 + | Link of Uri.t 114 + (** The URI MUST be non-empty and match either the "isegment-nz-nc" or 115 + the "IRI" production in {{:http://tools.ietf.org/html/rfc3987} 116 + RFC3987}. Note that use of a relative reference other than a simple 117 + name is not allowed. *) 118 + 119 + (** [link] defines a reference from an entry or feed to a Web resource. See {{: 120 + http://tools.ietf.org/html/rfc4287#section-4.2.7} RFC 4287 § 4.2.7}. 121 + 122 + - [href] contains the link's IRI. The value MUST be a IRI reference, 123 + {{:http://tools.ietf.org/html/rfc3987} RFC3987}. See {{: 124 + http://tools.ietf.org/html/rfc4287#section-4.2.7.1} RFC 4287 § 4.2.7.1}. - 125 + [type_media] is an advisory media type: it is a hint about the type of the 126 + representation that is expected to be returned when the value of the href 127 + attribute is dereferenced. Note that the type attribute does not override 128 + the actual media type returned with the representation. The value of 129 + [type_media], if given, MUST conform to the syntax of a MIME media type, 130 + {{:http://tools.ietf.org/html/rfc4287#ref-MIMEREG} MIMEREG}. See {{: 131 + http://tools.ietf.org/html/rfc4287#section-4.2.7.3} RFC 4287 § 4.2.7.3}. - 132 + [hreflang] describes the language of the resource pointed to by the href 133 + attribute. When used together with the [rel=Alternate], it implies a 134 + translated version of the entry. The value of [hreflang] MUST be a language 135 + tag, {{:http://tools.ietf.org/html/rfc3066} RFC3066}. See {{: 136 + http://tools.ietf.org/html/rfc4287#section-4.2.7.4} RFC 4287 § 4.2.7.4}. - 137 + [title] conveys human-readable information about the link. The content of 138 + the "title" attribute is Language-Sensitive. The value [""] means that no 139 + title is provided. See {{: 140 + http://tools.ietf.org/html/rfc4287#section-4.2.7.5} RFC 4287 § 4.2.7.5}. - 141 + [length] indicates an advisory length of the linked content in octets; it 142 + is a hint about the content length of the representation returned when the 143 + IRI in the href attribute is mapped to a URI and dereferenced. Note that 144 + the length attribute does not override the actual content length of the 145 + representation as reported by the underlying protocol. See {{: 146 + http://tools.ietf.org/html/rfc4287#section-4.2.7.6} RFC 4287 § 4.2.7.6}. *) 147 + type link = 148 + { href: Uri.t 149 + ; rel: rel 150 + ; type_media: string option 151 + ; hreflang: string option 152 + ; title: string 153 + ; length: int option } 154 + 155 + val link : 156 + ?type_media:string 157 + -> ?hreflang:string 158 + -> ?title:string 159 + -> ?length:int 160 + -> ?rel:rel 161 + -> Uri.t 162 + -> link 163 + (** [link uri] creates a link element. 164 + 165 + @param rel The [rel] attribute of the link. It defaults to [Alternate] 166 + since {{:http://tools.ietf.org/html/rfc4287#section-4.2.7.2} RFC 4287 § 167 + 4.2.7.2} says that {i if the "rel" attribute is not present, the link 168 + element MUST be interpreted as if the link relation type is "alternate".} 169 + 170 + The other optional arguments all default to [None] (i.e., not specified). *) 171 + 172 + (** [logo] is an IRI reference [RFC3987] that identifies an image that provides 173 + visual identification for a feed. 174 + 175 + The image SHOULD have an aspect ratio of 2 (horizontal) to 1 (vertical). 176 + 177 + {{: http://tools.ietf.org/html/rfc4287#section-4.2.8} See RFC 4287 § 178 + 4.2.8} *) 179 + type logo = Uri.t 180 + 181 + (** [published] is a Date construct indicating an instant in time associated 182 + with an event early in the life cycle of the entry. 183 + 184 + Typically, [published] will be associated with the initial creation or 185 + first availability of the resource. 186 + 187 + {{: http://tools.ietf.org/html/rfc4287#section-4.2.9} See RFC 4287 § 188 + 4.2.9} *) 189 + type published = Syndic_date.t 190 + 191 + (** [rights] is a Text construct that conveys information about rights held in 192 + and over an entry or feed. The [rights] element SHOULD NOT be used to 193 + convey machine-readable licensing information. 194 + 195 + If an atom:entry element does not contain an atom:rights element, then the 196 + atom:rights element of the containing atom:feed element, if present, is 197 + considered to apply to the entry. 198 + 199 + See {{: http://tools.ietf.org/html/rfc4287#section-4.2.10} RFC 4287 § 200 + 4.2.10 } *) 201 + type rights = text_construct 202 + 203 + (** [title] is a Text construct that conveys a human-readable title for an 204 + entry or feed. {{: http://tools.ietf.org/html/rfc4287#section-4.2.14} See 205 + RFC 4287 § 4.2.14 } *) 206 + type title = text_construct 207 + 208 + (** [subtitle] is a Text construct that conveys a human-readable description or 209 + subtitle for a feed. {{: http://tools.ietf.org/html/rfc4287#section-4.2.12} 210 + See RFC 4287 § 4.2.12 } *) 211 + type subtitle = text_construct 212 + 213 + (** [updated] is a Date construct indicating the most recent instant in time 214 + when an entry or feed was modified in a way the publisher considers 215 + significant. Therefore, not all modifications necessarily result in a 216 + changed [updated] value. 217 + 218 + Publishers MAY change the value of this element over time. 219 + 220 + {{: http://tools.ietf.org/html/rfc4287#section-4.2.15} See RFC 4287 § 221 + 4.2.15 } *) 222 + type updated = Syndic_date.t 223 + 224 + (** If an {!entry} is copied from one feed into another feed, then the source 225 + {!feed}'s metadata (all child elements of atom:feed other than the 226 + atom:entry elements) MAY be preserved within the copied entry by adding an 227 + atom:source child element, if it is not already present in the entry, and 228 + including some or all of the source feed's Metadata elements as the 229 + atom:source element's children. Such metadata SHOULD be preserved if the 230 + source atom:feed contains any of the child elements atom:author, 231 + atom:contributor, atom:rights, or atom:category and those child elements 232 + are not present in the source atom:entry. 233 + 234 + {{: http://tools.ietf.org/html/rfc4287#section-4.2.11} See RFC 4287 § 235 + 4.2.11 } 236 + 237 + The atom:source element is designed to allow the aggregation of entries 238 + from different feeds while retaining information about an entry's source 239 + feed. For this reason, Atom Processors that are performing such aggregation 240 + SHOULD include at least the required feed-level Metadata fields ([id], 241 + [title], and [updated]) in the [source] element. 242 + 243 + {{: http://tools.ietf.org/html/rfc4287#section-4.1.2} See RFC 4287 § 4.1.2 244 + for more details.} *) 245 + type source = 246 + { authors: author list 247 + ; categories: category list 248 + ; contributors: author list 249 + (** {{: http://tools.ietf.org/html/rfc4287#section-4.2.3} See RFC 4287 250 + § 4.2.3 } *) 251 + ; generator: generator option 252 + ; icon: icon option 253 + ; id: id 254 + ; links: link list 255 + ; logo: logo option 256 + ; rights: rights option 257 + ; subtitle: subtitle option 258 + ; title: title 259 + ; updated: updated option } 260 + 261 + val source : 262 + ?categories:category list 263 + -> ?contributors:author list 264 + -> ?generator:generator 265 + -> ?icon:icon 266 + -> ?links:link list 267 + -> ?logo:logo 268 + -> ?rights:rights 269 + -> ?subtitle:subtitle 270 + -> ?updated:updated 271 + -> authors:author list 272 + -> id:id 273 + -> title 274 + -> source 275 + 276 + (** A MIME type that conform to the syntax of a MIME media type, but MUST NOT 277 + be a composite type (see Section 4.2.6 of [MIMEREG]). 278 + 279 + {{: http://tools.ietf.org/html/rfc4287#section-4.1.3.1} See RFC 4287 § 280 + 4.1.3.1 } *) 281 + type mime = string 282 + 283 + (** [content] either contains or links to the content of the entry. The value 284 + of [content] is Language-Sensitive. {{: 285 + http://tools.ietf.org/html/rfc4287#section-4.1.3} See RFC 4287 § 4.1.3} 286 + 287 + - [Text], [Html], [Xhtml] or [Mime] means that the content was part of the 288 + document and is provided as an argument. The first argument to [Html] and 289 + [Xhtml] is the possible xml:base value. 290 + {{:http://tools.ietf.org/html/rfc4287#section-3.1.1} See RFC 4287 § 3.1.1} 291 + - [Src(m, iri)] means that the content is to be found at [iri] and has MIME 292 + type [m]. Atom Processors MAY use the IRI to retrieve the content and MAY 293 + choose to ignore remote content or to present it in a different manner than 294 + local content. The value of [m] is advisory; that is to say, when the 295 + corresponding URI (mapped from an IRI, if necessary) is dereferenced, if 296 + the server providing that content also provides a media type, the 297 + server-provided media type is authoritative. See {{: 298 + http://tools.ietf.org/html/rfc4287#section-4.1.3.2} RFC 4287 § 4.1.3.2} *) 299 + type content = 300 + | Text of string 301 + | Html of Uri.t option * string 302 + | Xhtml of Uri.t option * Syndic_xml.t list 303 + | Mime of mime * string 304 + | Src of mime option * Uri.t 305 + 306 + (** [summary] is a Text construct that conveys a short summary, abstract, or 307 + excerpt of an entry. 308 + 309 + It is not advisable for [summary] to duplicate {!title} or {!content} 310 + because Atom Processors might assume there is a useful summary when there 311 + is none. 312 + 313 + {{: http://tools.ietf.org/html/rfc4287#section-4.2.13} See RFC 4287 § 314 + 4.2.13 } *) 315 + type summary = text_construct 316 + 317 + (** [entry] represents an individual entry, acting as a container for metadata 318 + and data associated with the entry. This element can appear as a child of 319 + the atom:feed element, or it can appear as the document (i.e., top-level) 320 + element of a stand-alone Atom Entry Document. 321 + 322 + The specification mandates that each entry contains an author unless it 323 + contains some sources or the feed contains an author element. This library 324 + ensures that the authors are properly dispatched to all locations. 325 + 326 + The following child elements are defined by this specification (note that 327 + it requires the presence of some of these elements): 328 + 329 + - if [content = None], then [links] MUST contain at least one element with 330 + a rel attribute value of [Alternate]. - There MUST NOT be more than one 331 + element of [links] with a rel attribute value of [Alternate] that has the 332 + same combination of type and hreflang attribute values. - There MAY be 333 + additional elements of [links] beyond those described above. - There MUST 334 + be an [summary] in either of the following cases: {ul {- the atom:entry 335 + contains an atom:content that has a "src" attribute (and is thus empty).} 336 + {- the atom:entry contains content that is encoded in Base64; i.e., the 337 + "type" attribute of atom:content is a MIME media type [MIMEREG], but is not 338 + an XML media type [RFC3023], does not begin with "text/", and does not end 339 + with "/xml" or "+xml".}} 340 + 341 + {{: http://tools.ietf.org/html/rfc4287#section-4.1.2} See RFC 4287 § 342 + 4.1.2} *) 343 + type entry = 344 + { authors: author * author list 345 + ; categories: category list 346 + ; content: content option 347 + ; contributors: author list 348 + ; id: id 349 + ; links: link list 350 + ; published: published option 351 + ; rights: rights option 352 + ; source: source option 353 + ; summary: summary option 354 + ; title: title 355 + ; updated: updated } 356 + 357 + val entry : 358 + ?categories:category list 359 + -> ?content:content 360 + -> ?contributors:author list 361 + -> ?links:link list 362 + -> ?published:published 363 + -> ?rights:rights 364 + -> ?source:source 365 + -> ?summary:summary 366 + -> id:id 367 + -> authors:author * author list 368 + -> title:title 369 + -> updated:updated 370 + -> unit 371 + -> entry 372 + 373 + (** [feed] is the document (i.e., top-level) element of an Atom Feed Document, 374 + acting as a container for metadata and data associated with the feed. Its 375 + element children consist of metadata elements followed by zero or more 376 + atom:entry child elements. 377 + 378 + - one of the [links] SHOULD have a [rel] attribute value of [Self]. This is 379 + the preferred URI for retrieving Atom Feed Documents representing this Atom 380 + feed. - There MUST NOT be more than one element of [links] with a rel 381 + attribute value of [Alternate] that has the same combination of type and 382 + hreflang attribute values. - There may be additional elements in [links] 383 + beyond those described above. 384 + 385 + If multiple {!entry} elements with the same {!id} value appear in an Atom 386 + Feed Document, they represent the same entry. Their {!updated} timestamps 387 + SHOULD be different. If an Atom Feed Document contains multiple entries 388 + with the same {!id}, Atom Processors MAY choose to display all of them or 389 + some subset of them. One typical behavior would be to display only the 390 + entry with the latest {!updated} timestamp. 391 + 392 + {{: http://tools.ietf.org/html/rfc4287#section-4.1.1} See RFC 4287 § 393 + 4.1.1} *) 394 + type feed = 395 + { authors: author list 396 + ; categories: category list 397 + ; contributors: author list 398 + ; generator: generator option 399 + ; icon: icon option 400 + ; id: id 401 + ; links: link list 402 + ; logo: logo option 403 + ; rights: rights option 404 + ; subtitle: subtitle option 405 + ; title: title 406 + ; updated: updated 407 + ; entries: entry list } 408 + 409 + val feed : 410 + ?authors:author list 411 + -> ?categories:category list 412 + -> ?contributors:author list 413 + -> ?generator:generator 414 + -> ?icon:icon 415 + -> ?links:link list 416 + -> ?logo:logo 417 + -> ?rights:rights 418 + -> ?subtitle:subtitle 419 + -> id:id 420 + -> title:title 421 + -> updated:updated 422 + -> entry list 423 + -> feed 424 + 425 + (** {2 Input and output} *) 426 + 427 + val parse : ?self:Uri.t -> ?xmlbase:Uri.t -> Xmlm.input -> feed 428 + (** [parse xml] returns the feed corresponding to [xml]. Beware that [xml] is 429 + mutable, so when the parsing fails, one has to create a new copy of [xml] 430 + to use it with another function. If you retrieve [xml] from a URL, you 431 + should use that URL as [~xmlbase]. 432 + 433 + Raise [Error.Expected], [Expected_Data] or [Error.Duplicate_Link] if [xml] 434 + is not a valid Atom document. 435 + 436 + @param xmlbase default xml:base to resolve relative URLs (of course 437 + xml:base attributes in the XML Atom document take precedence over this). 438 + See {{:http://www.w3.org/TR/xmlbase/}XML Base}. 439 + 440 + @param self the URI from where the current feed was retrieved. Giving this 441 + information will add an entry to [links] with [rel = Self] unless one 442 + already exists. *) 443 + 444 + val read : ?self:Uri.t -> ?xmlbase:Uri.t -> string -> feed 445 + (** [read fname] reads the file name [fname] and parses it. For the optional 446 + parameters, see {!parse}. *) 447 + 448 + val to_xml : feed -> Syndic_xml.t 449 + (** [to_xml f] converts the feed [f] to an XML tree. *) 450 + 451 + val output : feed -> Xmlm.dest -> unit 452 + (** [output f dest] writes the XML tree of the feed [f] to [dest]. *) 453 + 454 + val write : feed -> string -> unit 455 + (** [write f fname] writes the XML tree of the feed [f] to the file named 456 + [fname]. *) 457 + 458 + (** {2 Convenience functions} *) 459 + 460 + val ascending : entry -> entry -> int 461 + (** Compare entries so that older dates are smaller. The date of the entry is 462 + taken from the [published] field, if available, or otherwise [updated] is 463 + used. *) 464 + 465 + val descending : entry -> entry -> int 466 + (** Compare entries so that more recent dates are smaller. The date of the 467 + entry is taken from the [published] field, if available, or otherwise 468 + [updated] is used. *) 469 + 470 + val aggregate : 471 + ?self:Uri.t 472 + -> ?id:id 473 + -> ?updated:updated 474 + -> ?subtitle:subtitle 475 + -> ?title:text_construct 476 + -> ?sort:[`Newest_first | `Oldest_first | `None] 477 + -> ?n:int 478 + -> feed list 479 + -> feed 480 + (** [aggregate feeds] returns a single feed containing all the posts in 481 + [feeds]. In order to track the origin of each post in the aggrated feed, it 482 + is recommended that each feed in [feeds] possesses a link with 483 + [rel = Self] so that the [source] added to each entry contains a link to 484 + the original feed. If an entry contains a [source], il will {i not} be 485 + overwritten. 486 + 487 + @param self The preferred URI for retrieving this aggregayed Atom Feed. 488 + While not mandatory, it is good practice to set this. 489 + 490 + @param id the universally unique identifier for the aggregated feed. If it 491 + is not provided a URN is built from the [feeds] IDs. @param sort whether to 492 + sort the entries of the final feed. The default is [`Newest_first] because 493 + it is generally desired. @param n number of entries of the (sorted) 494 + aggregated feed to return. *) 495 + 496 + val set_self_link : feed -> ?hreflang:string -> ?length:int -> Uri.t -> feed 497 + (** [set_self feed url] add or replace the URI in the self link of the feed. 498 + You can also set the [hreflang] and [length] of the self link. *) 499 + 500 + val get_self_link : feed -> link option 501 + (** [get_self feed] return the self link of the feed, if any is present. *) 502 + 503 + val set_main_author : feed -> author -> feed 504 + (** [set_main_author feed author] will add [author] in front of the list of 505 + authors of the [feed] (if an author with the same name already exists, the 506 + optional information are merged, the ones in [author] taking precedence). 507 + Also remove all empty authors (name = "" and no URI, no email) and replace 508 + them with [author] if no author is left and an authors is mandatory. *) 509 + 510 + (**/**) 511 + 512 + (** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None], 513 + gives the base URI against which [uri] must be resolved if it is relative. *) 514 + type uri = Uri.t option * string 515 + 516 + type person = [`Email of string | `Name of string | `URI of uri] list 517 + 518 + val unsafe : 519 + ?xmlbase:Uri.t 520 + -> Xmlm.input 521 + -> [> `Feed of [> `Author of person 522 + | `Category of [> `Label of string 523 + | `Scheme of string 524 + | `Term of string ] 525 + list 526 + | `Contributor of person 527 + | `Entry of [> `Author of person 528 + | `Category of [> `Label of string 529 + | `Scheme of string 530 + | `Term of string ] 531 + list 532 + | `Content of [> `Data of Syndic_xml.t list 533 + | `SRC of string 534 + | `Type of string ] 535 + list 536 + | `Contributor of person 537 + | `ID of string list 538 + | `Link of [> `HREF of string 539 + | `HREFLang of string 540 + | `Length of string 541 + | `Rel of string 542 + | `Title of string 543 + | `Type of string ] 544 + list 545 + | `Published of [> `Date of string] list 546 + | `Rights of Syndic_xml.t list 547 + | `Source of [> `Author of person 548 + | `Category of [> `Label of string 549 + | `Scheme of string 550 + | `Term of string ] 551 + list 552 + | `Contributor of person 553 + | `Generator of [> `Content of string 554 + | `URI of uri 555 + | `Version of string 556 + ] 557 + list 558 + | `ID of string list 559 + | `Icon of [> `URI of uri] list 560 + | `Link of [> `HREF of string 561 + | `HREFLang of string 562 + | `Length of string 563 + | `Rel of string 564 + | `Title of string 565 + | `Type of string ] 566 + list 567 + | `Logo of [> `URI of uri] list 568 + | `Rights of Syndic_xml.t list 569 + | `Subtitle of Syndic_xml.t list 570 + | `Title of Syndic_xml.t list 571 + | `Updated of [> `Date of string] 572 + list ] 573 + list 574 + | `Summary of Syndic_xml.t list 575 + | `Title of Syndic_xml.t list 576 + | `Updated of [> `Date of string] list ] 577 + list 578 + | `Generator of [> `Content of string 579 + | `URI of uri 580 + | `Version of string ] 581 + list 582 + | `ID of string list 583 + | `Icon of [> `URI of uri] list 584 + | `Link of [> `HREF of string 585 + | `HREFLang of string 586 + | `Length of string 587 + | `Rel of string 588 + | `Title of string 589 + | `Type of string ] 590 + list 591 + | `Logo of [> `URI of uri] list 592 + | `Rights of Syndic_xml.t list 593 + | `Subtitle of Syndic_xml.t list 594 + | `Title of Syndic_xml.t list 595 + | `Updated of [> `Date of string] list ] 596 + list ] 597 + (** Analysis without verification, enjoy ! *)

+152

stack/syndic/lib/syndic_common.ml

··· 1 + (* XML *) 2 + 3 + module XML = struct 4 + include Syndic_xml 5 + 6 + type node = pos * tag * t list 7 + 8 + let xmlbase_tag = (Xmlm.ns_xml, "base") 9 + 10 + let xmlbase_of_attr ~xmlbase attr = 11 + try 12 + let new_base = List.assoc xmlbase_tag attr in 13 + Some (Syndic_xml.resolve ~xmlbase (Uri.of_string new_base)) 14 + with Not_found -> xmlbase 15 + 16 + let generate_catcher ?(namespaces = [""]) ?(attr_producer = []) 17 + ?(data_producer = []) ?leaf_producer maker = 18 + let in_namespaces ((prefix, _), _) = List.mem prefix namespaces in 19 + let get_attr_name (((_prefix, name), _) : Xmlm.attribute) = name in 20 + let get_attr_value ((_, value) : Xmlm.attribute) = value in 21 + let get_tag_name (((_prefix, name), _) : tag) = name in 22 + let get_attrs ((_, attrs) : tag) = attrs in 23 + let get_producer name map = 24 + try Some (List.assoc name map) with _ -> None 25 + in 26 + let rec catch_attr ~xmlbase acc pos = function 27 + | attr :: r -> ( 28 + match get_producer (get_attr_name attr) attr_producer with 29 + | Some f when in_namespaces attr -> 30 + let acc = f ~xmlbase (get_attr_value attr) :: acc in 31 + catch_attr ~xmlbase acc pos r 32 + | _ -> catch_attr ~xmlbase acc pos r ) 33 + | [] -> acc 34 + in 35 + let rec catch_datas ~xmlbase acc = function 36 + | Node (pos, tag, datas) :: r -> ( 37 + match get_producer (get_tag_name tag) data_producer with 38 + | Some f when in_namespaces tag -> 39 + let acc = f ~xmlbase (pos, tag, datas) :: acc in 40 + catch_datas ~xmlbase acc r 41 + | _ -> catch_datas ~xmlbase acc r ) 42 + | Data (pos, str) :: r -> ( 43 + match leaf_producer with 44 + | Some f -> catch_datas ~xmlbase (f ~xmlbase pos str :: acc) r 45 + | None -> catch_datas ~xmlbase acc r ) 46 + | [] -> acc 47 + in 48 + let generate ~xmlbase ((pos, tag, datas) : node) = 49 + (* The spec says that "The base URI for a URI reference appearing in any 50 + other attribute value, including default attribute values, is the base 51 + URI of the element bearing the attribute" so get xml:base first. *) 52 + let xmlbase = xmlbase_of_attr ~xmlbase (get_attrs tag) in 53 + let acc = catch_attr ~xmlbase [] pos (get_attrs tag) in 54 + maker ~pos (catch_datas ~xmlbase acc datas) 55 + in 56 + generate 57 + 58 + let dummy_of_xml ~ctor = 59 + let leaf_producer ~xmlbase _pos data = ctor ~xmlbase data in 60 + let head ~pos:_ = function [] -> ctor ~xmlbase:None "" | x :: _ -> x in 61 + generate_catcher ~leaf_producer head 62 + end 63 + 64 + (* Util *) 65 + 66 + module Util = struct 67 + let find f l = try Some (List.find f l) with Not_found -> None 68 + 69 + exception Found of XML.t 70 + 71 + let recursive_find f root = 72 + let rec aux = function 73 + | [] -> None 74 + | x :: _ when f x -> raise (Found x) 75 + | XML.Node (_, _, x) :: r -> ( 76 + aux x 77 + |> function 78 + | Some x -> raise (Found x) (* assert false ? *) | None -> aux r ) 79 + | XML.Data _ :: r -> aux r 80 + in 81 + try aux [root] with Found x -> Some x | _ -> None 82 + 83 + let rec filter_map l f = 84 + match l with 85 + | [] -> [] 86 + | x :: tl -> ( 87 + match f x with None -> filter_map tl f | Some x -> x :: filter_map tl f ) 88 + 89 + let rec take l n = 90 + match l with 91 + | [] -> [] 92 + | e :: tl -> if n > 0 then e :: take tl (n - 1) else [] 93 + 94 + let tag_is (((_prefix, name), _attrs) : XML.tag) = ( = ) name 95 + let attr_is (((_prefix, name), _value) : Xmlm.attribute) = ( = ) name 96 + let datas_has_leaf = List.exists (function XML.Data _ -> true | _ -> false) 97 + 98 + let get_leaf l = 99 + match find (function XML.Data _ -> true | _ -> false) l with 100 + | Some (XML.Data (_, s)) -> s 101 + | _ -> raise Not_found 102 + 103 + let get_attrs ((_, attrs) : XML.tag) = attrs 104 + let get_value ((_, value) : Xmlm.attribute) = value 105 + let get_attr_name (((_prefix, name), _) : Xmlm.attribute) = name 106 + let get_tag_name (((_prefix, name), _) : XML.tag) = name 107 + let is_space c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 108 + 109 + let only_whitespace s = 110 + let r = ref true in 111 + let i = ref 0 and len = String.length s in 112 + while !r && !i < len do 113 + r := is_space s.[!i] ; 114 + incr i 115 + done ; 116 + !r 117 + 118 + (* Output feeds to XML *) 119 + 120 + let add_attr name v_opt attr = 121 + match v_opt with None | Some "" -> attr | Some v -> (name, v) :: attr 122 + 123 + let add_attr_uri name v_opt attr = 124 + match v_opt with None -> attr | Some v -> (name, Uri.to_string v) :: attr 125 + 126 + let tag name = (("", name), []) 127 + let dummy_pos = (0, 0) 128 + 129 + (* Do smarter positions make sense? *) 130 + 131 + let node_data tag content = 132 + XML.Node (dummy_pos, tag, [XML.Data (dummy_pos, content)]) 133 + 134 + let node_uri tag uri = node_data tag (Uri.to_string uri) 135 + 136 + let add_node_data tag c nodes = 137 + match c with 138 + | None -> nodes 139 + | Some content -> node_data tag content :: nodes 140 + 141 + let add_node_uri tag c nodes = 142 + match c with 143 + | None -> nodes 144 + | Some uri -> node_data tag (Uri.to_string uri) :: nodes 145 + 146 + (* Add to [nodes] those coming from mapping [f] on [els] *) 147 + let add_nodes_rev_map f els nodes = 148 + List.fold_left (fun nodes el -> f el :: nodes) nodes els 149 + 150 + let add_node_option f op nodes = 151 + match op with None -> nodes | Some v -> f v :: nodes 152 + end

+70

stack/syndic/lib/syndic_common.mli

··· 1 + module XML : sig 2 + type t = Syndic_xml.t 3 + type node = Syndic_xml.pos * Syndic_xml.tag * t list 4 + 5 + val generate_catcher : 6 + ?namespaces:string list 7 + -> ?attr_producer:(string * (xmlbase:Uri.t option -> string -> 'a)) list 8 + -> ?data_producer:(string * (xmlbase:Uri.t option -> node -> 'a)) list 9 + -> ?leaf_producer:(xmlbase:Uri.t option -> Xmlm.pos -> string -> 'a) 10 + -> (pos:Xmlm.pos -> 'a list -> 'b) 11 + -> xmlbase:Uri.t option 12 + -> node 13 + -> 'b 14 + 15 + val dummy_of_xml : 16 + ctor:(xmlbase:Uri.t option -> string -> 'a) 17 + -> xmlbase:Uri.t option 18 + -> node 19 + -> 'a 20 + 21 + val xmlbase_of_attr : 22 + xmlbase:Uri.t option -> Xmlm.attribute list -> Uri.t option 23 + end 24 + 25 + module Util : sig 26 + val find : ('a -> bool) -> 'a list -> 'a option 27 + val recursive_find : (XML.t -> bool) -> XML.t -> XML.t option 28 + val filter_map : 'a list -> ('a -> 'b option) -> 'b list 29 + val take : 'a list -> int -> 'a list 30 + val tag_is : Xmlm.tag -> string -> bool 31 + val attr_is : Xmlm.attribute -> string -> bool 32 + val datas_has_leaf : XML.t list -> bool 33 + val get_leaf : XML.t list -> string 34 + val get_attrs : Xmlm.tag -> Xmlm.attribute list 35 + val get_value : Xmlm.attribute -> string 36 + val get_attr_name : Xmlm.attribute -> string 37 + val get_tag_name : Xmlm.tag -> string 38 + val only_whitespace : string -> bool 39 + 40 + (** {2 Helpers to output XML} *) 41 + 42 + val dummy_pos : Xmlm.pos 43 + (** A dummy position when generating XML files. *) 44 + 45 + val add_attr : 46 + Xmlm.name -> string option -> Xmlm.attribute list -> Xmlm.attribute list 47 + 48 + val add_attr_uri : 49 + Xmlm.name -> Uri.t option -> Xmlm.attribute list -> Xmlm.attribute list 50 + 51 + val tag : string -> Xmlm.tag 52 + (** [tag n] returns a tag with name [n], no namespace, and no attributes. *) 53 + 54 + val node_data : Xmlm.tag -> string -> XML.t 55 + (** [node_data tag content] returns a node named [tag] with data set to 56 + [content]. *) 57 + 58 + val node_uri : Xmlm.tag -> Uri.t -> XML.t 59 + val add_node_data : Xmlm.tag -> string option -> XML.t list -> XML.t list 60 + val add_node_uri : Xmlm.tag -> Uri.t option -> XML.t list -> XML.t list 61 + 62 + val add_nodes_rev_map : ('a -> XML.t) -> 'a list -> XML.t list -> XML.t list 63 + (** [add_nodes_rev_map f l nodes] apply [f] to each element of [l] and add 64 + the resulting HTML trees in reverse order in front of [nodes]. *) 65 + 66 + val add_node_option : ('a -> XML.t) -> 'a option -> XML.t list -> XML.t list 67 + (** [add_node_option f o nodes]: if [o] is [None], return [nodes]; otherwise 68 + apply [f] to the value carried by [o] and add the resulting XML tree in 69 + front of [nodes]. *) 70 + end

+196

stack/syndic/lib/syndic_date.ml

··· 1 + open Printf 2 + open Scanf 3 + 4 + type t = Ptime.t 5 + 6 + let epoch = Ptime.epoch 7 + let compare = Ptime.compare 8 + let max d1 d2 = if compare d1 d2 < 0 then d2 else d1 9 + let min d1 d2 = if compare d1 d2 < 0 then d1 else d2 10 + let month_to_int = Hashtbl.create 12 11 + 12 + let () = 13 + let add m i = Hashtbl.add month_to_int m i in 14 + add "Jan" 1 ; 15 + add "Feb" 2 ; 16 + add "Mar" 3 ; 17 + add "Apr" 4 ; 18 + add "May" 5 ; 19 + add "Jun" 6 ; 20 + add "Jul" 7 ; 21 + add "Aug" 8 ; 22 + add "Sep" 9 ; 23 + add "Oct" 10 ; 24 + add "Nov" 11 ; 25 + add "Dec" 12 26 + 27 + let map f = function Some x -> f x | None -> None 28 + let map2 f a b = match (a, b) with Some a, Some b -> f a b | _ -> None 29 + 30 + (* RFC3339 date *) 31 + let of_rfc3339 s = 32 + match Ptime.of_rfc3339 ~strict:false s with 33 + | Result.Error _ -> 34 + invalid_arg (sprintf "Syndic.Date.of_string: cannot parse %S" s) 35 + | Result.Ok (t, tz_offset_s, _) -> ( 36 + match Ptime.of_date_time @@ Ptime.to_date_time ?tz_offset_s t with 37 + | Some x -> x 38 + | None -> invalid_arg (sprintf "Syndic.Data.of_string: cannot part %S" s) ) 39 + 40 + (* Format: 41 + http://www.rssboard.org/rss-specification#ltpubdategtSubelementOfLtitemgt 42 + Examples: Sun, 19 May 2002 15:21:36 GMT Sat, 25 Sep 2010 08:01:00 -0700 20 43 + Mar 2013 03:47:14 +0000 *) 44 + let of_rfc822 s = 45 + let make_date day month year h m maybe_s z = 46 + let month = 47 + if String.length month <= 3 then month else String.sub month 0 3 48 + in 49 + let month = Hashtbl.find month_to_int month in 50 + let date = Ptime.of_date (year, month, day) in 51 + let s = 52 + if maybe_s <> "" && maybe_s.[0] = ':' then 53 + float_of_string (String.sub maybe_s 1 (String.length maybe_s - 1)) 54 + else 0. 55 + in 56 + let span = Ptime.Span.of_int_s ((h * 3600) + (m * 60)) in 57 + let span = 58 + map (fun x -> Some (Ptime.Span.add span x)) (Ptime.Span.of_float_s s) 59 + in 60 + let date_and_time = 61 + if z = "" || z = "GMT" || z = "UT" || z = "Z" then 62 + map2 (fun date span -> Ptime.add_span date span) date span 63 + |> map (fun x -> Some (Ptime.to_date_time x)) 64 + else 65 + (* FIXME: this should be made more robust. *) 66 + let tz_offset_s = 67 + match z with 68 + | "EST" -> -5 * 3600 69 + | "EDT" -> -4 * 3600 70 + | "CST" -> -6 * 3600 71 + | "CDT" -> -5 * 3600 72 + | "MST" -> -7 * 3600 73 + | "MDT" -> -6 * 3600 74 + | "PST" -> -8 * 3600 75 + | "PDT" -> -7 * 3600 76 + | "A" -> -1 * 3600 77 + | "M" -> -12 * 3600 78 + | "N" -> 1 * 3600 79 + | "Y" -> 12 * 3600 80 + | _ -> 81 + let zh = sscanf (String.sub z 0 3) "%i" (fun i -> i) in 82 + let zm = sscanf (String.sub z 3 2) "%i" (fun i -> i) in 83 + let tz_sign = if zh < 0 then -1 else 1 in 84 + if zh < 0 then tz_sign * ((-zh * 3600) + (zm * 60)) 85 + else tz_sign * ((zh * 3600) + (zm * 60)) 86 + in 87 + let rt = map2 (fun date span -> Ptime.add_span date span) date span in 88 + (* XXX: We lose minutes with this conversion, but Calendar does not 89 + propose to handle minutes. *) 90 + map (fun x -> Some (Ptime.to_date_time ~tz_offset_s x)) rt 91 + in 92 + match map Ptime.of_date_time date_and_time with 93 + | Some x -> x 94 + | None -> invalid_arg (sprintf "Syndic.Date.of_rfc822: cannot parse") 95 + in 96 + try 97 + if 'A' <= s.[0] && s.[0] <= 'Z' then 98 + try sscanf s "%_s %i %s %i %i:%i%s %s" make_date with _ -> 99 + try sscanf s "%_s %ist %s %i %i:%i%s %s" make_date with _ -> 100 + (* For e.g. "May 15th, 2019" — even though it is not standard *) 101 + sscanf s "%s %i%_s %i" (fun m d y -> make_date d m y 0 0 "" "UT") 102 + else 103 + try sscanf s "%i %s %i %i:%i%s %s" make_date with _ -> 104 + sscanf s "%i %s %i" (fun d m y -> make_date d m y 0 0 "" "UT") 105 + with _ -> 106 + (* Fallback: Some RSS feeds use RFC3339 dates instead of RFC822 *) 107 + try of_rfc3339 s 108 + with _ -> invalid_arg (sprintf "Syndic.Date.of_string+: cannot parse %S" s) 109 + 110 + type month = 111 + | Jan 112 + | Feb 113 + | Mar 114 + | Apr 115 + | May 116 + | Jun 117 + | Jul 118 + | Aug 119 + | Sep 120 + | Oct 121 + | Nov 122 + | Dec 123 + 124 + type day = Thu | Fri | Sat | Sun | Mon | Tue | Wed 125 + 126 + let string_of_month = function 127 + | Jan -> "Jan" 128 + | Feb -> "Feb" 129 + | Mar -> "Mar" 130 + | Apr -> "Apr" 131 + | May -> "May" 132 + | Jun -> "Jun" 133 + | Jul -> "Jul" 134 + | Aug -> "Aug" 135 + | Sep -> "Sep" 136 + | Oct -> "Oct" 137 + | Nov -> "Nov" 138 + | Dec -> "Dec" 139 + 140 + let month_of_date = 141 + let months = 142 + [|Jan; Feb; Mar; Apr; May; Jun; Jul; Aug; Sep; Oct; Nov; Dec|] 143 + in 144 + fun t -> 145 + let _, i, _ = Ptime.to_date t in 146 + months.(i - 1) 147 + 148 + let to_rfc3339 d = 149 + (* Example: 2014-03-19T15:51:25.050-07:00 *) 150 + Ptime.to_rfc3339 d 151 + 152 + (* Convenience functions *) 153 + 154 + let day_of_week = 155 + let wday = [|Thu; Fri; Sat; Sun; Mon; Tue; Wed|] in 156 + fun t -> 157 + let i = fst Ptime.(Span.to_d_ps @@ to_span t) mod 7 in 158 + wday.((if i < 0 then 7 + i else i)) 159 + 160 + let string_of_day = function 161 + | Thu -> "Thu" 162 + | Fri -> "Fri" 163 + | Sat -> "Sat" 164 + | Sun -> "Sun" 165 + | Mon -> "Mon" 166 + | Tue -> "Tue" 167 + | Wed -> "Wed" 168 + 169 + let year t = 170 + let year, _, _ = Ptime.to_date t in 171 + year 172 + 173 + let month = month_of_date 174 + 175 + let day t = 176 + let (_, _, day), _ = Ptime.to_date_time t in 177 + day 178 + 179 + let hour t = 180 + let _, ((hh, _, _), _) = Ptime.to_date_time t in 181 + hh 182 + 183 + let minute t = 184 + let _, ((_, mm, _), _) = Ptime.to_date_time t in 185 + mm 186 + 187 + let second t = 188 + let _, ((_, _, ss), _) = Ptime.to_date_time t in 189 + float_of_int ss 190 + 191 + let to_rfc822 t = 192 + (* Example: Sat, 25 Sep 2010 08:01:00 -0700 *) 193 + let ds = day_of_week t |> string_of_day in 194 + let ms = month_of_date t |> string_of_month in 195 + let (y, _m, d), ((hh, mm, ss), t) = Ptime.to_date_time t in 196 + Printf.sprintf "%s, %d %s %d %02d:%02d:%02d %04d" ds d ms y hh mm ss t

+54

stack/syndic/lib/syndic_date.mli

··· 1 + (** Minimal date module required by Syndic. *) 2 + 3 + (** A date with time. *) 4 + type t = Ptime.t 5 + 6 + val epoch : t 7 + (** The POSIX time, i.e. Thursday, 1 January 1970 00:00:00 (UTC). *) 8 + 9 + val compare : t -> t -> int 10 + (** Compare dates in increasing order. *) 11 + 12 + val max : t -> t -> t 13 + (** [max d1 d2] return the maximum (i.e. more recent) of the dates [d1] and 14 + [d2]. *) 15 + 16 + val min : t -> t -> t 17 + (** [min d1 d2] return the minimum (i.e. less recent) of the dates [d1] and 18 + [d2]. *) 19 + 20 + val of_rfc822 : string -> t 21 + val to_rfc822 : t -> string 22 + val of_rfc3339 : string -> t 23 + val to_rfc3339 : t -> string 24 + 25 + (** Month of the year. *) 26 + type month = 27 + | Jan 28 + | Feb 29 + | Mar 30 + | Apr 31 + | May 32 + | Jun 33 + | Jul 34 + | Aug 35 + | Sep 36 + | Oct 37 + | Nov 38 + | Dec 39 + 40 + val string_of_month : month -> string 41 + (** Return the 3 letters identifying the month in English. *) 42 + 43 + val year : t -> int 44 + (** Return the 4 digit year of the date. *) 45 + 46 + val month : t -> month 47 + (** Return the month of the date. *) 48 + 49 + val day : t -> int 50 + (** Return the day of the month (1..31). *) 51 + 52 + val hour : t -> int 53 + val minute : t -> int 54 + val second : t -> float

+9

stack/syndic/lib/syndic_error.ml

··· 1 + open Printf 2 + 3 + type t = Xmlm.pos * string 4 + 5 + exception Error of t 6 + 7 + let to_string = function 8 + | Error (pos, str) -> sprintf "%s at l.%d c.%d" str (fst pos) (snd pos) 9 + | exn -> Printexc.to_string exn

+7

stack/syndic/lib/syndic_error.mli

··· 1 + (** The common signature that all error modules must (at least) satisfy. *) 2 + 3 + type t = Xmlm.pos * string 4 + 5 + exception Error of t 6 + 7 + val to_string : exn -> string

+545

stack/syndic/lib/syndic_opml1.ml

··· 1 + open Syndic_common.XML 2 + open Syndic_common.Util 3 + open Printf 4 + module XML = Syndic_xml 5 + module Error = Syndic_error 6 + module Date = Syndic_date 7 + 8 + type head = 9 + { title: string 10 + ; date_created: Date.t option 11 + ; date_modified: Date.t 12 + ; owner_name: string 13 + ; owner_email: string 14 + ; expansion_state: int list 15 + ; vert_scroll_state: int option 16 + ; window_top: int option 17 + ; window_left: int option 18 + ; window_bottom: int option 19 + ; window_right: int option } 20 + 21 + let head ?date_created ?(expansion_state = []) ?vert_scroll_state ?window_top 22 + ?window_left ?window_bottom ?window_right ~date_modified ~owner_name 23 + ~owner_email title = 24 + { title 25 + ; date_created 26 + ; date_modified 27 + ; owner_name 28 + ; owner_email 29 + ; expansion_state 30 + ; vert_scroll_state 31 + ; window_top 32 + ; window_left 33 + ; window_bottom 34 + ; window_right } 35 + 36 + let string_of_xml name (pos, _, datas) = 37 + try get_leaf datas with Not_found -> 38 + raise (Error.Error (pos, name ^ " must not be empty")) 39 + 40 + let title_of_xml ~xmlbase:_ a = `Title (string_of_xml "<title>" a) 41 + let owner_name_of_xml ~xmlbase:_ a = `OwnerName (string_of_xml "<ownerName>" a) 42 + 43 + let owner_email_of_xml ~xmlbase:_ a = 44 + `OwnerEmail (string_of_xml "<ownerEmail>" a) 45 + 46 + let expansion_state_of_xml ~xmlbase:_ (pos, _, datas) = 47 + let explode s = 48 + let rec aux acc i = 49 + if i = String.length s then acc else aux (s.[i] :: acc) (succ i) 50 + in 51 + aux [] 0 |> List.rev 52 + in 53 + let implode l = 54 + let rec aux s = function 55 + | x :: xs -> aux (s ^ Char.escaped x) xs 56 + | [] -> s 57 + in 58 + aux "" l 59 + in 60 + let split sep s = 61 + let rec aux acc_char acc = function 62 + | x :: xs when x = sep -> aux [] (List.rev acc_char :: acc) xs 63 + | x :: xs -> aux (x :: acc_char) acc xs 64 + | [] -> List.rev acc_char :: acc 65 + in 66 + explode s |> aux [] [] |> List.rev |> List.map implode 67 + in 68 + try 69 + `ExpansionState (get_leaf datas |> split ',' |> List.map int_of_string) 70 + with 71 + | Not_found -> `ExpansionState [] 72 + | _ -> 73 + raise 74 + (Error.Error 75 + ( pos 76 + , "<expansionState> must be a list of numbers separated by commas \ 77 + as 1,2,3" )) 78 + 79 + let int_of_xml name (pos, _, datas) = 80 + try get_leaf datas |> int_of_string with 81 + | Not_found -> raise (Error.Error (pos, name ^ " must not be empty")) 82 + | Failure _ -> raise (Error.Error (pos, name ^ " must be an integer")) 83 + 84 + let vert_scroll_state_of_xml ~xmlbase:_ a = 85 + `VertScrollState (int_of_xml "<vertScrollState>" a) 86 + 87 + let window_top_of_xml ~xmlbase:_ a = `WindowTop (int_of_xml "<windowTop>" a) 88 + let window_left_of_xml ~xmlbase:_ a = `WindowLeft (int_of_xml "<windowLeft>" a) 89 + 90 + let window_bottom_of_xml ~xmlbase:_ a = 91 + `WindowBottom (int_of_xml "<windowBotton>" a) 92 + 93 + let window_right_of_xml ~xmlbase:_ a = 94 + `WindowRight (int_of_xml "<windowRight>" a) 95 + 96 + type head' = 97 + [ `Title of string 98 + | `DateCreated of Date.t 99 + | `DateModified of Date.t 100 + | `OwnerName of string 101 + | `OwnerEmail of string 102 + | `ExpansionState of int list 103 + | `VertScrollState of int 104 + | `WindowTop of int 105 + | `WindowLeft of int 106 + | `WindowBottom of int 107 + | `WindowRight of int ] 108 + 109 + let make_head ~pos (l : [< head'] list) = 110 + let title = 111 + match find (function `Title _ -> true | _ -> false) l with 112 + | Some (`Title s) -> s 113 + | _ -> 114 + raise 115 + (Error.Error (pos, "<head> MUST contains exactly one <title> element")) 116 + in 117 + let date_created = 118 + match find (function `DateCreated _ -> true | _ -> false) l with 119 + | Some (`DateCreated d) -> Some d 120 + | _ -> None 121 + in 122 + let date_modified = 123 + match find (function `DateModified _ -> true | _ -> false) l with 124 + | Some (`DateModified d) -> d 125 + | _ -> 126 + raise 127 + (Error.Error 128 + (pos, "<head> MUST contains exactly one <dateModified> element")) 129 + in 130 + let owner_name = 131 + match find (function `OwnerName _ -> true | _ -> false) l with 132 + | Some (`OwnerName s) -> s 133 + | _ -> 134 + raise 135 + (Error.Error 136 + (pos, "<head> MUST contains exactly one <ownerName> element")) 137 + in 138 + let owner_email = 139 + match find (function `OwnerEmail _ -> true | _ -> false) l with 140 + | Some (`OwnerEmail s) -> s 141 + | _ -> 142 + raise 143 + (Error.Error 144 + (pos, "<head> MUST contains exactly one <ownerEmail> element")) 145 + in 146 + let expansion_state = 147 + match find (function `ExpansionState _ -> true | _ -> false) l with 148 + | Some (`ExpansionState l) -> l 149 + | _ -> [] 150 + in 151 + let vert_scroll_state = 152 + match find (function `VertScrollState _ -> true | _ -> false) l with 153 + | Some (`VertScrollState n) -> Some n 154 + | _ -> None 155 + in 156 + let window_top = 157 + match find (function `WindowTop _ -> true | _ -> false) l with 158 + | Some (`WindowTop h) -> Some h 159 + | _ -> None 160 + in 161 + let window_left = 162 + match find (function `WindowLeft _ -> true | _ -> false) l with 163 + | Some (`WindowLeft x) -> Some x 164 + | _ -> None 165 + in 166 + let window_bottom = 167 + match find (function `WindowBottom _ -> true | _ -> false) l with 168 + | Some (`WindowBottom y) -> Some y 169 + | _ -> None 170 + in 171 + let window_right = 172 + match find (function `WindowRight _ -> true | _ -> false) l with 173 + | Some (`WindowRight r) -> Some r 174 + | _ -> None 175 + in 176 + `Head 177 + { title 178 + ; date_created 179 + ; date_modified 180 + ; owner_name 181 + ; owner_email 182 + ; expansion_state 183 + ; vert_scroll_state 184 + ; window_top 185 + ; window_left 186 + ; window_bottom 187 + ; window_right } 188 + 189 + let date_of_xml name (pos, _, datas) = 190 + let d = 191 + try get_leaf datas with Not_found -> 192 + raise (Error.Error (pos, name ^ " must not be empty")) 193 + in 194 + try Date.of_rfc822 d with _ -> 195 + raise (Error.Error (pos, sprintf "Date %S incorrect" d)) 196 + 197 + let date_created_of_xml ~xmlbase:_ a = 198 + `DateCreated (date_of_xml "<dateCreated>" a) 199 + 200 + let date_modified_of_xml ~xmlbase:_ a = 201 + `DateModified (date_of_xml "<dateModified>" a) 202 + 203 + let head_of_xml = 204 + let data_producer = 205 + [ ("title", title_of_xml) 206 + ; ("dateCreated", date_created_of_xml) 207 + ; ("dateModified", date_modified_of_xml) 208 + ; ("ownerName", owner_name_of_xml) 209 + ; ("ownerEmail", owner_email_of_xml) 210 + ; ("expansionState", expansion_state_of_xml) 211 + ; ("vertScrollState", vert_scroll_state_of_xml) 212 + ; ("windowTop", window_top_of_xml) 213 + ; ("windowLeft", window_left_of_xml) 214 + ; ("windowBottom", window_bottom_of_xml) 215 + ; ("windowRight", window_right_of_xml) ] 216 + in 217 + generate_catcher ~data_producer make_head 218 + 219 + let head_of_xml' = 220 + let data_producer = 221 + [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 222 + ; ("dateCreated", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `DateCreated a)) 223 + ; ("dateModified", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `DateModified a)) 224 + ; ("ownerName", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `OwnerName a)) 225 + ; ("ownerEmail", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `OwnerEmail a)) 226 + ; ( "expansionState" 227 + , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `ExpansionSate a) ) 228 + ; ( "vertScrollState" 229 + , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `VertScrollState a) ) 230 + ; ("windowTop", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowTop a)) 231 + ; ("windowLeft", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowLeft a)) 232 + ; ("windowBottom", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowBottom a)) 233 + ; ("windowRight", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WindowRight a)) 234 + ] 235 + in 236 + generate_catcher ~data_producer (fun ~pos:_ x -> `Head x) 237 + 238 + type outline = 239 + { text: string 240 + ; typ: string option 241 + ; is_comment: bool 242 + ; (* see common attributes *) 243 + is_breakpoint: bool 244 + ; (* see common attributes *) 245 + xml_url: Uri.t option 246 + ; html_url: Uri.t option 247 + ; attrs: Xmlm.attribute list 248 + ; outlines: outline list } 249 + 250 + let outline ?typ ?(is_comment = false) ?(is_breakpoint = false) ?xml_url 251 + ?html_url ?(attrs = []) ?(outlines = []) text = 252 + {text; typ; is_comment; is_breakpoint; xml_url; html_url; attrs; outlines} 253 + 254 + let rec outline_of_node ~xmlbase ((pos, (_, attributes), datas) : node) = 255 + let text = ref "" 256 + and typ = ref None 257 + and is_comment = ref false 258 + and is_breakpoint = ref false 259 + and xml_url = ref None 260 + and html_url = ref None 261 + and attrs = ref [] 262 + and outlines = ref [] in 263 + (* Get xml:base first as it must be used the these attributes too. *) 264 + let xmlbase = xmlbase_of_attr ~xmlbase attributes in 265 + let process_attrs ((name, v) as attr) = 266 + match name with 267 + | _, "text" -> text := v 268 + | _, "type" -> typ := Some v 269 + | _, "isComment" -> ( 270 + try is_comment := bool_of_string v with _ -> 271 + raise (Error.Error (pos, "<isComment> must have true or false value.")) 272 + ) 273 + | _, "isBreakpoint" -> ( 274 + try is_breakpoint := bool_of_string v with _ -> 275 + raise 276 + (Error.Error (pos, "<isBreakpoint> must have true or false value.")) 277 + ) 278 + | _, "xmlUrl" -> ( 279 + try xml_url := Some (XML.resolve ~xmlbase (Uri.of_string v)) with _ -> 280 + raise (Error.Error (pos, "<xmlUrl> content must be an URL")) ) 281 + | _, "htmlUrl" -> ( 282 + try html_url := Some (XML.resolve ~xmlbase (Uri.of_string v)) with _ -> 283 + raise (Error.Error (pos, "<htmlUrl> content must be an URL")) ) 284 + | _ -> attrs := attr :: !attrs 285 + in 286 + List.iter process_attrs attributes ; 287 + let process_outlines = function 288 + | XML.Node (p, (((ns, name), _) as t), d) -> 289 + if ns = "" && name = "outline" then 290 + outlines := outline_of_node ~xmlbase (p, t, d) :: !outlines 291 + | XML.Data _ -> () 292 + in 293 + List.iter process_outlines datas ; 294 + { text= !text 295 + ; typ= !typ 296 + ; is_comment= !is_comment 297 + ; is_breakpoint= !is_breakpoint 298 + ; xml_url= !xml_url 299 + ; html_url= !html_url 300 + ; attrs= !attrs 301 + ; outlines= !outlines } 302 + 303 + let outline_of_xml ~xmlbase a = `Outline (outline_of_node ~xmlbase a) 304 + 305 + let rec outline_of_node' ~xmlbase ((_pos, (_, attributes), datas) : node) = 306 + let el = ref [] in 307 + let xmlbase = xmlbase_of_attr ~xmlbase attributes in 308 + let el_of_attrs (name, v) = 309 + match name with 310 + | _, "text" -> el := `Text v :: !el 311 + | _, "type" -> el := `Type v :: !el 312 + | _, "isComment" -> el := `IsComment v :: !el 313 + | _, "isBreakpoint" -> el := `IsBreakpoint v :: !el 314 + | _, "xmlUrl" -> el := `XML_url (xmlbase, v) :: !el 315 + | _, "htmlUrl" -> el := `HTML_url (xmlbase, v) :: !el 316 + | _, name -> el := `Attr (name, v) :: !el 317 + in 318 + List.iter el_of_attrs attributes ; 319 + let process_outlines = function 320 + | XML.Node (p, (((ns, name), _) as t), d) -> 321 + if ns = "" && name = "outline" then 322 + el := `Outline (outline_of_node' ~xmlbase (p, t, d)) :: !el 323 + | XML.Data _ -> () 324 + in 325 + List.iter process_outlines datas ; 326 + !el 327 + 328 + let outline_of_xml' ~xmlbase a = `Outline (outline_of_node' ~xmlbase a) 329 + 330 + type body = outline list 331 + type body' = [`Outline of outline] 332 + 333 + let make_body ~pos (l : [< body'] list) = 334 + let l = List.map (function `Outline o -> o) l |> List.rev in 335 + if List.length l <> 0 then `Body l 336 + else raise (Error.Error (pos, "Body must contains one <outline> element.")) 337 + 338 + let body_of_xml = 339 + let data_producer = [("outline", outline_of_xml)] in 340 + generate_catcher ~data_producer make_body 341 + 342 + let body_of_xml' = 343 + let data_producer = [("outline", outline_of_xml')] in 344 + generate_catcher ~data_producer (fun ~pos:_ x -> `Body x) 345 + 346 + type t = {version: string; head: head; body: body} 347 + type opml = t 348 + 349 + (* FIXME: @deprecated *) 350 + 351 + type opml' = [`Version of string | `Head of head | `Body of body] 352 + 353 + let make_opml ~pos (l : [< opml'] list) = 354 + let version = 355 + match find (function `Version _ -> true | _ -> false) l with 356 + | Some (`Version v) -> v 357 + | _ -> raise (Error.Error (pos, "Opml tag must have <version> attribut")) 358 + in 359 + let head = 360 + match find (function `Head _ -> true | _ -> false) l with 361 + | Some (`Head h) -> h 362 + | _ -> 363 + raise 364 + (Error.Error (pos, "Opml tag must have exactly one <head> element")) 365 + in 366 + let body = 367 + match find (function `Body _ -> true | _ -> false) l with 368 + | Some (`Body b) -> b 369 + | _ -> 370 + raise 371 + (Error.Error (pos, "Opml tag must have exactly one <body> element")) 372 + in 373 + {version; head; body} 374 + 375 + let opml_of_xml = 376 + let attr_producer = [("version", fun ~xmlbase:_ a -> `Version a)] in 377 + let data_producer = [("head", head_of_xml); ("body", body_of_xml)] in 378 + generate_catcher ~attr_producer ~data_producer make_opml 379 + 380 + let opml_of_xml' = 381 + let attr_producer = [("version", fun ~xmlbase:_ a -> `Version a)] in 382 + let data_producer = [("head", head_of_xml'); ("body", body_of_xml')] in 383 + generate_catcher ~attr_producer ~data_producer (fun ~pos:_ x -> x) 384 + 385 + let find_opml l = 386 + find (function XML.Node (_, t, _) -> tag_is t "opml" | _ -> false) l 387 + 388 + let parse ?xmlbase input = 389 + match XML.of_xmlm input |> snd with 390 + | XML.Node (pos, tag, data) -> ( 391 + if tag_is tag "opml" then opml_of_xml ~xmlbase (pos, tag, data) 392 + else 393 + match find_opml data with 394 + | Some (XML.Node (p, t, d)) -> opml_of_xml ~xmlbase (p, t, d) 395 + | _ -> 396 + raise 397 + (Error.Error 398 + ((0, 0), "document MUST contains exactly one <opml> element")) 399 + ) 400 + | _ -> 401 + raise 402 + (Error.Error 403 + ((0, 0), "document MUST contains exactly one <opml> element")) 404 + 405 + let read ?xmlbase fname = 406 + let fh = open_in fname in 407 + try 408 + let x = parse ?xmlbase (XML.input_of_channel fh) in 409 + close_in fh ; x 410 + with e -> close_in fh ; raise e 411 + 412 + type uri = Uri.t option * string 413 + 414 + let unsafe ?xmlbase input = 415 + match XML.of_xmlm input |> snd with 416 + | XML.Node (pos, tag, data) -> ( 417 + if tag_is tag "opml" then `Opml (opml_of_xml' ~xmlbase (pos, tag, data)) 418 + else 419 + match find_opml data with 420 + | Some (XML.Node (p, t, d)) -> `Opml (opml_of_xml' ~xmlbase (p, t, d)) 421 + | _ -> `Opml [] ) 422 + | _ -> `Opml [] 423 + 424 + (* Output functions *) 425 + 426 + (* Names have have no namespace. This shortcut makes it more readable. *) 427 + let n x = ("", x) 428 + let node name sub = XML.Node (dummy_pos, (n name, []), sub) 429 + let data d = XML.Data (dummy_pos, d) 430 + 431 + let add_node name opt to_string xml = 432 + match opt with 433 + | None -> xml 434 + | Some d -> node name [data (to_string d)] :: xml 435 + 436 + let head_to_xml h = 437 + let xml = 438 + add_node "windowRight" h.window_right string_of_int [] 439 + |> add_node "windowBottom" h.window_bottom string_of_int 440 + |> add_node "windowLeft" h.window_left string_of_int 441 + |> add_node "windowTop" h.window_top string_of_int 442 + |> add_node "vertScrollState" h.vert_scroll_state string_of_int 443 + |> (fun x -> 444 + let c = List.map string_of_int h.expansion_state in 445 + node "expansionState" [data (String.concat "," c)] :: x ) 446 + |> add_node "dateCreated" h.date_created Date.to_rfc822 447 + in 448 + node "title" [data h.title] 449 + :: node "dateModified" [data (Date.to_rfc822 h.date_modified)] 450 + :: node "ownerName" [data h.owner_name] 451 + :: node "ownerEmail" [data h.owner_email] 452 + :: xml 453 + 454 + let add_attr name opt to_string attr = 455 + match opt with None -> attr | Some d -> (n name, to_string d) :: attr 456 + 457 + let id_string (s : string) = s 458 + 459 + let rec outline_to_xml o = 460 + (* isComment and isBreakpoint: absent <=> false *) 461 + let attr = 462 + if o.is_comment then (n "isComment", "true") :: o.attrs else o.attrs 463 + in 464 + let attr = 465 + if o.is_breakpoint then (n "isBreakpoint", "true") :: attr else attr 466 + in 467 + let attr = 468 + (n "text", o.text) :: attr 469 + |> add_attr "type" o.typ id_string 470 + |> add_attr "xmlUrl" o.xml_url Uri.to_string 471 + |> add_attr "htmlUrl" o.html_url Uri.to_string 472 + in 473 + XML.Node (dummy_pos, (n "outline", attr), List.map outline_to_xml o.outlines) 474 + 475 + let to_xml (o : t) = 476 + XML.Node 477 + ( dummy_pos 478 + , (n "opml", [(n "version", o.version)]) 479 + , [ node "head" (head_to_xml o.head) 480 + ; node "body" (List.map outline_to_xml o.body) ] ) 481 + 482 + let output opml dest = XML.to_xmlm (to_xml opml) (XML.make_output dest) 483 + 484 + let write opml fname = 485 + let fh = open_out fname in 486 + try 487 + output opml (`Channel fh) ; 488 + close_out fh 489 + with e -> close_out fh ; raise e 490 + 491 + (* Creation from atom feeds *) 492 + 493 + (* Remove all tags *) 494 + let rec add_to_buffer buf = function 495 + | XML.Node (_, _, subs) -> List.iter (add_to_buffer buf) subs 496 + | XML.Data (_, d) -> Buffer.add_string buf d 497 + 498 + let xhtml_to_string ~buf xhtml = 499 + Buffer.clear buf ; 500 + List.iter (add_to_buffer buf) xhtml ; 501 + Buffer.contents buf 502 + 503 + let string_of_text_construct ~buf = function 504 + (* FIXME: Once we use a proper HTML library, we probably would like to parse 505 + the HTML and remove the tags *) 506 + | (Syndic_atom.Text s : Syndic_atom.text_construct) | Syndic_atom.Html (_, s) 507 + -> 508 + s 509 + | Syndic_atom.Xhtml (_, x) -> xhtml_to_string ~buf x 510 + 511 + let rec first_non_empty = function 512 + | a :: tl -> 513 + if a.Syndic_atom.name = "" then first_non_empty tl 514 + else a.Syndic_atom.name 515 + | [] -> "" 516 + 517 + let outine_of_feed ~buf (f : Syndic_atom.feed) = 518 + let open Syndic_atom in 519 + let author = 520 + match f.authors with 521 + | _ :: _ -> first_non_empty f.authors 522 + | [] -> ( 523 + match f.entries with 524 + | e :: _ -> 525 + let a0, a = e.authors in 526 + if a0.name = "" then first_non_empty a else a0.name 527 + | [] -> "" ) 528 + in 529 + let title = string_of_text_construct ~buf f.title in 530 + let xml_url, is_comment = 531 + try 532 + let l = List.find (fun l -> l.rel = Self) f.links in 533 + let is_comment = 534 + match l.length with Some len -> len < 0 | None -> false 535 + in 536 + (Some l.href, is_comment) 537 + with Not_found -> (None, false) 538 + in 539 + outline ~typ:"rss" ~is_comment 540 + ~attrs:[(("", "title"), title)] 541 + ?xml_url author 542 + 543 + let of_atom ~head feeds = 544 + let buf = Buffer.create 1024 in 545 + {version= "1.1"; head; body= List.map (outine_of_feed ~buf) feeds}

+177

stack/syndic/lib/syndic_opml1.mli

··· 1 + (** [Syndic.Opml1]: compliant with {{:http://dev.opml.org/spec1.html} OPML 2 + 1.0}. 3 + 4 + The purpose of the {i Outline Processor Markup Language}, or OPML, is to 5 + provide a way to exchange information between outliners and Internet 6 + services that can be browsed or controlled through an outliner. Outlines 7 + can be used for specifications, legal briefs, product plans, presentations, 8 + screenplays, directories, diaries, discussion groups, chat systems and 9 + stories. *) 10 + 11 + module Error : module type of Syndic_error 12 + 13 + type head = 14 + { title: string (** Title of the document. *) 15 + ; date_created: Syndic_date.t option 16 + (** A date-time indicating when the document was created. *) 17 + ; date_modified: Syndic_date.t 18 + (** A date-time indicating when the document was last modified. *) 19 + ; owner_name: string (** Owner of the document. *) 20 + ; owner_email: string (** Email address of the owner of the document. *) 21 + ; expansion_state: int list 22 + (** A comma-separated list of line numbers that are expanded. The line 23 + numbers in the list tell you which headlines to expand. The order 24 + is important. For each element in the list, X, starting at the 25 + first summit, navigate flatdown X times and expand. Repeat for each 26 + element in the list. *) 27 + ; vert_scroll_state: int option 28 + (** A number saying which line of the outline is displayed on the top 29 + line of the window. This number is calculated with the expansion 30 + state already applied. *) 31 + ; window_top: int option 32 + (** Pixel location of the top edge of the window. *) 33 + ; window_left: int option 34 + (** Pixel location of the left edge of the window. *) 35 + ; window_bottom: int option 36 + (** Pixel location of the bottom edge of the window. *) 37 + ; window_right: int option 38 + (** Pixel location of the right edge of the window. *) } 39 + 40 + val head : 41 + ?date_created:Syndic_date.t 42 + -> ?expansion_state:int list 43 + -> ?vert_scroll_state:int 44 + -> ?window_top:int 45 + -> ?window_left:int 46 + -> ?window_bottom:int 47 + -> ?window_right:int 48 + -> date_modified:Syndic_date.t 49 + -> owner_name:string 50 + -> owner_email:string 51 + -> string 52 + -> head 53 + (** [head ~date_modified ~owner_name ~owner_email title] returns a head. By 54 + default, all optional arguments leave the corresponding fields empty. *) 55 + 56 + type outline = 57 + { text: string 58 + (** String that's displayed when the outline is being browsed or 59 + edited. There is no specific limit on the length of the text 60 + attribute.*) 61 + ; typ: string option 62 + (** "Type" of outline. Says how other attributes of the [outline] are 63 + interpreted. This is application dependent. For example, for news 64 + feed, it is common to have "rss" as the value of this field. *) 65 + ; is_comment: bool 66 + (** Indicates whether the outline is commented or not. By convention if 67 + an outline is commented, all subordinate outlines are considered to 68 + be commented as well. *) 69 + ; is_breakpoint: bool 70 + (** Indicates whether a breakpoint is set on this outline. This 71 + attribute is mainly necessary for outlines used to edit scripts 72 + that execute. *) 73 + ; xml_url: Uri.t option 74 + (** Link to the XML data associated to this outline, typically the RSS 75 + feed. *) 76 + ; html_url: Uri.t option 77 + (** Link to the HTML data associated to this outline, typically the 78 + HTML pages rendering the news feed. *) 79 + ; attrs: Xmlm.attribute list 80 + (** Association list of additional attributes in the outline. *) 81 + ; outlines: outline list 82 + (** List of [outline] elements that are considered sub-items of the 83 + current outline. *) } 84 + 85 + val outline : 86 + ?typ:string 87 + -> ?is_comment:bool 88 + -> ?is_breakpoint:bool 89 + -> ?xml_url:Uri.t 90 + -> ?html_url:Uri.t 91 + -> ?attrs:Xmlm.attribute list 92 + -> ?outlines:outline list 93 + -> string 94 + -> outline 95 + (** [outline text] returns an outline. 96 + 97 + @param is_comment Default: [false]. @param is_breakpoint Default: [false]. 98 + 99 + All the other parameters are bu default empty. *) 100 + 101 + (** List of outline elements. *) 102 + type body = outline list 103 + 104 + type t = 105 + { version: string (** The version of OPML document (should be 1.0 or 1.1) *) 106 + ; head: head 107 + ; body: body } 108 + 109 + val parse : ?xmlbase:Uri.t -> Xmlm.input -> t 110 + (** [parse i] takes [i] and returns an opml record which is the OCaml 111 + representation of the OPML document. *) 112 + 113 + val read : ?xmlbase:Uri.t -> string -> t 114 + (** [read fname] reads the file name [fname] and parses it. For the optional 115 + parameters, see {!parse}. *) 116 + 117 + val to_xml : t -> Syndic_xml.t 118 + (** [to_xml opml] converts the OPML document [opml] to an XML tree. *) 119 + 120 + val output : t -> Xmlm.dest -> unit 121 + (** [output opml dest] writes the XML tree of the OPML document [opml] to 122 + [dest]. *) 123 + 124 + val write : t -> string -> unit 125 + (** [write opml fname] writes the XML tree of the OPML document [opml] to the 126 + file named [fname]. *) 127 + 128 + val of_atom : head:head -> Syndic_atom.feed list -> t 129 + (** [of_atom ~head feeds] returns the OPML list of authors of the atom feeds. 130 + The [text] is the name associated to a feed, i.e. the name of the first 131 + author in the feed authors list or, if empty, the one of the first post. It 132 + is important that the feeds contain a link entry with [rel = Self] for 133 + the OPML document to be able to create a [xml_url] entry pointing to the 134 + feed. 135 + 136 + As a special convention, if the length of the [rel = Self] link is 137 + present and negative, the property [is_comment] is set to [true]. *) 138 + 139 + (**/**) 140 + 141 + (** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None], 142 + gives the base URI against which [uri] must be resolved if it is relative. *) 143 + type uri = Uri.t option * string 144 + 145 + val unsafe : 146 + ?xmlbase:Uri.t 147 + -> Xmlm.input 148 + -> [> `Opml of [> `Body of [> `Outline of ([> `Text of string 149 + | `Type of string 150 + | `IsBreakpoint of string 151 + | `IsComment of string 152 + | `Outline of 'a 153 + | `XML_url of uri 154 + | `HTML_url of uri 155 + | `Attr of string * string ] 156 + list 157 + as 158 + 'a) ] 159 + list 160 + | `Head of [> `DateCreated of string 161 + | `DateModified of string 162 + | `ExpansionSate of string 163 + | `OwnerEmail of string 164 + | `OwnerName of string 165 + | `Title of string 166 + | `VertScrollState of string 167 + | `WindowBottom of string 168 + | `WindowLeft of string 169 + | `WindowRight of string 170 + | `WindowTop of string ] 171 + list 172 + | `Version of string ] 173 + list ] 174 + (** Analysis without verification. *) 175 + 176 + (** @deprecated Use Syndic.Opml1.t instead. *) 177 + type opml = t

+562

stack/syndic/lib/syndic_rss1.ml

··· 1 + open Syndic_common.XML 2 + open Syndic_common.Util 3 + module XML = Syndic_xml 4 + module Error = Syndic_error 5 + 6 + let namespaces = 7 + ["http://purl.org/rss/1.0/"; "http://www.w3.org/1999/02/22-rdf-syntax-ns#"] 8 + 9 + type title = string 10 + 11 + let make_title ~pos (l : string list) = 12 + let title = 13 + match l with 14 + | d :: _ -> d 15 + | [] -> 16 + raise 17 + (Error.Error 18 + (pos, "The content of <title> MUST be a non-empty string")) 19 + in 20 + `Title title 21 + 22 + let title_of_xml, title_of_xml' = 23 + let leaf_producer ~xmlbase:_ _pos data = data in 24 + ( generate_catcher ~namespaces ~leaf_producer make_title 25 + , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Title x) ) 26 + 27 + type name = string 28 + 29 + let make_name ~pos (l : string list) = 30 + let name = 31 + match l with 32 + | d :: _ -> d 33 + | [] -> 34 + raise 35 + (Error.Error (pos, "The content of <name> MUST be a non-empty string")) 36 + in 37 + `Name name 38 + 39 + let name_of_xml, name_of_xml' = 40 + let leaf_producer ~xmlbase:_ _pos data = data in 41 + ( generate_catcher ~namespaces ~leaf_producer make_name 42 + , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Name x) ) 43 + 44 + type description = string 45 + 46 + let make_description ~pos (l : string list) = 47 + let description = 48 + match l with 49 + | s :: _ -> s 50 + | [] -> 51 + raise 52 + (Error.Error 53 + (pos, "The content of <description> MUST be a non-empty string")) 54 + in 55 + `Description description 56 + 57 + let description_of_xml, description_of_xml' = 58 + let leaf_producer ~xmlbase:_ _pos data = data in 59 + ( generate_catcher ~namespaces ~leaf_producer make_description 60 + , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Description x) 61 + ) 62 + 63 + type channel_image = Uri.t 64 + type channel_image' = [`URI of Uri.t option * string] 65 + 66 + let make_channel_image ~pos (l : [< channel_image'] list) = 67 + let image = 68 + match find (function `URI _ -> true) l with 69 + | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) 70 + | _ -> 71 + raise 72 + (Error.Error 73 + (pos, "The content of <image> MUST be a non-empty string")) 74 + in 75 + `Image image 76 + 77 + let channel_image_of_xml, channel_image_of_xml' = 78 + let attr_producer = [("resource", fun ~xmlbase a -> `URI (xmlbase, a))] in 79 + ( generate_catcher ~namespaces ~attr_producer make_channel_image 80 + , generate_catcher ~namespaces ~attr_producer (fun ~pos:_ x -> `Image x) ) 81 + 82 + type link = Uri.t 83 + type link' = [`URI of Uri.t option * string] 84 + 85 + let make_link ~pos (l : [< link'] list) = 86 + let link = 87 + match find (function `URI _ -> true) l with 88 + | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) 89 + | _ -> 90 + raise 91 + (Error.Error (pos, "The content of <link> MUST be a non-empty string")) 92 + in 93 + `Link link 94 + 95 + let link_of_xml, link_of_xml' = 96 + let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in 97 + ( generate_catcher ~namespaces ~leaf_producer make_link 98 + , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `Link x) ) 99 + 100 + type url = Uri.t 101 + type url' = [`URI of Uri.t option * string] 102 + 103 + let make_url ~pos (l : [< url'] list) = 104 + let url = 105 + match find (function `URI _ -> true) l with 106 + | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) 107 + | _ -> 108 + raise 109 + (Error.Error (pos, "The content of <url> MUST be a non-empty string")) 110 + in 111 + `URL url 112 + 113 + let url_of_xml, url_of_xml' = 114 + let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in 115 + ( generate_catcher ~namespaces ~leaf_producer make_url 116 + , generate_catcher ~namespaces ~leaf_producer (fun ~pos:_ x -> `URL x) ) 117 + 118 + type li = Uri.t 119 + type li' = [`URI of Uri.t option * string] 120 + 121 + let make_li ~pos (l : [< li'] list) = 122 + let url = 123 + match find (function `URI _ -> true) l with 124 + | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) 125 + | _ -> 126 + raise 127 + (Error.Error (pos, "Li elements MUST have a 'resource' attribute")) 128 + in 129 + `Li url 130 + 131 + let li_of_xml, li_of_xml' = 132 + let attr_producer = [("resource", fun ~xmlbase a -> `URI (xmlbase, a))] in 133 + ( generate_catcher ~namespaces ~attr_producer make_li 134 + , generate_catcher ~namespaces ~attr_producer (fun ~pos:_ x -> `Li x) ) 135 + 136 + type seq = li list 137 + type seq' = [`Li of li] 138 + 139 + let make_seq ~pos:_ (l : [< seq'] list) = 140 + let li = List.map (function `Li u -> u) l in 141 + `Seq li 142 + 143 + let seq_of_xml = 144 + let data_producer = [("li", li_of_xml)] in 145 + generate_catcher ~namespaces ~data_producer make_seq 146 + 147 + let seq_of_xml' = 148 + let data_producer = [("li", li_of_xml')] in 149 + generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Seq x) 150 + 151 + type items = seq 152 + type items' = [`Seq of seq] 153 + 154 + let make_items ~pos (l : [< items'] list) = 155 + let li = 156 + match find (function `Seq _ -> true) l with 157 + | Some (`Seq l) -> l 158 + | _ -> 159 + raise 160 + (Error.Error 161 + ( pos 162 + , "<items> elements MUST contains exactly one <rdf:Seq> element" 163 + )) 164 + in 165 + `Items li 166 + 167 + let items_of_xml = 168 + let data_producer = [("Seq", seq_of_xml)] in 169 + generate_catcher ~namespaces ~data_producer make_items 170 + 171 + let items_of_xml' = 172 + let data_producer = [("Seq", seq_of_xml')] in 173 + generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Items x) 174 + 175 + type channel_textinput = Uri.t 176 + type channel_textinput' = [`URI of Uri.t option * string] 177 + 178 + let make_textinput ~pos (l : [< channel_textinput'] list) = 179 + let url = 180 + match find (function `URI _ -> true) l with 181 + | Some (`URI (xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) 182 + | _ -> 183 + raise 184 + (Error.Error 185 + (pos, "Textinput elements MUST have a 'resource' attribute")) 186 + in 187 + `TextInput url 188 + 189 + let channel_textinput_of_xml, channel_textinput_of_xml' = 190 + let attr_producer = [("resource", fun ~xmlbase a -> `URI (xmlbase, a))] in 191 + ( generate_catcher ~namespaces ~attr_producer make_textinput 192 + , generate_catcher ~namespaces ~attr_producer (fun ~pos:_ x -> `TextInput x) 193 + ) 194 + 195 + type channel = 196 + { about: Uri.t 197 + ; (* must be uniq *) 198 + title: title 199 + ; link: link 200 + ; description: description 201 + ; image: channel_image option 202 + ; items: items 203 + ; textinput: channel_textinput option } 204 + 205 + type channel' = 206 + [ `Title of title 207 + | `Link of link 208 + | `Description of description 209 + | `Image of channel_image 210 + | `Items of items 211 + | `TextInput of channel_textinput 212 + | `About of Uri.t ] 213 + 214 + let make_channel ~pos (l : [< channel'] list) = 215 + let about = 216 + match find (function `About _ -> true | _ -> false) l with 217 + | Some (`About u) -> u 218 + | _ -> 219 + raise 220 + (Error.Error (pos, "Channel elements MUST have a 'about' attribute")) 221 + in 222 + let title = 223 + match find (function `Title _ -> true | _ -> false) l with 224 + | Some (`Title s) -> s 225 + | _ -> 226 + raise 227 + (Error.Error 228 + ( pos 229 + , "<channel> elements MUST contains exactly one <title> element" 230 + )) 231 + in 232 + let link = 233 + match find (function `Link _ -> true | _ -> false) l with 234 + | Some (`Link u) -> u 235 + | _ -> 236 + raise 237 + (Error.Error 238 + ( pos 239 + , "<channel> elements MUST contains exactly one <link> element" )) 240 + in 241 + let description = 242 + match find (function `Description _ -> true | _ -> false) l with 243 + | Some (`Description s) -> s 244 + | _ -> 245 + raise 246 + (Error.Error 247 + ( pos 248 + , "<channel> elements MUST contains exactly one <description> \ 249 + element" )) 250 + in 251 + let image = 252 + match find (function `Image _ -> true | _ -> false) l with 253 + | Some (`Image i) -> Some i 254 + | _ -> None 255 + in 256 + let items = 257 + match find (function `Items _ -> true | _ -> false) l with 258 + | Some (`Items l) -> l 259 + | _ -> 260 + raise 261 + (Error.Error 262 + ( pos 263 + , "<channel> elements MUST contains exactly one <items> element" 264 + )) 265 + in 266 + let textinput = 267 + match find (function `TextInput _ -> true | _ -> false) l with 268 + | Some (`TextInput u) -> Some u 269 + | _ -> None 270 + in 271 + `Channel 272 + ({about; title; link; description; image; items; textinput} : channel) 273 + 274 + let about_of_xml ~xmlbase a = `About (XML.resolve ~xmlbase (Uri.of_string a)) 275 + let about_of_xml' ~xmlbase a = `About (xmlbase, a) 276 + 277 + let channel_of_xml = 278 + let data_producer = 279 + [ ("title", title_of_xml); ("link", link_of_xml) 280 + ; ("description", description_of_xml) 281 + ; ("image", channel_image_of_xml) 282 + ; ("items", items_of_xml) 283 + ; ("textinput", channel_textinput_of_xml) ] 284 + in 285 + let attr_producer = [("about", about_of_xml)] in 286 + generate_catcher ~namespaces ~attr_producer ~data_producer make_channel 287 + 288 + let channel_of_xml' = 289 + let data_producer = 290 + [ ("title", title_of_xml'); ("link", link_of_xml') 291 + ; ("description", description_of_xml') 292 + ; ("image", channel_image_of_xml') 293 + ; ("items", items_of_xml') 294 + ; ("textinput", channel_textinput_of_xml') ] 295 + in 296 + let attr_producer = [("about", about_of_xml')] in 297 + generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x -> 298 + `Channel x ) 299 + 300 + type image = {about: Uri.t; title: title; url: url; link: link} 301 + type image' = [`Title of title | `Link of link | `URL of url | `About of Uri.t] 302 + 303 + let make_image ~pos (l : [< image'] list) = 304 + let title = 305 + match find (function `Title _ -> true | _ -> false) l with 306 + | Some (`Title t) -> t 307 + | _ -> 308 + raise 309 + (Error.Error 310 + (pos, "<image> elements MUST contains exactly one <title> element")) 311 + in 312 + let link = 313 + match find (function `Link _ -> true | _ -> false) l with 314 + | Some (`Link u) -> u 315 + | _ -> 316 + raise 317 + (Error.Error 318 + (pos, "<image> elements MUST contains exactly one <link> element")) 319 + in 320 + let url = 321 + match find (function `URL _ -> true | _ -> false) l with 322 + | Some (`URL u) -> u 323 + | _ -> 324 + raise 325 + (Error.Error 326 + (pos, "<image> elements MUST contains exactly one <url> element")) 327 + in 328 + let about = 329 + match find (function `About _ -> true | _ -> false) l with 330 + | Some (`About a) -> a 331 + | _ -> 332 + raise 333 + (Error.Error (pos, "Image elements MUST have a 'about' attribute")) 334 + in 335 + `Image ({about; title; url; link} : image) 336 + 337 + let image_of_xml = 338 + let data_producer = 339 + [("title", title_of_xml); ("link", link_of_xml); ("url", url_of_xml)] 340 + in 341 + let attr_producer = [("about", about_of_xml)] in 342 + generate_catcher ~namespaces ~attr_producer ~data_producer make_image 343 + 344 + let image_of_xml' = 345 + let data_producer = 346 + [("title", title_of_xml'); ("link", link_of_xml'); ("url", url_of_xml')] 347 + in 348 + let attr_producer = [("about", about_of_xml')] in 349 + generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x -> 350 + `Image x ) 351 + 352 + type item = 353 + {about: Uri.t; title: title; link: link; description: description option} 354 + 355 + type item' = 356 + [ `Title of title 357 + | `Link of link 358 + | `Description of description 359 + | `About of Uri.t ] 360 + 361 + let make_item ~pos (l : [< item'] list) = 362 + let title = 363 + match find (function `Title _ -> true | _ -> false) l with 364 + | Some (`Title t) -> t 365 + | _ -> 366 + raise 367 + (Error.Error 368 + (pos, "<item> elements MUST contains exactly one <title> element")) 369 + in 370 + let link = 371 + match find (function `Link _ -> true | _ -> false) l with 372 + | Some (`Link u) -> u 373 + | _ -> 374 + raise 375 + (Error.Error 376 + (pos, "<item> elements MUST contains exactly one <link> element")) 377 + in 378 + let description = 379 + match find (function `Description _ -> true | _ -> false) l with 380 + | Some (`Description d) -> Some d 381 + | _ -> None 382 + in 383 + let about = 384 + match find (function `About _ -> true | _ -> false) l with 385 + | Some (`About u) -> u 386 + | _ -> 387 + raise 388 + (Error.Error (pos, "Item elements MUST have a 'about' attribute")) 389 + in 390 + `Item ({about; title; link; description} : item) 391 + 392 + let item_of_xml = 393 + let data_producer = 394 + [ ("title", title_of_xml); ("link", link_of_xml) 395 + ; ("description", description_of_xml) ] 396 + in 397 + let attr_producer = [("about", about_of_xml)] in 398 + generate_catcher ~namespaces ~attr_producer ~data_producer make_item 399 + 400 + let item_of_xml' = 401 + let data_producer = 402 + [ ("title", title_of_xml'); ("link", link_of_xml') 403 + ; ("description", description_of_xml') ] 404 + in 405 + let attr_producer = [("about", about_of_xml')] in 406 + generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x -> 407 + `Item x ) 408 + 409 + type textinput = 410 + {about: Uri.t; title: title; description: description; name: name; link: link} 411 + 412 + type textinput' = 413 + [ `About of Uri.t 414 + | `Title of title 415 + | `Description of description 416 + | `Name of name 417 + | `Link of link ] 418 + 419 + let make_textinput ~pos (l : [< textinput'] list) = 420 + let title = 421 + match find (function `Title _ -> true | _ -> false) l with 422 + | Some (`Title s) -> s 423 + | _ -> 424 + raise 425 + (Error.Error 426 + ( pos 427 + , "<textinput> elements MUST contains exactly one <title> element" 428 + )) 429 + in 430 + let description = 431 + match find (function `Description _ -> true | _ -> false) l with 432 + | Some (`Description s) -> s 433 + | _ -> 434 + raise 435 + (Error.Error 436 + ( pos 437 + , "<textinput> elements MUST contains exactly one <description> \ 438 + element" )) 439 + in 440 + let name = 441 + match find (function `Name _ -> true | _ -> false) l with 442 + | Some (`Name n) -> n 443 + | _ -> 444 + raise 445 + (Error.Error 446 + ( pos 447 + , "<textinput> elements MUST contains exactly one <name> element" 448 + )) 449 + in 450 + let link = 451 + match find (function `Link _ -> true | _ -> false) l with 452 + | Some (`Link u) -> u 453 + | _ -> 454 + raise 455 + (Error.Error 456 + ( pos 457 + , "<textinput> elements MUST contains exactly one <link> element" 458 + )) 459 + in 460 + let about = 461 + match find (function `About _ -> true | _ -> false) l with 462 + | Some (`About u) -> u 463 + | _ -> 464 + raise 465 + (Error.Error (pos, "Textinput elements MUST have a 'about' attribute")) 466 + in 467 + `TextInput ({about; title; description; name; link} : textinput) 468 + 469 + let textinput_of_xml = 470 + let data_producer = 471 + [ ("title", title_of_xml) 472 + ; ("description", description_of_xml) 473 + ; ("name", name_of_xml); ("link", link_of_xml) ] 474 + in 475 + let attr_producer = [("about", about_of_xml)] in 476 + generate_catcher ~namespaces ~attr_producer ~data_producer make_textinput 477 + 478 + let textinput_of_xml' = 479 + let data_producer = 480 + [ ("title", title_of_xml') 481 + ; ("description", description_of_xml') 482 + ; ("name", name_of_xml'); ("link", link_of_xml') ] 483 + in 484 + let attr_producer = [("about", about_of_xml')] in 485 + generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos:_ x -> 486 + `TextInput x ) 487 + 488 + type rdf = 489 + { channel: channel 490 + ; image: image option 491 + ; item: item list 492 + ; textinput: textinput option } 493 + 494 + type rdf' = 495 + [ `Channel of channel 496 + | `Image of image 497 + | `Item of item 498 + | `TextInput of textinput ] 499 + 500 + let make_rdf ~pos (l : [< rdf'] list) = 501 + let channel = 502 + match find (function `Channel _ -> true | _ -> false) l with 503 + | Some (`Channel channel) -> channel 504 + | _ -> 505 + raise 506 + (Error.Error 507 + (pos, "<rdf> elements MUST contains exactly one <channel> element")) 508 + in 509 + let image = 510 + match find (function `Image _ -> true | _ -> false) l with 511 + | Some (`Image image) -> Some image 512 + | _ -> None 513 + in 514 + let textinput = 515 + match find (function `TextInput _ -> true | _ -> false) l with 516 + | Some (`TextInput textinput) -> Some textinput 517 + | _ -> None 518 + in 519 + let item = 520 + List.fold_left (fun acc -> function `Item x -> x :: acc | _ -> acc) [] l 521 + in 522 + ({channel; image; item; textinput} : rdf) 523 + 524 + let rdf_of_xml = 525 + let data_producer = 526 + [ ("channel", channel_of_xml) 527 + ; ("image", image_of_xml); ("item", item_of_xml) 528 + ; ("textinput", textinput_of_xml) ] 529 + in 530 + generate_catcher ~namespaces ~data_producer make_rdf 531 + 532 + let rdf_of_xml' = 533 + let data_producer = 534 + [ ("channel", channel_of_xml') 535 + ; ("image", image_of_xml'); ("item", item_of_xml') 536 + ; ("textinput", textinput_of_xml') ] 537 + in 538 + generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> x) 539 + 540 + let parse ?xmlbase input = 541 + match XML.of_xmlm input |> snd with 542 + | XML.Node (pos, tag, datas) when tag_is tag "RDF" -> 543 + rdf_of_xml ~xmlbase (pos, tag, datas) 544 + | _ -> 545 + raise 546 + (Error.Error 547 + ((0, 0), "document MUST contains exactly one <rdf> element")) 548 + 549 + let read ?xmlbase fname = 550 + let fh = open_in fname in 551 + try 552 + let x = parse ?xmlbase (XML.input_of_channel fh) in 553 + close_in fh ; x 554 + with e -> close_in fh ; raise e 555 + 556 + type uri = Uri.t option * string 557 + 558 + let unsafe ?xmlbase input = 559 + match XML.of_xmlm input |> snd with 560 + | XML.Node (pos, tag, datas) when tag_is tag "RDF" -> 561 + `RDF (rdf_of_xml' ~xmlbase (pos, tag, datas)) 562 + | _ -> `RDF []

+244

stack/syndic/lib/syndic_rss1.mli

··· 1 + (** [Syndic.Rss1]: compliant with {{: http://web.resource.org/rss/1.0/spec} RSS 2 + 1.0}. *) 3 + 4 + module Error : module type of Syndic_error 5 + 6 + (** A descriptive title for the channel, image, item and textinput. See RSS 1.0 7 + {{: http://web.resource.org/rss/1.0/spec#s5.3.1} § 5.3.1}, {{: 8 + http://web.resource.org/rss/1.0/spec#s5.4.1} § 5.4.1}, {{: 9 + http://web.resource.org/rss/1.0/spec#s5.5.1} § 5.5.1}, and {{: 10 + http://web.resource.org/rss/1.0/spec#s5.6.1} § 5.6.1}. 11 + 12 + {[ Syntax: <title>{title}</title> Requirement: Required for all Model: 13 + (#PCDATA) (Suggested) Maximum Length: 40 (characters) for channel, image 14 + and textinput (Suggested) Maximum Length: 100 for item ]} *) 15 + type title = string 16 + 17 + (** The text input field's (variable) name. {{: 18 + http://web.resource.org/rss/1.0/spec#s5.6.3} See RSS 1.0 § 5.6.3}. 19 + 20 + {[ Syntax: <name>{textinput_varname}</name> Requirement: Required if 21 + textinput Model: (#PCDATA) (Suggested) Maximum Length: 500 ]} *) 22 + type name = string 23 + 24 + (** This can be - a brief description of the channel's content, function, 25 + source, etc. {{: http://web.resource.org/rss/1.0/spec#s5.3.3} See RSS 1.0 26 + § 5.3.3}; - or a brief description/abstract of the item. {{: 27 + http://web.resource.org/rss/1.0/spec#s5.5.3} See RSS 1.0 § 5.5.3}; - or a 28 + brief description of the textinput field's purpose. For example: "Subscribe 29 + to our newsletter for..." or "Search our site's archive of..." {{: 30 + http://web.resource.org/rss/1.0/spec#s5.6.2} See RSS 1.0 § 5.6.2}. 31 + 32 + {[ Syntax: <description>{description}</description> Requirement: Required 33 + only for channel and textinput Model: (#PCDATA) (Suggested) Maximum Length: 34 + 500 for channel and item (Suggested) Maximum Length: 100 for textinput ]} *) 35 + type description = string 36 + 37 + (** Establishes an RDF association between the optional image element [5.4] and 38 + this particular RSS channel. The rdf:resource's {image_uri} must be the 39 + same as the image element's rdf:about {image_uri}. {{: 40 + http://web.resource.org/rss/1.0/spec#s5.3.4} See RSS 1.0 § 5.3.4} 41 + 42 + {[ Syntax: <image rdf:resource="{image_uri}" /> Requirement: Required only 43 + if image element present Model: Empty ]} *) 44 + type channel_image = Uri.t 45 + 46 + (** The URL of the image to used in the "src" attribute of the channel's image 47 + tag when rendered as HTML. {{: http://web.resource.org/rss/1.0/spec#s5.4.2} 48 + See RSS 1.0 § 5.4.2} 49 + 50 + {[ Syntax: <url>{image_url}</url> Requirement: Required if the image 51 + element is present Model: (#PCDATA) (Suggested) Maximum Length: 500 ]} *) 52 + type url = Uri.t 53 + 54 + (** This can be - The URL to which an HTML rendering of the channel title will 55 + link, commonly the parent site's home or news page. {{: 56 + http://web.resource.org/rss/1.0/spec#s5.3.2} See RSS 1.0 § 5.3.2} - Or the 57 + URL to which an HTML rendering of the channel image will link. This, as 58 + with the channel's title link, is commonly the parent site's home or news 59 + page. {{: http://web.resource.org/rss/1.0/spec#s5.4.3} See RSS 1.0 § 60 + 5.4.3} - Or the item's URL. {{: 61 + http://web.resource.org/rss/1.0/spec#s5.5.2} See RSS 1.0 § 5.5.2} - Or the 62 + URL to which a textinput submission will be directed (using GET). {{: 63 + http://web.resource.org/rss/1.0/spec#s5.6.4} See RSS 1.0 § 5.6.4} 64 + 65 + {[ Syntax: <link>{link}</link> Requirement: Required for all Model: 66 + (#PCDATA) (Suggested) Maximum Length: 500 ]} *) 67 + type link = Uri.t 68 + 69 + (** An RDF table of contents, associating the document's items [5.5] with this 70 + particular RSS channel. Each item's rdf:resource {item_uri} must be the 71 + same as the associated item element's rdf:about {item_uri}. 72 + 73 + An RDF Seq (sequence) is used to contain all the items rather than an RDF 74 + Bag to denote item order for rendering and reconstruction. 75 + 76 + Note that items appearing in the document but not as members of the channel 77 + level items sequence are likely to be discarded by RDF parsers. 78 + 79 + {{: http://web.resource.org/rss/1.0/spec#s5.3.5} See RSS 1.0 § 5.3.5} 80 + 81 + {[ Syntax: <items><rdf:Seq><rdf:li resource="{item_uri}" /> ... 82 + </rdf:Seq></items> Requirement: Required ]} *) 83 + type items = Uri.t list 84 + 85 + (** Establishes an RDF association between the optional textinput element [5.6] 86 + and this particular RSS channel. The {textinput_uri} rdf:resource must be 87 + the same as the textinput element's rdf:about {textinput_uri}. 88 + 89 + {{: http://web.resource.org/rss/1.0/spec#s5.3.6} See RSS 1.0 § 5.3.6} 90 + 91 + {[ Syntax: <textinput rdf:resource="{textinput_uri}" /> Requirement: 92 + Required only if texinput element present Model: Empty ]} *) 93 + type channel_textinput = Uri.t 94 + 95 + (** The channel element contains metadata describing the channel itself, 96 + including a title, brief description, and URL link to the described 97 + resource (the channel provider's home page, for instance). The \{resource\} 98 + URL of the channel element's rdf:about attribute must be unique with 99 + respect to any other rdf:about attributes in the RSS document and is a URI 100 + which identifies the channel. Most commonly, this is either the URL of the 101 + homepage being described or a URL where the RSS file can be found. 102 + 103 + {{: http://web.resource.org/rss/1.0/spec#s5.3} See RSS 1.0 § 5.3} 104 + 105 + {[ Syntax: <channel rdf:about="{resource}"> Requirement: Required Required 106 + Attribute(s): rdf:about Model: (title, link, description, image?, items, 107 + textinput?) ]} *) 108 + type channel = 109 + { about: Uri.t (** must be unique *) 110 + ; title: title 111 + ; link: link 112 + ; description: description 113 + ; image: channel_image option 114 + ; items: items 115 + ; textinput: channel_textinput option } 116 + 117 + (** An image to be associated with an HTML rendering of the channel. This image 118 + should be of a format supported by the majority of Web browsers. While the 119 + later 0.91 specification allowed for a width of 1–144 and height of 120 + 1–400, convention (and the 0.9 specification) dictate 88×31. 121 + 122 + {{: http://web.resource.org/rss/1.0/spec#s5.4} See RSS 1.0 § 5.4} 123 + 124 + {[ Syntax: <image rdf:about="{image_uri}"> Requirement: Optional; if 125 + present, must also be present in channel element [5.3.4] Required 126 + Attribute(s): rdf:about Model: (title, url, link) ]} *) 127 + type image = {about: Uri.t; title: title; url: url; link: link} 128 + 129 + (** While commonly a news headline, with RSS 1.0's modular extensibility, this 130 + can be just about anything: discussion posting, job listing, software patch 131 + -- any object with a URI. There may be a minimum of one item per RSS 132 + document. While RSS 1.0 does not enforce an upper limit, for backward 133 + compatibility with RSS 0.9 and 0.91, a maximum of fifteen items is 134 + recommended. 135 + 136 + [about] must be unique with respect to any other rdf:about attributes in 137 + the RSS document and is a URI which identifies the item. The value of 138 + [about] should be identical to the value of the [link], if possible. 139 + 140 + {{: http://web.resource.org/rss/1.0/spec#s5.5} See RSS 1.0 § 5.5} 141 + 142 + {[ Syntax: <item rdf:about="{item_uri}"> Requirement: >= 1 Recommendation 143 + (for backward compatibility with 0.9x): 1-15 Required Attribute(s): 144 + rdf:about Model: (title, link, description?) ]} *) 145 + type item = 146 + {about: Uri.t; title: title; link: link; description: description option} 147 + 148 + (** The textinput element affords a method for submitting form data to an 149 + arbitrary URL — usually located at the parent website. The form processor 150 + at the receiving end only is assumed to handle the HTTP GET method. 151 + 152 + The field is typically used as a search box or subscription form — among 153 + others. While this is of some use when RSS documents are rendered as 154 + channels (see MNN) and accompanied by human readable title and description, 155 + the ambiguity in automatic determination of meaning of this overloaded 156 + element renders it otherwise not particularly useful. RSS 1.0 therefore 157 + suggests either deprecation or augmentation with some form of resource 158 + discovery of this element in future versions while maintaining it for 159 + backward compatiblity with RSS 0.9. 160 + 161 + [about] must be unique with respect to any other rdf:about attributes in 162 + the RSS document and is a URI which identifies the textinput. [about] 163 + should be identical to the value of the [link], if possible. 164 + 165 + {{: http://web.resource.org/rss/1.0/spec#s5.6} See RSS 1.0 § 5.6 } 166 + 167 + {[ Syntax: <textinput rdf:about="{textinput_uri}"> Requirement: Optional; 168 + if present, must also be present in channel element [5.3.6] Required 169 + Attribute(s): rdf:about Model: (title, description, name, link) ]} *) 170 + type textinput = 171 + {about: Uri.t; title: title; description: description; name: name; link: link} 172 + 173 + (** The outermost level in every RSS 1.0 compliant document is the RDF element. 174 + The opening RDF tag assocaties the rdf: namespace prefix with the RDF 175 + syntax schema and establishes the RSS 1.0 schema as the default namespace 176 + for the document. 177 + 178 + While any valid namespace prefix may be used, document creators are advised 179 + to consider "rdf:" normative. Those wishing to be strictly 180 + backward-compatible with RSS 0.9 must use "rdf:". 181 + 182 + {{: http://web.resource.org/rss/1.0/spec#s5.2} See RSS 1.0 § 5.2} 183 + 184 + {[ Syntax: <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 185 + xmlns="http://purl.org/rss/1.0/"> Requirement: Required exactly as shown, 186 + aside from any additional namespace declarations Model: (channel, image?, 187 + item+, textinput?) ]} *) 188 + type rdf = 189 + { channel: channel 190 + ; image: image option 191 + ; item: item list 192 + ; textinput: textinput option } 193 + 194 + val parse : ?xmlbase:Uri.t -> Xmlm.input -> rdf 195 + (** [parse xml] returns the RDF corresponding to [xml]. 196 + 197 + @raise Error.raise_expectation if [xml] is not a valid RSS1 document. 198 + 199 + @param xmlbase the base URI against which relative URIs in the XML RSS1 200 + document are resolved. It is superseded by xml:base present in the document 201 + (if any). *) 202 + 203 + val read : ?xmlbase:Uri.t -> string -> rdf 204 + (** [read fname] reads the file name [fname] and parses it. For the optional 205 + parameters, see {!parse}. *) 206 + 207 + (**/**) 208 + 209 + (** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None], 210 + gives the base URI against which [uri] must be resolved if it is relative. *) 211 + type uri = Uri.t option * string 212 + 213 + val unsafe : 214 + ?xmlbase:Uri.t 215 + -> Xmlm.input 216 + -> [> `RDF of [> `Channel of [> `About of uri 217 + | `Description of string list 218 + | `Image of [> `URI of uri] list 219 + | `Items of [> `Seq of [> `Li of [> `URI of uri] 220 + list ] 221 + list ] 222 + list 223 + | `Link of [> `URI of uri] list 224 + | `TextInput of [> `URI of uri] list 225 + | `Title of string list ] 226 + list 227 + | `Image of [> `About of uri 228 + | `Link of [> `URI of uri] list 229 + | `Title of string list 230 + | `URL of [> `URI of uri] list ] 231 + list 232 + | `Item of [> `About of uri 233 + | `Description of string list 234 + | `Link of [> `URI of uri] list 235 + | `Title of string list ] 236 + list 237 + | `TextInput of [> `About of uri 238 + | `Description of string list 239 + | `Link of [> `URI of uri] list 240 + | `Name of string list 241 + | `Title of string list ] 242 + list ] 243 + list ] 244 + (** Analysis without verification, enjoy ! *)

+1271

stack/syndic/lib/syndic_rss2.ml

··· 1 + open Syndic_common.XML 2 + open Syndic_common.Util 3 + module XML = Syndic_xml 4 + module Atom = Syndic_atom 5 + module Date = Syndic_date 6 + module Error = Syndic_error 7 + 8 + type image = 9 + { url: Uri.t 10 + ; title: string 11 + ; link: Uri.t 12 + ; width: int 13 + ; (* default 88 *) 14 + height: int 15 + ; (* default 31 *) 16 + description: string option } 17 + 18 + type image' = 19 + [ `URL of Uri.t 20 + | `Title of string 21 + | `Link of Uri.t 22 + | `Width of int 23 + | `Height of int 24 + | `Description of string ] 25 + 26 + let make_image ~pos (l : [< image'] list) = 27 + let url = 28 + match find (function `URL _ -> true | _ -> false) l with 29 + | Some (`URL u) -> u 30 + | _ -> 31 + raise 32 + (Error.Error 33 + (pos, "<image> elements MUST contains exactly one <url> element")) 34 + in 35 + let title = 36 + match find (function `Title _ -> true | _ -> false) l with 37 + | Some (`Title t) -> t 38 + | _ -> 39 + raise 40 + (Error.Error 41 + (pos, "<image> elements MUST contains exactly one <title> element")) 42 + in 43 + let link = 44 + match find (function `Link _ -> true | _ -> false) l with 45 + | Some (`Link l) -> l 46 + | _ -> 47 + raise 48 + (Error.Error 49 + (pos, "<image> elements MUST contains exactly one <link> element")) 50 + in 51 + let width = 52 + match find (function `Width _ -> true | _ -> false) l with 53 + | Some (`Width w) -> w 54 + | _ -> 88 55 + (* cf. RFC *) 56 + in 57 + let height = 58 + match find (function `Height _ -> true | _ -> false) l with 59 + | Some (`Height h) -> h 60 + | _ -> 31 61 + (* cf. RFC *) 62 + in 63 + let description = 64 + match find (function `Description _ -> true | _ -> false) l with 65 + | Some (`Description s) -> Some s 66 + | _ -> None 67 + in 68 + `Image ({url; title; link; width; height; description} : image) 69 + 70 + let url_of_xml ~xmlbase a = `URL (XML.resolve ~xmlbase (Uri.of_string a)) 71 + let url_of_xml' ~xmlbase a = `URL (xmlbase, a) 72 + 73 + let image_url_of_xml ~xmlbase (pos, _tag, datas) = 74 + try url_of_xml ~xmlbase (get_leaf datas) with Not_found -> 75 + raise 76 + (Error.Error (pos, "The content of <uri> MUST be a non-empty string")) 77 + 78 + let image_title_of_xml ~xmlbase:_ (_pos, _tag, datas) = 79 + `Title (try get_leaf datas with Not_found -> "") 80 + 81 + let image_link_of_xml ~xmlbase (pos, _tag, datas) = 82 + try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 83 + with Not_found -> 84 + raise 85 + (Error.Error (pos, "The content of <link> MUST be a non-empty string")) 86 + 87 + let image_size_of_xml ~max ~xmlbase:_ (pos, tag, datas) = 88 + try 89 + let size = int_of_string (get_leaf datas) in 90 + if size > max then 91 + raise 92 + (Error.Error 93 + ( pos 94 + , "size of " 95 + ^ get_tag_name tag 96 + ^ " exceeded (max is " 97 + ^ string_of_int max 98 + ^ ")" )) 99 + else size 100 + with 101 + | Not_found -> 102 + raise 103 + (Error.Error 104 + ( pos 105 + , "The content of <" 106 + ^ get_tag_name tag 107 + ^ "> MUST be a non-empty string" )) 108 + | Failure _ -> 109 + raise 110 + (Error.Error 111 + (pos, "The content of <" ^ get_tag_name tag ^ "> MUST be an integer")) 112 + 113 + let image_width_of_xml ~xmlbase a = 114 + `Width (image_size_of_xml ~max:144 ~xmlbase a) 115 + 116 + let image_height_of_xml ~xmlbase a = 117 + `Height (image_size_of_xml ~max:400 ~xmlbase a) 118 + 119 + let image_description_of_xml ~xmlbase:_ (pos, _tag, datas) = 120 + try `Description (get_leaf datas) with Not_found -> 121 + raise 122 + (Error.Error 123 + (pos, "The content of <description> MUST be a non-empty string")) 124 + 125 + let image_of_xml = 126 + let data_producer = 127 + [ ("url", image_url_of_xml) 128 + ; ("title", image_title_of_xml) 129 + ; ("link", image_link_of_xml) 130 + ; ("width", image_width_of_xml) 131 + ; ("height", image_height_of_xml) 132 + ; ("description", image_description_of_xml) ] 133 + in 134 + generate_catcher ~data_producer make_image 135 + 136 + let image_of_xml' = 137 + let data_producer = 138 + [ ("url", dummy_of_xml ~ctor:url_of_xml') 139 + ; ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 140 + ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) 141 + ; ("width", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Width a)) 142 + ; ("height", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Height a)) 143 + ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 144 + ] 145 + in 146 + generate_catcher ~data_producer (fun ~pos:_ x -> `Image x) 147 + 148 + type cloud = {uri: Uri.t; registerProcedure: string; protocol: string} 149 + 150 + type cloud' = 151 + [ `Domain of string 152 + | `Port of string 153 + | `Path of string 154 + | `RegisterProcedure of string 155 + | `Protocol of string ] 156 + 157 + let make_cloud ~pos (l : [< cloud'] list) = 158 + let domain = 159 + match find (function `Domain _ -> true | _ -> false) l with 160 + | Some (`Domain u) -> u 161 + | _ -> 162 + raise 163 + (Error.Error (pos, "Cloud elements MUST have a 'domain' attribute")) 164 + in 165 + let port = 166 + match find (function `Port _ -> true | _ -> false) l with 167 + | Some (`Port p) -> int_of_string p 168 + | _ -> 169 + raise 170 + (Error.Error (pos, "Cloud elements MUST have a 'port' attribute")) 171 + in 172 + let path = 173 + match find (function `Path _ -> true | _ -> false) l with 174 + | Some (`Path p) -> p 175 + | _ -> 176 + raise 177 + (Error.Error (pos, "Cloud elements MUST have a 'path' attribute")) 178 + in 179 + let registerProcedure = 180 + match find (function `RegisterProcedure _ -> true | _ -> false) l with 181 + | Some (`RegisterProcedure r) -> r 182 + | _ -> 183 + raise 184 + (Error.Error 185 + (pos, "Cloud elements MUST have a 'registerProcedure' attribute")) 186 + in 187 + let protocol = 188 + match find (function `Protocol _ -> true | _ -> false) l with 189 + | Some (`Protocol p) -> p 190 + | _ -> 191 + raise 192 + (Error.Error (pos, "Cloud elements MUST have a 'protocol' attribute")) 193 + in 194 + let uri = Uri.make ~host:domain ~port ~path () in 195 + `Cloud ({uri; registerProcedure; protocol} : cloud) 196 + 197 + let cloud_attr_producer = 198 + [ ("domain", fun ~xmlbase:_ a -> `Domain a) 199 + ; ("port", fun ~xmlbase:_ a -> `Port a) 200 + ; ("path", fun ~xmlbase:_ a -> `Path a) 201 + ; (* XXX: it's RFC compliant ? *) 202 + ("registerProcedure", fun ~xmlbase:_ a -> `RegisterProcedure a) 203 + ; ("protocol", fun ~xmlbase:_ a -> `Protocol a) ] 204 + 205 + let cloud_of_xml = 206 + generate_catcher ~attr_producer:cloud_attr_producer make_cloud 207 + 208 + let cloud_of_xml' = 209 + generate_catcher ~attr_producer:cloud_attr_producer (fun ~pos:_ x -> `Cloud x) 210 + 211 + type textinput = {title: string; description: string; name: string; link: Uri.t} 212 + 213 + type textinput' = 214 + [`Title of string | `Description of string | `Name of string | `Link of Uri.t] 215 + 216 + let make_textinput ~pos (l : [< textinput'] list) = 217 + let title = 218 + match find (function `Title _ -> true | _ -> false) l with 219 + | Some (`Title t) -> t 220 + | _ -> 221 + raise 222 + (Error.Error 223 + ( pos 224 + , "<textinput> elements MUST contains exactly one <title> element" 225 + )) 226 + in 227 + let description = 228 + match find (function `Description _ -> true | _ -> false) l with 229 + | Some (`Description s) -> s 230 + | _ -> 231 + raise 232 + (Error.Error 233 + ( pos 234 + , "<textinput> elements MUST contains exactly one <description> \ 235 + element" )) 236 + in 237 + let name = 238 + match find (function `Name _ -> true | _ -> false) l with 239 + | Some (`Name s) -> s 240 + | _ -> 241 + raise 242 + (Error.Error 243 + ( pos 244 + , "<textinput> elements MUST contains exactly one <name> element" 245 + )) 246 + in 247 + let link = 248 + match find (function `Link _ -> true | _ -> false) l with 249 + | Some (`Link u) -> u 250 + | _ -> 251 + raise 252 + (Error.Error 253 + ( pos 254 + , "<textinput> elements MUST contains exactly one <link> element" 255 + )) 256 + in 257 + `TextInput ({title; description; name; link} : textinput) 258 + 259 + let textinput_title_of_xml ~xmlbase:_ (pos, _tag, datas) = 260 + try `Title (get_leaf datas) with Not_found -> 261 + raise 262 + (Error.Error (pos, "The content of <title> MUST be a non-empty string")) 263 + 264 + let textinput_description_of_xml ~xmlbase:_ (pos, _tag, datas) = 265 + try `Description (get_leaf datas) with Not_found -> 266 + raise 267 + (Error.Error 268 + (pos, "The content of <description> MUST be a non-empty string")) 269 + 270 + let textinput_name_of_xml ~xmlbase:_ (pos, _tag, datas) = 271 + try `Name (get_leaf datas) with Not_found -> 272 + raise 273 + (Error.Error (pos, "The content of <name> MUST be a non-empty string")) 274 + 275 + let textinput_link_of_xml ~xmlbase (pos, _tag, datas) = 276 + try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 277 + with Not_found -> 278 + raise 279 + (Error.Error (pos, "The content of <link> MUST be a non-empty string")) 280 + 281 + let textinput_of_xml = 282 + let data_producer = 283 + [ ("title", textinput_title_of_xml) 284 + ; ("description", textinput_description_of_xml) 285 + ; ("name", textinput_name_of_xml) 286 + ; ("link", textinput_link_of_xml) ] 287 + in 288 + generate_catcher ~data_producer make_textinput 289 + 290 + let textinput_of_xml' = 291 + let data_producer = 292 + [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 293 + ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 294 + ; ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a)) 295 + ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) ] 296 + in 297 + generate_catcher ~data_producer (fun ~pos:_ x -> `TextInput x) 298 + 299 + type category = {data: string; domain: Uri.t option} 300 + type category' = [`Data of string | `Domain of Uri.t] 301 + 302 + let make_category ~pos:_ (l : [< category'] list) = 303 + let data = 304 + match find (function `Data _ -> true | _ -> false) l with 305 + | Some (`Data s) -> s 306 + | _ -> "" 307 + in 308 + let domain = 309 + match find (function `Domain _ -> true | _ -> false) l with 310 + | Some (`Domain d) -> Some d 311 + | _ -> None 312 + in 313 + `Category ({data; domain} : category) 314 + 315 + let category_of_xml = 316 + let attr_producer = 317 + [("domain", fun ~xmlbase:_ a -> `Domain (Uri.of_string a))] 318 + in 319 + let leaf_producer ~xmlbase:_ _pos data = `Data data in 320 + generate_catcher ~attr_producer ~leaf_producer make_category 321 + 322 + let category_of_xml' = 323 + let attr_producer = [("domain", fun ~xmlbase:_ a -> `Domain a)] in 324 + let leaf_producer ~xmlbase:_ _pos data = `Data data in 325 + generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Category x) 326 + 327 + type enclosure = {url: Uri.t; length: int; mime: string} 328 + type enclosure' = [`URL of Uri.t | `Length of string | `Mime of string] 329 + 330 + let make_enclosure ~pos (l : [< enclosure'] list) = 331 + let url = 332 + match find (function `URL _ -> true | _ -> false) l with 333 + | Some (`URL u) -> u 334 + | _ -> 335 + raise 336 + (Error.Error (pos, "Enclosure elements MUST have a 'url' attribute")) 337 + in 338 + let length = 339 + match find (function `Length _ -> true | _ -> false) l with 340 + | Some (`Length l) -> int_of_string l 341 + | _ -> 342 + raise 343 + (Error.Error 344 + (pos, "Enclosure elements MUST have a 'length' attribute")) 345 + in 346 + let mime = 347 + match find (function `Mime _ -> true | _ -> false) l with 348 + | Some (`Mime m) -> m 349 + | _ -> 350 + raise 351 + (Error.Error (pos, "Enclosure elements MUST have a 'type' attribute")) 352 + in 353 + `Enclosure ({url; length; mime} : enclosure) 354 + 355 + let enclosure_of_xml = 356 + let attr_producer = 357 + [ ("url", url_of_xml) 358 + ; ("length", fun ~xmlbase:_ a -> `Length a) 359 + ; ("type", fun ~xmlbase:_ a -> `Mime a) ] 360 + in 361 + generate_catcher ~attr_producer make_enclosure 362 + 363 + let enclosure_of_xml' = 364 + let attr_producer = 365 + [ ("url", url_of_xml') 366 + ; ("length", fun ~xmlbase:_ a -> `Length a) 367 + ; ("type", fun ~xmlbase:_ a -> `Mime a) ] 368 + in 369 + generate_catcher ~attr_producer (fun ~pos:_ x -> `Enclosure x) 370 + 371 + type guid = {data: Uri.t; (* must be uniq *) permalink: bool (* default true *)} 372 + type guid' = [`Data of Uri.t option * string | `Permalink of string] 373 + 374 + (* Some RSS2 server output <guid isPermaLink="false"></guid> ! *) 375 + let make_guid ~pos:_ (l : [< guid'] list) = 376 + let permalink = 377 + match find (function `Permalink _ -> true | _ -> false) l with 378 + | Some (`Permalink b) -> bool_of_string b 379 + | _ -> true 380 + (* cf. RFC *) 381 + in 382 + match find (function `Data _ -> true | _ -> false) l with 383 + | Some (`Data (xmlbase, u)) -> 384 + if u = "" then `Guid None 385 + else 386 + (* When the GUID is declared as a permlink, resolve it using xml:base *) 387 + let data = 388 + if permalink then XML.resolve ~xmlbase (Uri.of_string u) 389 + else Uri.of_string u 390 + in 391 + `Guid (Some ({data; permalink} : guid)) 392 + | _ -> `Guid None 393 + 394 + let guid_of_xml, guid_of_xml' = 395 + let attr_producer = [("isPermaLink", fun ~xmlbase:_ a -> `Permalink a)] in 396 + let leaf_producer ~xmlbase _pos data = `Data (xmlbase, data) in 397 + ( generate_catcher ~attr_producer ~leaf_producer make_guid 398 + , generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Guid x) ) 399 + 400 + type source = {data: string; url: Uri.t} 401 + type source' = [`Data of string | `URL of Uri.t] 402 + 403 + let make_source ~pos (l : [< source'] list) = 404 + let data = 405 + match find (function `Data _ -> true | _ -> false) l with 406 + | Some (`Data s) -> s 407 + | _ -> 408 + raise 409 + (Error.Error 410 + (pos, "The content of <source> MUST be a non-empty string")) 411 + in 412 + let url = 413 + match find (function `URL _ -> true | _ -> false) l with 414 + | Some (`URL u) -> u 415 + | _ -> 416 + raise 417 + (Error.Error (pos, "Source elements MUST have a 'url' attribute")) 418 + in 419 + `Source ({data; url} : source) 420 + 421 + let source_of_xml = 422 + let attr_producer = [("url", url_of_xml)] in 423 + let leaf_producer ~xmlbase:_ _pos data = `Data data in 424 + generate_catcher ~attr_producer ~leaf_producer make_source 425 + 426 + let source_of_xml' = 427 + let attr_producer = [("url", url_of_xml')] in 428 + let leaf_producer ~xmlbase:_ _pos data = `Data data in 429 + generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Source x) 430 + 431 + type story = 432 + | All of string * Uri.t option * string 433 + | Title of string 434 + | Description of Uri.t option * string 435 + 436 + type item = 437 + { story: story 438 + ; content: Uri.t option * string 439 + ; link: Uri.t option 440 + ; author: string option 441 + ; (* e-mail *) 442 + categories: category list 443 + ; comments: Uri.t option 444 + ; enclosure: enclosure option 445 + ; guid: guid option 446 + ; pubDate: Date.t option 447 + ; (* date *) 448 + source: source option } 449 + 450 + [@@@warning "-34"] 451 + 452 + type item' = 453 + [ `Title of string 454 + | `Description of Uri.t option * string (* xmlbase, description *) 455 + | `Content of Uri.t option * string 456 + | `Link of Uri.t 457 + | `Author of string (* e-mail *) 458 + | `Category of category 459 + | `Comments of Uri.t 460 + | `Enclosure of enclosure 461 + | `Guid of guid 462 + | `PubDate of Date.t 463 + | `Source of source ] 464 + 465 + let make_item ~pos (l : _ list) = 466 + let story = 467 + match 468 + ( find (function `Title _ -> true | _ -> false) l 469 + , find (function `Description _ -> true | _ -> false) l ) 470 + with 471 + | Some (`Title t), Some (`Description (x, d)) -> All (t, x, d) 472 + | Some (`Title t), _ -> Title t 473 + | _, Some (`Description (x, d)) -> Description (x, d) 474 + | _, _ -> 475 + raise (Error.Error (pos, "Item expected <title> or <description> tag")) 476 + in 477 + let content = 478 + match find (function `Content _ -> true | _ -> false) l with 479 + | Some (`Content (x, c)) -> (x, c) 480 + | _ -> (None, "") 481 + in 482 + let link = 483 + match find (function `Link _ -> true | _ -> false) l with 484 + | Some (`Link l) -> l 485 + | _ -> None 486 + in 487 + let author = 488 + match find (function `Author _ -> true | _ -> false) l with 489 + | Some (`Author a) -> Some a 490 + | _ -> None 491 + in 492 + let categories = 493 + let fn = fun acc -> function `Category x -> x :: acc | _ -> acc in 494 + List.fold_left fn [] l |> List.rev 495 + in 496 + let comments = 497 + match find (function `Comments _ -> true | _ -> false) l with 498 + | Some (`Comments c) -> Some c 499 + | _ -> None 500 + in 501 + let enclosure = 502 + match find (function `Enclosure _ -> true | _ -> false) l with 503 + | Some (`Enclosure e) -> Some e 504 + | _ -> None 505 + in 506 + let guid = 507 + match find (function `Guid _ -> true | _ -> false) l with 508 + | Some (`Guid g) -> g 509 + | _ -> None 510 + in 511 + let pubDate = 512 + match find (function `PubDate _ -> true | _ -> false) l with 513 + | Some (`PubDate p) -> Some p 514 + | _ -> None 515 + in 516 + let source = 517 + match find (function `Source _ -> true | _ -> false) l with 518 + | Some (`Source s) -> Some s 519 + | _ -> None 520 + in 521 + `Item 522 + ( { story 523 + ; content 524 + ; link 525 + ; author 526 + ; categories 527 + ; comments 528 + ; enclosure 529 + ; guid 530 + ; pubDate 531 + ; source } 532 + : item ) 533 + 534 + let item_title_of_xml ~xmlbase:_ (pos, _tag, datas) = 535 + try `Title (get_leaf datas) with Not_found -> 536 + raise 537 + (Error.Error (pos, "The content of <title> MUST be a non-empty string")) 538 + 539 + let item_description_of_xml ~xmlbase (_pos, _tag, datas) = 540 + `Description (xmlbase, try get_leaf datas with Not_found -> "") 541 + 542 + let item_content_of_xml ~xmlbase (_pos, _tag, datas) = 543 + `Content (xmlbase, try get_leaf datas with Not_found -> "") 544 + 545 + let item_link_of_xml ~xmlbase (_pos, _tag, datas) = 546 + `Link 547 + ( try Some (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 548 + with Not_found -> None ) 549 + 550 + let item_author_of_xml ~xmlbase:_ (pos, _tag, datas) = 551 + try `Author (get_leaf datas) with Not_found -> 552 + raise 553 + (Error.Error (pos, "The content of <author> MUST be a non-empty string")) 554 + 555 + let item_comments_of_xml ~xmlbase (pos, _tag, datas) = 556 + try `Comments (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 557 + with Not_found -> 558 + raise 559 + (Error.Error (pos, "The content of <comments> MUST be a non-empty string")) 560 + 561 + let item_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) = 562 + try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found -> 563 + raise 564 + (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string")) 565 + 566 + let item_namespaces = [""; "http://purl.org/rss/1.0/modules/content/"] 567 + 568 + let item_of_xml = 569 + let data_producer = 570 + [ ("title", item_title_of_xml) 571 + ; ("description", item_description_of_xml) 572 + ; (* <content:encoded> where 573 + xmlns:content="http://purl.org/rss/1.0/modules/content/" *) 574 + ("encoded", item_content_of_xml) 575 + ; ("link", item_link_of_xml) 576 + ; ("author", item_author_of_xml) 577 + ; ("category", category_of_xml) 578 + ; ("comments", item_comments_of_xml) 579 + ; ("enclosure", enclosure_of_xml) 580 + ; ("guid", guid_of_xml) 581 + ; ("pubDate", item_pubdate_of_xml) 582 + ; ("source", source_of_xml) ] 583 + in 584 + generate_catcher ~data_producer make_item ~namespaces:item_namespaces 585 + 586 + let item_of_xml' = 587 + let data_producer = 588 + [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 589 + ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 590 + ; ("encoded", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Content a)) 591 + ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) 592 + ; ("author", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Author a)) 593 + ; ("category", category_of_xml') 594 + ; ("comments", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Comments a)) 595 + ; ("enclosure", enclosure_of_xml') 596 + ; ("guid", guid_of_xml') 597 + ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a)) 598 + ; ("source", source_of_xml') ] 599 + in 600 + generate_catcher ~data_producer 601 + (fun ~pos:_ x -> `Item x) 602 + ~namespaces:item_namespaces 603 + 604 + type channel = 605 + { title: string 606 + ; link: Uri.t 607 + ; description: string 608 + ; language: string option 609 + ; copyright: string option 610 + ; managingEditor: string option 611 + ; webMaster: string option 612 + ; pubDate: Date.t option 613 + ; lastBuildDate: Date.t option 614 + ; category: string option 615 + ; generator: string option 616 + ; docs: Uri.t option 617 + ; cloud: cloud option 618 + ; ttl: int option 619 + ; image: image option 620 + ; rating: int option 621 + ; textInput: textinput option 622 + ; skipHours: int option 623 + ; skipDays: int option 624 + ; items: item list } 625 + 626 + type channel' = 627 + [ `Title of string 628 + | `Link of Uri.t 629 + | `Description of string 630 + | `Language of string 631 + | `Copyright of string 632 + | `ManagingEditor of string 633 + | `WebMaster of string 634 + | `PubDate of Date.t 635 + | `LastBuildDate of Date.t 636 + | `Category of string 637 + | `Generator of string 638 + | `Docs of Uri.t 639 + | `Cloud of cloud 640 + | `TTL of int 641 + | `Image of image 642 + | `Rating of int 643 + | `TextInput of textinput 644 + | `SkipHours of int 645 + | `SkipDays of int 646 + | `Item of item ] 647 + 648 + let make_channel ~pos (l : [< channel'] list) = 649 + let title = 650 + match find (function `Title _ -> true | _ -> false) l with 651 + | Some (`Title t) -> t 652 + | _ -> 653 + raise 654 + (Error.Error 655 + ( pos 656 + , "<channel> elements MUST contains exactly one <title> element" 657 + )) 658 + in 659 + let link = 660 + match find (function `Link _ -> true | _ -> false) l with 661 + | Some (`Link l) -> l 662 + | _ -> 663 + raise 664 + (Error.Error 665 + ( pos 666 + , "<channel> elements MUST contains exactly one <link> element" )) 667 + in 668 + let description = 669 + match find (function `Description _ -> true | _ -> false) l with 670 + | Some (`Description l) -> l 671 + | _ -> 672 + raise 673 + (Error.Error 674 + ( pos 675 + , "<channel> elements MUST contains exactly one <description> \ 676 + element" )) 677 + in 678 + let language = 679 + match find (function `Language _ -> true | _ -> false) l with 680 + | Some (`Language a) -> Some a 681 + | _ -> None 682 + in 683 + let copyright = 684 + match find (function `Copyright _ -> true | _ -> false) l with 685 + | Some (`Copyright a) -> Some a 686 + | _ -> None 687 + in 688 + let managingEditor = 689 + match find (function `ManagingEditor _ -> true | _ -> false) l with 690 + | Some (`ManagingEditor a) -> Some a 691 + | _ -> None 692 + in 693 + let webMaster = 694 + match find (function `WebMaster _ -> true | _ -> false) l with 695 + | Some (`WebMaster a) -> Some a 696 + | _ -> None 697 + in 698 + let pubDate = 699 + match find (function `PubDate _ -> true | _ -> false) l with 700 + | Some (`PubDate a) -> Some a 701 + | _ -> None 702 + in 703 + let lastBuildDate = 704 + match find (function `LastBuildDate _ -> true | _ -> false) l with 705 + | Some (`LastBuildDate a) -> Some a 706 + | _ -> None 707 + in 708 + let category = 709 + match find (function `Category _ -> true | _ -> false) l with 710 + | Some (`Category a) -> Some a 711 + | _ -> None 712 + in 713 + let generator = 714 + match find (function `Generator _ -> true | _ -> false) l with 715 + | Some (`Generator a) -> Some a 716 + | _ -> None 717 + in 718 + let docs = 719 + match find (function `Docs _ -> true | _ -> false) l with 720 + | Some (`Docs a) -> Some a 721 + | _ -> None 722 + in 723 + let cloud = 724 + match find (function `Cloud _ -> true | _ -> false) l with 725 + | Some (`Cloud a) -> Some a 726 + | _ -> None 727 + in 728 + let ttl = 729 + match find (function `TTL _ -> true | _ -> false) l with 730 + | Some (`TTL a) -> Some a 731 + | _ -> None 732 + in 733 + let image = 734 + match find (function `Image _ -> true | _ -> false) l with 735 + | Some (`Image a) -> Some a 736 + | _ -> None 737 + in 738 + let rating = 739 + match find (function `Rating _ -> true | _ -> false) l with 740 + | Some (`Rating a) -> Some a 741 + | _ -> None 742 + in 743 + let textInput = 744 + match find (function `TextInput _ -> true | _ -> false) l with 745 + | Some (`TextInput a) -> Some a 746 + | _ -> None 747 + in 748 + let skipHours = 749 + match find (function `SkipHours _ -> true | _ -> false) l with 750 + | Some (`SkipHours a) -> Some a 751 + | _ -> None 752 + in 753 + let skipDays = 754 + match find (function `SkipDays _ -> true | _ -> false) l with 755 + | Some (`SkipDays a) -> Some a 756 + | _ -> None 757 + in 758 + let items = 759 + List.fold_left (fun acc -> function `Item x -> x :: acc | _ -> acc) [] l 760 + in 761 + ( { title 762 + ; link 763 + ; description 764 + ; language 765 + ; copyright 766 + ; managingEditor 767 + ; webMaster 768 + ; pubDate 769 + ; lastBuildDate 770 + ; category 771 + ; generator 772 + ; docs 773 + ; cloud 774 + ; ttl 775 + ; image 776 + ; rating 777 + ; textInput 778 + ; skipHours 779 + ; skipDays 780 + ; items } 781 + : channel ) 782 + 783 + let channel_title_of_xml ~xmlbase:_ (pos, _tag, datas) = 784 + try `Title (get_leaf datas) with Not_found -> 785 + raise 786 + (Error.Error (pos, "The content of <title> MUST be a non-empty string")) 787 + 788 + let channel_description_of_xml ~xmlbase:_ (_pos, _tag, datas) = 789 + `Description (try get_leaf datas with Not_found -> "") 790 + 791 + let channel_link_of_xml ~xmlbase (pos, _tag, datas) = 792 + try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 793 + with Not_found -> 794 + raise 795 + (Error.Error (pos, "The content of <link> MUST be a non-empty string")) 796 + 797 + let channel_language_of_xml ~xmlbase:_ (pos, _tag, datas) = 798 + try `Language (get_leaf datas) with Not_found -> 799 + raise 800 + (Error.Error (pos, "The content of <language> MUST be a non-empty string")) 801 + 802 + let channel_copyright_of_xml ~xmlbase:_ (_pos, _tag, datas) = 803 + try `Copyright (get_leaf datas) with Not_found -> `Copyright "" 804 + 805 + (* XXX(dinosaure): aempty copyright is allowed. *) 806 + 807 + let channel_managingeditor_of_xml ~xmlbase:_ (pos, _tag, datas) = 808 + try `ManagingEditor (get_leaf datas) with Not_found -> 809 + raise 810 + (Error.Error 811 + (pos, "The content of <managingEditor> MUST be a non-empty string")) 812 + 813 + let channel_webmaster_of_xml ~xmlbase:_ (pos, _tag, datas) = 814 + try `WebMaster (get_leaf datas) with Not_found -> 815 + raise 816 + (Error.Error 817 + (pos, "The content of <webMaster> MUST be a non-empty string")) 818 + 819 + let channel_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) = 820 + try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found -> 821 + raise 822 + (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string")) 823 + 824 + let channel_lastbuilddate_of_xml ~xmlbase:_ (pos, _tag, datas) = 825 + try `LastBuildDate (Date.of_rfc822 (get_leaf datas)) with Not_found -> 826 + raise 827 + (Error.Error 828 + (pos, "The content of <lastBuildDate> MUST be a non-empty string")) 829 + 830 + let channel_category_of_xml ~xmlbase:_ (pos, _tag, datas) = 831 + try `Category (get_leaf datas) with Not_found -> 832 + raise 833 + (Error.Error (pos, "The content of <category> MUST be a non-empty string")) 834 + 835 + let channel_generator_of_xml ~xmlbase:_ (pos, _tag, datas) = 836 + try `Generator (get_leaf datas) with Not_found -> 837 + raise 838 + (Error.Error 839 + (pos, "The content of <generator> MUST be a non-empty string")) 840 + 841 + let channel_docs_of_xml ~xmlbase (pos, _tag, datas) = 842 + try `Docs (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 843 + with Not_found -> 844 + raise 845 + (Error.Error (pos, "The content of <docs> MUST be a non-empty string")) 846 + 847 + let channel_ttl_of_xml ~xmlbase:_ (pos, _tag, datas) = 848 + try `TTL (int_of_string (get_leaf datas)) with _ -> 849 + raise 850 + (Error.Error 851 + ( pos 852 + , "The content of <ttl> MUST be a non-empty string representing an \ 853 + integer" )) 854 + 855 + let channel_rating_of_xml ~xmlbase:_ (pos, _tag, datas) = 856 + try `Rating (int_of_string (get_leaf datas)) with _ -> 857 + raise 858 + (Error.Error 859 + ( pos 860 + , "The content of <rating> MUST be a non-empty string representing \ 861 + an integer" )) 862 + 863 + let channel_skipHours_of_xml ~xmlbase:_ (pos, _tag, datas) = 864 + try `SkipHours (int_of_string (get_leaf datas)) with _ -> 865 + raise 866 + (Error.Error 867 + ( pos 868 + , "The content of <skipHours> MUST be a non-empty string \ 869 + representing an integer" )) 870 + 871 + let channel_skipDays_of_xml ~xmlbase:_ (pos, _tag, datas) = 872 + try `SkipDays (int_of_string (get_leaf datas)) with _ -> 873 + raise 874 + (Error.Error 875 + ( pos 876 + , "The content of <skipDays> MUST be a non-empty string representing \ 877 + an integer" )) 878 + 879 + let channel_of_xml = 880 + let data_producer = 881 + [ ("title", channel_title_of_xml) 882 + ; ("link", channel_link_of_xml) 883 + ; ("description", channel_description_of_xml) 884 + ; ("Language", channel_language_of_xml) 885 + ; ("copyright", channel_copyright_of_xml) 886 + ; ("managingeditor", channel_managingeditor_of_xml) 887 + ; ("webmaster", channel_webmaster_of_xml) 888 + ; ("pubdate", channel_pubdate_of_xml) 889 + ; ("lastbuilddate", channel_lastbuilddate_of_xml) 890 + ; ("category", channel_category_of_xml) 891 + ; ("generator", channel_generator_of_xml) 892 + ; ("docs", channel_docs_of_xml) 893 + ; ("cloud", cloud_of_xml) 894 + ; ("ttl", channel_ttl_of_xml) 895 + ; ("image", image_of_xml) 896 + ; ("rating", channel_rating_of_xml) 897 + ; ("textinput", textinput_of_xml) 898 + ; ("skiphours", channel_skipHours_of_xml) 899 + ; ("skipdays", channel_skipDays_of_xml) 900 + ; ("item", item_of_xml) ] 901 + in 902 + generate_catcher ~data_producer make_channel 903 + 904 + let channel_of_xml' = 905 + let data_producer = 906 + [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 907 + ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) 908 + ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 909 + ; ("Language", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Language a)) 910 + ; ("copyright", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Copyright a)) 911 + ; ( "managingeditor" 912 + , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `ManagingEditor a) ) 913 + ; ("webmaster", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WebMaster a)) 914 + ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a)) 915 + ; ( "lastbuilddate" 916 + , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `LastBuildDate a) ) 917 + ; ("category", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Category a)) 918 + ; ("generator", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Generator a)) 919 + ; ("docs", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Docs a)) 920 + ; ("cloud", cloud_of_xml') 921 + ; ("ttl", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `TTL a)) 922 + ; ("image", image_of_xml') 923 + ; ("rating", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Rating a)) 924 + ; ("textinput", textinput_of_xml') 925 + ; ("skiphours", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipHours a)) 926 + ; ("skipdays", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipDays a)) 927 + ; ("item", item_of_xml') ] 928 + in 929 + generate_catcher ~data_producer (fun ~pos:_ x -> x) 930 + 931 + let find_channel l = 932 + find 933 + (function 934 + | XML.Node (_pos, tag, _data) -> tag_is tag "channel" 935 + | XML.Data _ -> false) 936 + l 937 + 938 + let parse ?xmlbase input = 939 + match XML.of_xmlm input |> snd with 940 + | XML.Node (pos, tag, data) -> ( 941 + if tag_is tag "channel" then channel_of_xml ~xmlbase (pos, tag, data) 942 + else 943 + match find_channel data with 944 + | Some (XML.Node (p, t, d)) -> channel_of_xml ~xmlbase (p, t, d) 945 + | Some (XML.Data _) | _ -> 946 + raise 947 + (Error.Error 948 + ( (0, 0) 949 + , "document MUST contains exactly one <channel> element" )) ) 950 + | _ -> 951 + raise 952 + (Error.Error 953 + ((0, 0), "document MUST contains exactly one <channel> element")) 954 + 955 + let read ?xmlbase fname = 956 + let fh = open_in fname in 957 + try 958 + let x = parse ?xmlbase (XML.input_of_channel fh) in 959 + close_in fh ; x 960 + with e -> close_in fh ; raise e 961 + 962 + type uri = Uri.t option * string 963 + 964 + let unsafe ?xmlbase input = 965 + match XML.of_xmlm input |> snd with 966 + | XML.Node (pos, tag, data) -> ( 967 + if tag_is tag "channel" then 968 + `Channel (channel_of_xml' ~xmlbase (pos, tag, data)) 969 + else 970 + match find_channel data with 971 + | Some (XML.Node (p, t, d)) -> 972 + `Channel (channel_of_xml' ~xmlbase (p, t, d)) 973 + | Some (XML.Data _) | None -> `Channel [] ) 974 + | _ -> `Channel [] 975 + 976 + (* Conversion to Atom *) 977 + 978 + let map_option o f = match o with None -> None | Some v -> Some (f v) 979 + 980 + (* Assume ASCII or a superset like UTF-8. *) 981 + let valid_local_part = 982 + let is_valid c = 983 + let c = Char.unsafe_chr c in 984 + ('a' <= c && c <= 'z') 985 + || ('A' <= c && c <= 'Z') 986 + || ('0' <= c && c <= '9') 987 + || c = '.' 988 + (* shouldn't be the 1st char and not appear twice consecutively *) 989 + || c = '!' 990 + || c = '#' 991 + || c = '$' 992 + || c = '%' 993 + || c = '&' 994 + || c = '\'' 995 + || c = '*' 996 + || c = '+' 997 + || c = '-' 998 + || c = '/' 999 + || c = '=' 1000 + || c = '?' 1001 + || c = '^' 1002 + || c = '_' 1003 + || c = '`' 1004 + || c = '{' 1005 + || c = '|' 1006 + || c = '}' 1007 + || c = '~' 1008 + in 1009 + Array.init 256 is_valid 1010 + 1011 + let is_valid_local_part c = valid_local_part.(Char.code c) 1012 + 1013 + let valid_domain_part = 1014 + let is_valid c = 1015 + let c = Char.unsafe_chr c in 1016 + ('a' <= c && c <= 'z') 1017 + || ('A' <= c && c <= 'Z') 1018 + || ('0' <= c && c <= '9') 1019 + || c = '.' 1020 + || c = '.' 1021 + in 1022 + Array.init 256 is_valid 1023 + 1024 + let is_valid_domain_part c = valid_domain_part.(Char.code c) 1025 + 1026 + (* Valid range [s.[i]], [i0 ≤ i < i1]. *) 1027 + let sub_no_braces s i0 i1 = 1028 + let i0 = if s.[i0] = '(' then i0 + 1 else i0 in 1029 + let i1 = if s.[i1 - 1] = ')' then i1 - 1 else i1 in 1030 + String.sub s i0 (i1 - i0) 1031 + 1032 + (* The item author sometimes contains the name and email under the form "name 1033 + <email>" or "email (name)". Try to extract both compnents. *) 1034 + let extract_name_email a = 1035 + try 1036 + let i = String.index a '@' in 1037 + (* or Not_found *) 1038 + let len = String.length a in 1039 + let i0 = ref (i - 1) in 1040 + while !i0 >= 0 && is_valid_local_part a.[!i0] do 1041 + decr i0 1042 + done ; 1043 + incr i0 ; 1044 + (* !i0 >= 0 is the first char of the possible email. *) 1045 + let i1 = ref (i + 1) in 1046 + while !i1 < len && is_valid_domain_part a.[!i1] do 1047 + incr i1 1048 + done ; 1049 + if !i0 < i && i + 1 < !i1 then ( 1050 + let email = String.sub a !i0 (!i1 - !i0) in 1051 + if !i0 > 0 && a.[!i0 - 1] = '<' then decr i0 ; 1052 + if !i1 < len && a.[!i1] = '>' then incr i1 ; 1053 + while !i1 < len && a.[!i1] = ' ' do 1054 + incr i1 1055 + done ; 1056 + (* skip spaces *) 1057 + let name = 1058 + if !i0 <= 0 then 1059 + if !i1 >= len then email (* no name *) else sub_no_braces a !i1 len 1060 + else 1061 + (* !i0 > 0 *) 1062 + let name0 = String.trim (String.sub a 0 !i0) in 1063 + if !i1 >= len then name0 else name0 ^ String.sub a !i1 (len - !i1) 1064 + in 1065 + (name, Some email) ) 1066 + else (a, None) 1067 + with Not_found -> (a, None) 1068 + 1069 + let looks_like_a_link u = 1070 + (Uri.scheme u = Some "http" || Uri.scheme u = Some "https") 1071 + && match Uri.host u with None | Some "" -> false | Some _ -> true 1072 + 1073 + let entry_of_item ch_link ch_updated (it : item) : Atom.entry = 1074 + let author = 1075 + match it.author with 1076 + | Some a -> 1077 + let name, email = extract_name_email a in 1078 + {Atom.name; uri= None; email} 1079 + | None -> 1080 + (* If no author is specified for the item, there is little one can do 1081 + just using the RSS2 feed. The user will have to set it using Atom 1082 + convenience functions. *) 1083 + {Atom.name= ""; uri= None; email= None} 1084 + in 1085 + let categories = 1086 + let fn (c : category) = { Atom.term= c.data; scheme= map_option c.domain (fun d -> d); label= None } in 1087 + List.map fn it.categories 1088 + in 1089 + let (title : Atom.title), content = 1090 + match it.story with 1091 + | All (t, xmlbase, d) -> 1092 + let content = 1093 + match it.content with 1094 + | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d)) 1095 + | x, c -> Some (Atom.Html (x, c)) 1096 + in 1097 + (Atom.Text t, content) 1098 + | Title t -> 1099 + let content = 1100 + match it.content with 1101 + | _, "" -> None 1102 + | x, c -> Some (Atom.Html (x, c)) 1103 + in 1104 + (Atom.Text t, content) 1105 + | Description (xmlbase, d) -> 1106 + let content = 1107 + match it.content with 1108 + | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d)) 1109 + | x, c -> Some (Atom.Html (x, c)) 1110 + in 1111 + (Atom.Text "", content) 1112 + in 1113 + let id = 1114 + match it.guid with 1115 + | Some g -> 1116 + if g.permalink || looks_like_a_link g.data then g.data 1117 + else 1118 + let d = Digest.to_hex (Digest.string (Uri.to_string g.data)) in 1119 + Uri.with_fragment ch_link (Some d) 1120 + | None -> 1121 + (* The [it.link] may not be a permanent link and may also be used by 1122 + other items. We use a digest to make it unique. *) 1123 + let link = match it.link with Some l -> l | None -> ch_link in 1124 + let s = 1125 + match it.story with 1126 + | All (t, _, d) -> t ^ d 1127 + | Title t -> t 1128 + | Description (_, d) -> d 1129 + in 1130 + let d = Digest.to_hex (Digest.string s) in 1131 + Uri.with_fragment link (Some d) 1132 + in 1133 + let links = 1134 + match (it.guid, it.link) with 1135 + | Some g, _ when g.permalink -> [Atom.link g.data ~rel:Atom.Alternate] 1136 + | _, Some l -> [Atom.link l ~rel:Atom.Alternate] 1137 + | Some g, _ -> 1138 + (* Sometimes the guid sets [l.permalink = false] but is nonetheless the 1139 + only URI we have. *) 1140 + if looks_like_a_link g.data then [Atom.link g.data ~rel:Atom.Alternate] 1141 + else [] 1142 + | _, None -> [] 1143 + in 1144 + let links = 1145 + match it.comments with 1146 + | Some l -> 1147 + { Atom.href= l 1148 + ; rel= Atom.Related 1149 + ; type_media= None 1150 + ; hreflang= None 1151 + ; title= "" 1152 + ; length= None } 1153 + :: links 1154 + | None -> links 1155 + in 1156 + let links = 1157 + match it.enclosure with 1158 + | Some e -> 1159 + { Atom.href= e.url 1160 + ; rel= Atom.Enclosure 1161 + ; type_media= Some e.mime 1162 + ; hreflang= None 1163 + ; title= "" 1164 + ; length= Some e.length } 1165 + :: links 1166 + | None -> links 1167 + in 1168 + let source = 1169 + match it.source with 1170 + | Some s -> 1171 + Some 1172 + { Atom.authors= [author] 1173 + ; (* Best guess *) 1174 + categories= [] 1175 + ; contributors= [] 1176 + ; generator= None 1177 + ; icon= None 1178 + ; id= ch_link 1179 + ; (* declared as the ID of the whole channel *) 1180 + links= 1181 + [ { Atom.href= s.url 1182 + ; rel= Atom.Related 1183 + ; type_media= None 1184 + ; hreflang= None 1185 + ; title= "" 1186 + ; length= None } ] 1187 + ; logo= None 1188 + ; rights= None 1189 + ; subtitle= None 1190 + ; title= Atom.Text s.data 1191 + ; updated= None } 1192 + | None -> None 1193 + in 1194 + { Atom.authors= (author, []) 1195 + ; categories 1196 + ; content 1197 + ; contributors= [] 1198 + ; id 1199 + ; links 1200 + ; published= None 1201 + ; rights= None 1202 + ; source 1203 + ; summary= None 1204 + ; title 1205 + ; updated= (match it.pubDate with Some d -> d | None -> ch_updated) } 1206 + 1207 + let more_recent_of_item date (it : item) = 1208 + match (date, it.pubDate) with 1209 + | _, None -> date 1210 + | None, Some _ -> it.pubDate 1211 + | Some d, Some di -> if Date.compare d di >= 0 then date else it.pubDate 1212 + 1213 + let max_date_opt d = function None -> d | Some d' -> Date.max d d' 1214 + 1215 + let to_atom ?self (ch : channel) : Atom.feed = 1216 + let contributors = 1217 + match ch.webMaster with 1218 + | Some p -> [{Atom.name= "Webmaster"; uri= None; email= Some p}] 1219 + | None -> [] 1220 + in 1221 + let contributors = 1222 + match ch.managingEditor with 1223 + | Some p -> 1224 + {Atom.name= "Managing Editor"; uri= None; email= Some p} 1225 + :: contributors 1226 + | None -> contributors 1227 + in 1228 + let links = 1229 + [ { Atom.href= ch.link 1230 + ; rel= Atom.Related 1231 + ; type_media= Some "text/html" 1232 + ; hreflang= None 1233 + ; title= ch.title 1234 + ; length= None } ] 1235 + in 1236 + let links = 1237 + match self with 1238 + | Some self -> 1239 + { Atom.href= self 1240 + ; rel= Atom.Self 1241 + ; type_media= Some "application/rss+xml" 1242 + ; hreflang= None 1243 + ; title= ch.title 1244 + ; length= None } 1245 + :: links 1246 + | None -> links 1247 + in 1248 + let updated = 1249 + match List.fold_left more_recent_of_item None ch.items with 1250 + | None -> max_date_opt Date.epoch ch.lastBuildDate 1251 + | Some d -> max_date_opt d ch.lastBuildDate 1252 + in 1253 + { Atom.authors= [] 1254 + ; categories= 1255 + ( match ch.category with 1256 + | None -> [] 1257 + | Some c -> [{Atom.term= c; scheme= None; label= None}] ) 1258 + ; contributors 1259 + ; generator= 1260 + map_option ch.generator (fun g -> 1261 + {Atom.content= g; version= None; uri= None} ) 1262 + ; icon= None 1263 + ; id= ch.link 1264 + ; (* FIXME: Best we can do? *) 1265 + links 1266 + ; logo= map_option ch.image (fun i -> i.url) 1267 + ; rights= map_option ch.copyright (fun c -> (Atom.Text c : Atom.rights)) 1268 + ; subtitle= None 1269 + ; title= Atom.Text ch.title 1270 + ; updated 1271 + ; entries= List.map (entry_of_item ch.link updated) ch.items }

+351

stack/syndic/lib/syndic_rss2.mli

··· 1 + (** [Syndic.Rss2]: compliant with {{: 2 + http://www.rssboard.org/rss-specification} RSS 2.0}. *) 3 + 4 + module Error : module type of Syndic_error 5 + 6 + (** An [image] is an optional sub-element of {!channel}, which contains three 7 + required ([url], [title], [link]) and three optional ([width], [height], 8 + [description]) sub-elements. 9 + 10 + {{: 11 + http://www.rssboard.org/rss-specification#ltimagegtSubelementOfLtchannelgt} 12 + See RSS 2.0 about <image>}. *) 13 + type image = 14 + { url: Uri.t 15 + (** The URL of a GIF, JPEG or PNG image that represents the channel. *) 16 + ; title: string 17 + (** Describes the image. It's used in the ALT attribute of the HTML 18 + <img> tag when the channel is rendered in HTML. *) 19 + ; link: Uri.t 20 + (** The URL of the site, when the channel is rendered, the image is a 21 + link to the site. (Note, in practice the image [title] and [link] 22 + should have the same value as the {!channel}'s [title] and [link]. *) 23 + ; width: int 24 + (** Width of the image in pixels. Maximum value is 144, default value 25 + is 88. *) 26 + ; height: int 27 + (** Height of the image in pixels. Maximum value is 400, default value 28 + is 31. *) 29 + ; description: string option 30 + (** contains text that is included in the TITLE attribute of the link 31 + formed around the image in the HTML rendering. *) } 32 + 33 + (** [cloud] is an optional sub-element of {!channel}. It specifies a web 34 + service that supports the rssCloud interface which can be implemented in 35 + HTTP-POST, XML-RPC or SOAP 1.1. 36 + 37 + Its purpose is to allow processes to register with a cloud to be notified 38 + of updates to the channel, implementing a lightweight publish-subscribe 39 + protocol for RSS feeds. 40 + 41 + {{: 42 + http://www.rssboard.org/rss-specification#ltcloudgtSubelementOfLtchannelgt} 43 + See RSS 2.0 about <cloud> } 44 + 45 + {[ <cloud domain="rpc.sys.com" port="80" path="/RPC2" 46 + registerProcedure="myCloud.rssPleaseNotify" protocol="xml-rpc" /> ]} 47 + 48 + In this example, to request notification on the channel it appears in, you 49 + would send an XML-RPC message to rpc.sys.com on port 80, with a path of 50 + /RPC2. The procedure to call is myCloud.rssPleaseNotify. *) 51 + type cloud = 52 + { uri: Uri.t (** The URI of the cloud (domain, port, path). *) 53 + ; registerProcedure: string 54 + ; protocol: string } 55 + 56 + (** A {!channel} may optionally contain a [textInput] sub-element, which 57 + contains four required sub-elements. 58 + 59 + The purpose of the <textInput> element is something of a mystery. You can 60 + use it to specify a search engine box. Or to allow a reader to provide 61 + feedback. Most aggregators ignore it. 62 + 63 + {{: 64 + http://www.rssboard.org/rss-specification#lttextinputgtSubelementOfLtchannelgt} 65 + See RSS 2.0 about <textinput>} *) 66 + type textinput = 67 + { title: string (** The label of the Submit button in the text input area. *) 68 + ; description: string (** Explains the text input area. *) 69 + ; name: string (** The name of the text object in the text input area. *) 70 + ; link: Uri.t 71 + (** The URL of the CGI script that processes text input requests. *) } 72 + 73 + (** [category] is an optional sub-element of {!item}. - [data] is A 74 + forward-slash-separated string that identifies a hierarchic location in the 75 + indicated taxonomy. Processors may establish conventions for the 76 + interpretation of categories. - [domain], if provided, a string that 77 + identifies a categorization taxonomy. 78 + 79 + {{: 80 + http://www.rssboard.org/rss-specification#ltcategorygtSubelementOfLtitemgt} 81 + See RSS 2.0 about <category> } 82 + 83 + Two examples are provided below: 84 + 85 + {[ <category>Grateful Dead</category> ]} 86 + 87 + {[ <category domain="http://www.fool.com/cusips">MSFT</category> ]} 88 + 89 + You may include as many category elements as you need to, for different 90 + domains, and to have an item cross-referenced in different parts of the 91 + same domain. *) 92 + type category = {data: string; domain: Uri.t option} 93 + 94 + (** [enclosure] is an optional sub-element of {!item}. It has three required 95 + attributes. - [url] says where the enclosure is located (must be an http 96 + url), - [length] says how big it is in bytes, and - [mime] says what its 97 + type is, a standard MIME type. 98 + 99 + {{: 100 + http://www.rssboard.org/rss-specification#ltenclosuregtSubelementOfLtitemgt} 101 + See RSS 2.0 about <enclosure> } 102 + 103 + {[ <enclosure url="http://www.scripting.com/mp3s/weatherReportSuite.mp3" 104 + length="12216320" type="audio/mpeg" /> ]} *) 105 + type enclosure = {url: Uri.t; length: int; mime: string} 106 + 107 + (** [guid] is an optional sub-element of {!item}. "guid" stands for globally 108 + unique identifier. It's a string that uniquely identifies the item. When 109 + present, an aggregator may choose to use this string to determine if an 110 + item is new. 111 + 112 + {{: http://www.rssboard.org/rss-specification#ltguidgtSubelementOfLtitemgt} 113 + See RSS 2.0 about <guid>} 114 + 115 + {[<guid>http://some.server.com/weblogItem3207</guid>]} 116 + 117 + There are no rules for the syntax of a guid. Aggregators must view them as 118 + a string. It's up to the source of the feed to establish the uniqueness of 119 + the string. 120 + 121 + If [permalink] is [true], the reader may assume that it is a permalink to 122 + the item, that is, a url that can be opened in a Web browser, that points 123 + to the full item described by the <item> element. An example: 124 + 125 + {[<guid 126 + isPermaLink="true">http://inessential.com/2002/09/01.php#a2</guid>]} 127 + 128 + If [permalink] is [false], the guid may not be assumed to be a url, or a 129 + url to anything in particular. *) 130 + type guid = 131 + {data: Uri.t (** Must be unique *); permalink: bool (** default [true] *)} 132 + 133 + (** [source] is an optional sub-element of {!item}. - [data] is the name of the 134 + RSS channel that the item came from, derived from its <title>. - [url] 135 + links to the XMLization of the source. 136 + 137 + The purpose of this element is to propagate credit for links, to publicize 138 + the sources of news items. It can be used in the Post command of an 139 + aggregator. It should be generated automatically when forwarding an item 140 + from an aggregator to a weblog authoring tool. 141 + 142 + {{:http://www.rssboard.org/rss-specification#ltsourcegtSubelementOfLtitemgt} 143 + See RSS 2.0 about <source>} 144 + 145 + {[<source url="http://www.tomalak.org/links2.xml">Tomalak's 146 + Realm</source>]} *) 147 + type source = {data: string; url: Uri.t} 148 + 149 + type story = 150 + | All of string * Uri.t option * string 151 + (** [All(title, xmlbase, description)] *) 152 + | Title of string 153 + | Description of Uri.t option * string 154 + (** [Description(xmlbase, description)] *) 155 + 156 + (** A {!channel} may contain any number of [item]s. An item may represent a 157 + "story" — much like a story in a newspaper or magazine; if so its 158 + description is a synopsis of the story, and the link points to the full 159 + story. An item may also be complete in itself, if so, the description 160 + contains the text (entity-encoded HTML is allowed; see examples), and the 161 + link and title may be omitted. 162 + 163 + - [title] : The title of the item. - [link] : The URL of the item. - 164 + [story] : The item synopsis. - [content] : The possible full story 165 + ([(_,"")] if not present). (Extension of RSS2, see 166 + http://purl.org/rss/1.0/modules/content/) The first element of the couple 167 + is the possible value of xml:base. It can be used to resolve URIs. - 168 + [author] : Email address of the author of the item. - [category] : Includes 169 + the item in one or more categories. - [comments] : URL of a page for 170 + comments relating to the item. - [enclosure] : Describes a media object 171 + that is attached to the item. - [guid] : A string that uniquely identifies 172 + the item. - [pubDate] : Indicates when the item was published. - [source] : 173 + The RSS channel that the item came from. 174 + 175 + {{: http://www.rssboard.org/rss-specification#hrelementsOfLtitemgt} See RSS 176 + 2.0 about <item> } *) 177 + type item = 178 + { story: story 179 + ; content: Uri.t option * string 180 + ; link: Uri.t option 181 + ; author: string option 182 + ; categories: category list 183 + ; comments: Uri.t option 184 + ; enclosure: enclosure option 185 + ; guid: guid option 186 + ; pubDate: Syndic_date.t option 187 + ; source: source option } 188 + 189 + (** Here's a list of the required channel elements, each with a brief 190 + description, an example, and where available, a pointer to a more complete 191 + description. 192 + 193 + - [title]: The name of the channel. It's how people refer to your service. 194 + If you have an HTML website that contains the same information as your RSS 195 + file, the title of your channel should be the same as the title of your 196 + website. - [link]: The URL to the HTML website corresponding to the 197 + channel. - [description]: Phrase or sentence describing the channel. 198 + 199 + Here's a list of optional channel elements. 200 + 201 + - [language]: The language the channel is written in. This allows 202 + aggregators to group all Italian language sites, for example, on a single 203 + page. A list of allowable values for this element, as provided by Netscape, 204 + is here. You may also use values defined by the W3C. - [copyright]: 205 + Copyright notice for content in the channel. - [managingEditor]: Email 206 + address for person responsible for editorial content. - [webMaster]: Email 207 + address for person responsible for technical issues relating to channel. - 208 + [pubDate]: The publication date for the content in the channel. For 209 + example, the New York Times publishes on a daily basis, the publication 210 + date flips once every 24 hours. That's when the pubDate of the channel 211 + changes. All date-times in RSS conform to the Date and Time Specification 212 + of RFC 822, with the exception that the year may be expressed with two 213 + characters or four characters (four preferred). - [lastBuildDate]: The last 214 + time the content of the channel changed. - [category]: Specify one or more 215 + categories that the channel belongs to. Follows the same rules as the 216 + <item>-level category element. See {!category}. - [generator]: A string 217 + indicating the program used to generate the channel. - [docs]: A URL that 218 + points to the documentation for the format used in the RSS file. It's 219 + probably a pointer to [http://www.rssboard.org/rss-specification]. It's for 220 + people who might stumble across an RSS file on a Web server 25 years from 221 + now and wonder what it is. - [cloud]: Allows processes to register with a 222 + cloud to be notified of updates to the channel, implementing a lightweight 223 + publish-subscribe protocol for RSS feeds. See {!cloud}. - [ttl]: ttl stands 224 + for time to live. It's a number of minutes that indicates how long a 225 + channel can be cached before refreshing from the source. - [image]: 226 + Specifies a GIF, JPEG or PNG image that can be displayed with the channel. 227 + See {!image}. - [rating]: The PICS rating for the channel. - [textInput]: 228 + Specifies a text input box that can be displayed with the channel. See 229 + {!textinput}. - [skipHours]: A hint for aggregators telling them which 230 + hours they can skip. This element contains up to 24 <hour> sub-elements 231 + whose value is a number between 0 and 23, representing a time in GMT, when 232 + aggregators, if they support the feature, may not read the channel on hours 233 + listed in the <skipHours> element. The hour beginning at midnight is hour 234 + zero. - [skipDays]: A hint for aggregators telling them which days they can 235 + skip. This element contains up to seven <day> sub-elements whose value is 236 + Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or Sunday. 237 + Aggregators may not read the channel during days listed in the <skipDays> 238 + element. 239 + 240 + {{: http://www.rssboard.org/rss-specification#requiredChannelElements} See 241 + RSS 2.0 about <channel>} *) 242 + type channel = 243 + { title: string 244 + ; link: Uri.t 245 + ; description: string 246 + ; language: string option 247 + ; copyright: string option 248 + ; managingEditor: string option 249 + ; webMaster: string option 250 + ; pubDate: Syndic_date.t option 251 + ; lastBuildDate: Syndic_date.t option 252 + ; category: string option 253 + ; generator: string option 254 + ; docs: Uri.t option 255 + ; cloud: cloud option 256 + ; ttl: int option 257 + (** {{: 258 + http://www.rssboard.org/rss-specification#ltcloudgtSubelementOfLtchannelgt} 259 + See RSS 2.0 about <ttl> } *) 260 + ; image: image option 261 + ; rating: int option 262 + ; (* lol *) 263 + textInput: textinput option 264 + ; skipHours: int option 265 + ; skipDays: int option 266 + ; items: item list } 267 + 268 + val parse : ?xmlbase:Uri.t -> Xmlm.input -> channel 269 + (** [parse xml] returns the channel corresponding to [xml]. 270 + 271 + Raise [Error.Expected], [Error.Size_Exceeded] or [Error.Item_expectation] 272 + if [xml] is not a valid RSS2 document. *) 273 + 274 + val read : ?xmlbase:Uri.t -> string -> channel 275 + (** [read fname] reads the file name [fname] and parses it. For the optional 276 + parameters, see {!parse}. *) 277 + 278 + val to_atom : ?self:Uri.t -> channel -> Syndic_atom.feed 279 + (** [to_atom ch] returns an Atom feed that (mostly) contains the same 280 + information. 281 + 282 + @param self the URI from where the current feed was retrieved. Contrarily 283 + to Atom, RSS2 has no provision to store the URI of the feed itself. Giving 284 + this information will add an entry to the [links] field of Atom feed with 285 + [rel = Self]. *) 286 + 287 + (**/**) 288 + 289 + (** An URI is given by (xmlbase, uri). The value of [xmlbase], if not [None], 290 + gives the base URI against which [uri] must be resolved if it is relative. *) 291 + type uri = Uri.t option * string 292 + 293 + val unsafe : 294 + ?xmlbase:Uri.t 295 + -> Xmlm.input 296 + -> [> `Channel of [> `Category of string 297 + | `Cloud of [> `Domain of string 298 + | `Path of string 299 + | `Port of string 300 + | `Protocol of string 301 + | `RegisterProcedure of string ] 302 + list 303 + | `Copyright of string 304 + | `Description of string 305 + | `Docs of string 306 + | `Generator of string 307 + | `Image of [> `Description of string 308 + | `Height of string 309 + | `Link of uri 310 + | `Title of string 311 + | `URL of uri 312 + | `Width of string ] 313 + list 314 + | `Item of [> `Author of string 315 + | `Category of [> `Data of string 316 + | `Domain of string ] 317 + list 318 + | `Comments of string 319 + | `Description of string 320 + | `Content of string 321 + | `Enclosure of [> `Length of string 322 + | `Mime of string 323 + | `URL of uri ] 324 + list 325 + | `Guid of [> `Data of uri 326 + | `Permalink of string ] 327 + list 328 + | `Link of uri 329 + | `PubDate of string 330 + | `Source of [> `Data of string | `URL of uri] 331 + list 332 + | `Title of string ] 333 + list 334 + | `Language of string 335 + | `LastBuildDate of string 336 + | `Link of uri 337 + | `ManagingEditor of string 338 + | `PubDate of string 339 + | `Rating of string 340 + | `SkipDays of string 341 + | `SkipHours of string 342 + | `TTL of string 343 + | `TextInput of [> `Description of string 344 + | `Link of uri 345 + | `Name of string 346 + | `Title of string ] 347 + list 348 + | `Title of string 349 + | `WebMaster of string ] 350 + list ] 351 + (** Analysis without verification, enjoy ! *)

+126

stack/syndic/lib/syndic_w3c.ml

··· 1 + open Syndic_common.XML 2 + open Syndic_common.Util 3 + module XML = Syndic_xml 4 + module Error = Syndic_error 5 + 6 + type error' = 7 + [ `Line of string 8 + | `Column of string 9 + | `Text of string 10 + | `Element of string 11 + | `Parent of string 12 + | `Value of string ] 13 + 14 + type error 15 + type warning 16 + type 'a kind = Error | Warning 17 + 18 + let error = Error 19 + let warning = Warning 20 + 21 + type 'a t = 22 + { kind: 'a kind (** Error or warning. *) 23 + ; line: int 24 + (** Within the source code of the validated document, refers to the 25 + line where the error was detected. *) 26 + ; column: int 27 + (** Within the source code of the validated document, refers to the 28 + line where the column was detected. *) 29 + ; text: string (** The actual error message. *) 30 + ; element: string 31 + (** Element in the feed where the message was triggered. *) 32 + ; parent: string (** In the feed, parent of the element. *) 33 + ; value: string 34 + (** If applicable the value of the element, attribute or content which 35 + triggered the message. *) } 36 + 37 + let feed_url = Uri.of_string "http://validator.w3.org/feed/check.cgi" 38 + 39 + let url d = 40 + let q = [("output", ["soap12"])] in 41 + let q = 42 + match d with 43 + | `Data data -> ("rawdata", [data]) :: q 44 + | `Uri uri -> [("url", [Uri.to_string uri])] 45 + in 46 + Uri.with_query feed_url q 47 + 48 + let make_error ~kind ~pos:_ (l : [< error'] list) = 49 + let line = 50 + match find (function `Line _ -> true | _ -> false) l with 51 + | Some (`Line line) -> ( try int_of_string line with _ -> 0 ) 52 + | _ -> 0 53 + in 54 + let column = 55 + match find (function `Column _ -> true | _ -> false) l with 56 + | Some (`Column column) -> ( try int_of_string column with _ -> 0 ) 57 + | _ -> 0 58 + in 59 + let text = 60 + match find (function `Text _ -> true | _ -> false) l with 61 + | Some (`Text text) -> text 62 + | _ -> "" 63 + in 64 + let element = 65 + match find (function `Element _ -> true | _ -> false) l with 66 + | Some (`Element element) -> element 67 + | _ -> "" 68 + in 69 + let parent = 70 + match find (function `Parent _ -> true | _ -> false) l with 71 + | Some (`Parent parent) -> parent 72 + | _ -> "" 73 + in 74 + let value = 75 + match find (function `Value _ -> true | _ -> false) l with 76 + | Some (`Value value) -> value 77 + | _ -> "" 78 + in 79 + ({kind; line; column; text; element; parent; value} : _ t) 80 + 81 + let error_data_producer = 82 + [ ("line", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Line a)) 83 + ; ("column", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Column a)) 84 + ; ("text", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Text a)) 85 + ; ("element", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Element a)) 86 + ; ("parent", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Parent a)) 87 + ; ("value", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Value a)) ] 88 + 89 + let error_of_xml ~kind = 90 + generate_catcher ~data_producer:error_data_producer (make_error ~kind) 91 + 92 + let make_errorlist ~pos:_ (l : _ t list) = l 93 + 94 + let errorlist_of_xml = 95 + let data_producer = [("error", error_of_xml ~kind:Error)] in 96 + generate_catcher ~data_producer ~xmlbase:None make_errorlist 97 + 98 + let warninglist_of_xml = 99 + let data_producer = [("warning", error_of_xml ~kind:Warning)] in 100 + generate_catcher ~data_producer ~xmlbase:None make_errorlist 101 + 102 + let find_errorlist l = 103 + recursive_find 104 + (function XML.Node (_, t, _) -> tag_is t "errorlist" | _ -> false) 105 + l 106 + 107 + let find_warninglist l = 108 + recursive_find 109 + (function XML.Node (_, t, _) -> tag_is t "warninglist" | _ -> false) 110 + l 111 + 112 + let to_error {line; column; text; _} = ((line, column), text) 113 + 114 + let parse input = 115 + let _, xml = XML.of_xmlm input in 116 + let err = 117 + match find_errorlist xml with 118 + | Some (XML.Node (p, t, d)) -> errorlist_of_xml (p, t, d) 119 + | _ -> [] 120 + in 121 + let warn = 122 + match find_warninglist xml with 123 + | Some (XML.Node (p, t, d)) -> warninglist_of_xml (p, t, d) 124 + | _ -> [] 125 + in 126 + (err, warn)

+38

stack/syndic/lib/syndic_w3c.mli

··· 1 + (** [Syndic.W3C]: invoke and parse the result of the W3C validator. *) 2 + 3 + module Error : module type of Syndic_error 4 + 5 + type error 6 + type warning 7 + 8 + (** Distinguishes an error from a warning. *) 9 + type 'a kind 10 + 11 + val error : error kind 12 + val warning : warning kind 13 + 14 + type 'a t = 15 + { kind: 'a kind (** Error or warning. *) 16 + ; line: int 17 + (** Within the source code of the validated document, refers to the 18 + line where the error was detected. *) 19 + ; column: int 20 + (** Within the source code of the validated document, refers to the 21 + line where the column was detected. *) 22 + ; text: string (** The actual error message. *) 23 + ; element: string 24 + (** Element in the feed where the message was triggered. *) 25 + ; parent: string (** In the feed, parent of the element. *) 26 + ; value: string 27 + (** If applicable the value of the element, attribute or content which 28 + triggered the message. *) } 29 + 30 + val url : [< `Data of string | `Uri of Uri.t] -> Uri.t 31 + (** Generate url for the W3C Feed Validator API returning a SOAP 12 output. 32 + Thus URL is supposed to be used with GET. *) 33 + 34 + val to_error : _ t -> Error.t 35 + 36 + val parse : Xmlm.input -> error t list * warning t list 37 + (** [parse i] takes [i] and returns a list of error, result of 38 + {{:http://validator.w3.org/feed/docs/soap} W3C Feed Validator}. *)

+52

stack/syndic/lib/syndic_xml.ml

··· 1 + type dtd = string option 2 + 3 + module Error = Syndic_error 4 + 5 + type pos = Xmlm.pos 6 + type tag = Xmlm.tag 7 + type t = Node of pos * tag * t list | Data of pos * string 8 + 9 + let resolve ~xmlbase uri = 10 + match xmlbase with None -> uri | Some b -> Uri.resolve "" b uri 11 + 12 + (* Specialized version of the Xmlm.make_input one. *) 13 + let input_of_channel fh = 14 + (* Xmlm.make_input does not raise any exception. *) 15 + Xmlm.make_input (`Channel fh) 16 + 17 + let of_xmlm input = 18 + let el tag datas = Node (Xmlm.pos input, tag, datas) in 19 + let data data = Data (Xmlm.pos input, data) in 20 + try Xmlm.input_doc_tree ~el ~data input with Xmlm.Error (pos, e) -> 21 + raise (Error.Error (pos, Xmlm.error_message e)) 22 + 23 + let get_position = function Node (pos, _, _) -> pos | Data (pos, _) -> pos 24 + 25 + let rec t_to_xmlm t output = 26 + match t with 27 + | Data (_pos, d) -> ( 28 + try Xmlm.output output (`Data d) with Xmlm.Error (pos, e) -> 29 + raise (Error.Error (pos, Xmlm.error_message e)) ) 30 + | Node (_pos, tag, t_sub) -> ( 31 + Xmlm.output output (`El_start tag) ; 32 + List.iter (fun t -> t_to_xmlm t output) t_sub ; 33 + try Xmlm.output output `El_end with Xmlm.Error (pos, e) -> 34 + raise (Error.Error (pos, Xmlm.error_message e)) ) 35 + 36 + (* Specialized version of the Xmlm one. *) 37 + let make_output ?ns_prefix dest = 38 + (* Xmlm.make_output does not raise any exception. *) 39 + Xmlm.make_output dest ~decl:true ?ns_prefix 40 + 41 + let to_xmlm ?dtd t output = 42 + ( try Xmlm.output output (`Dtd dtd) with Xmlm.Error (pos, e) -> 43 + raise (Error.Error (pos, Xmlm.error_message e)) ) ; 44 + t_to_xmlm t output 45 + 46 + let to_buffer ?ns_prefix t b = 47 + let output = Xmlm.make_output ~decl:false (`Buffer b) ?ns_prefix in 48 + to_xmlm t output 49 + 50 + let to_string ?ns_prefix t = 51 + let b = Buffer.create 4096 in 52 + to_buffer ?ns_prefix t b ; Buffer.contents b

+27

stack/syndic/lib/syndic_xml.mli

··· 1 + (** Common module for XML parsing. *) 2 + 3 + (** The type for the optional {{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD}. *) 4 + type dtd = string option 5 + 6 + type pos = Xmlm.pos 7 + type tag = Xmlm.tag 8 + 9 + (** A XML tree. *) 10 + type t = Node of pos * tag * t list | Data of pos * string 11 + 12 + val resolve : xmlbase:Uri.t option -> Uri.t -> Uri.t 13 + (** [resolve base uri] resolve the [uri] against the possible base. *) 14 + 15 + val get_position : t -> pos 16 + val input_of_channel : in_channel -> Xmlm.input 17 + 18 + val of_xmlm : Xmlm.input -> dtd * t 19 + (** [of_xmlm doc] converts an XML document [doc] into a DTD and a tree 20 + representing the document. *) 21 + 22 + val make_output : 23 + ?ns_prefix:(string -> string option) -> Xmlm.dest -> Xmlm.output 24 + 25 + val to_xmlm : ?dtd:string -> t -> Xmlm.output -> unit 26 + val to_string : ?ns_prefix:(string -> string option) -> t -> string 27 + val to_buffer : ?ns_prefix:(string -> string option) -> t -> Buffer.t -> unit

+35

stack/syndic/syndic.opam

··· 1 + version: "1.7.0" 2 + opam-version: "2.0" 3 + maintainer: "Romain Calascibetta <romain.calascibetta@gmail.com>" 4 + authors: [ "Romain Calascibetta" 5 + "Christophe Troestler" ] 6 + license: "MIT" 7 + homepage: "https://github.com/Cumulus/Syndic" 8 + dev-repo: "git+https://github.com/Cumulus/Syndic.git" 9 + bug-reports: "https://github.com/Cumulus/Syndic/issues" 10 + doc: "https://cumulus.github.io/Syndic/" 11 + synopsis: "RSS1, RSS2, Atom and OPML1 parsing" 12 + description: """ 13 + Pure OCaml Library for parsing and writing various types of 14 + feeds and subscriber lists.""" 15 + 16 + build: [ 17 + [ "dune" "subst" ] 18 + [ "dune" "build" "-p" name "-j" jobs ] 19 + [ "dune" "runtest" "-p" name ] {with-test & ocaml:version >= "4.04.1"} 20 + ] 21 + 22 + 23 + depends: [ 24 + "ocaml" {>= "4.03.0"} 25 + "dune" 26 + "ptime" 27 + "uri" {>= "1.9"} 28 + "xmlm" {>= "1.2.0"} 29 + "fmt" {with-test} 30 + "ocurl" {with-test} 31 + "fpath" {with-test} 32 + "ocplib-json-typed" {with-test} 33 + "base-unix" {with-test} 34 + "jsonm" {with-test} 35 + ]

Configure Feed

Configure Feed