OCaml client for the LinkedIn Voyager API
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocaml-linkedin: apply dune fmt

Pure formatting changes from `dune fmt`: doc comment placement moves
from above the binding to below it for `type`s, multi-line `match`
expressions collapse onto one line where they fit, and infix operator
applications pick up spaces (`Soup.($?)` -> `Soup.( $? )`). No
semantic changes.

+2559 -367
+48 -19
README.md
··· 13 13 14 14 ## Installation 15 15 16 + Install with opam: 17 + 18 + ```shell 19 + $ opam install linkedin 16 20 ``` 17 - opam install linkedin 21 + 22 + If opam cannot find the package, it may not yet be released in the public 23 + `opam-repository`. Add the overlay repository, then install it: 24 + 25 + ```shell 26 + $ opam repo add samoht https://tangled.org/gazagnaire.org/opam-overlay.git 27 + $ opam update 28 + $ opam install linkedin 18 29 ``` 19 30 20 31 ## Usage ··· 22 33 ### Library 23 34 24 35 ```ocaml 25 - Eio_main.run @@ fun env -> 26 - Eio.Switch.run @@ fun sw -> 27 - let api = Linkedin.Api.v ~sw env ~li_at:"..." ~jsessionid:"..." in 28 - match Linkedin.Api.me api with 29 - | Ok profile -> Fmt.pr "%a@." Linkedin.Profile.pp profile 30 - | Error e -> Fmt.epr "%a@." Linkedin.Api.pp_error e 36 + let show_me ~li_at ~jsessionid = 37 + Eio_main.run @@ fun env -> 38 + Eio.Switch.run @@ fun sw -> 39 + let api = Linkedin.Api.v ~sw env ~li_at ~jsessionid in 40 + match Linkedin.Api.me api with 41 + | Ok profile -> Fmt.pr "%a@." Linkedin.Profile.pp profile 42 + | Error e -> Fmt.epr "%a@." 
Linkedin.Api.pp_error e 31 43 ``` 32 44 33 45 ### Command-line 34 46 35 - ``` 36 - # Extract cookies from Chrome (macOS, prompts for Keychain access) 37 - linkedin cookies 47 + ```text 48 + # One-time: authenticate by reading your Chrome session 49 + linkedin login 50 + linkedin logout # remove saved cookies 38 51 39 52 # Show your own profile 40 53 linkedin me ··· 43 56 linkedin profile johndoe 44 57 linkedin profile https://www.linkedin.com/in/johndoe 45 58 46 - # Show feed posts 47 - linkedin posts johndoe 48 - linkedin posts -n 5 johndoe 59 + # Fetch any LinkedIn item (Pulse article or feed post) 60 + linkedin get https://www.linkedin.com/pulse/my-article-johndoe-abcde/ 61 + linkedin get https://www.linkedin.com/feed/update/urn:li:activity:7123456789 62 + 63 + # Choose the output format — default is Markdown with YAML frontmatter 64 + linkedin get <url> --format md # (default) 65 + linkedin get <url> --format html # raw article body HTML 66 + linkedin get <url> --format json # structured record via Item.json 67 + linkedin get <url> --json # alias for --format json 49 68 50 - # Show a single post by URN or URL 51 - linkedin post urn:li:activity:7123456789 52 - linkedin post https://www.linkedin.com/posts/johndoe_title-activity-123-abc 69 + # Walk a person's recent feed (articles + posts, reverse-chronological) 70 + linkedin feed johndoe 71 + linkedin feed -n 500 johndoe 72 + linkedin feed johndoe --json 53 73 ``` 54 74 55 75 ## API ··· 64 84 responses. 65 85 - **`Chrome_cookies`** -- Extracts and decrypts LinkedIn session cookies from 66 86 Chrome's SQLite cookie store on macOS using Keychain, PBKDF2, and AES-CBC. 67 - - **`Linkedin_url`** -- Parses LinkedIn profile and post URLs into structured 68 - types. Recognises `/in/{id}`, `/feed/update/urn:...`, and `/posts/{slug}` 69 - formats. 87 + - **`Linkedin_url`** -- Parses LinkedIn profile, post, and Pulse article URLs 88 + into structured types. 
Recognises `/in/{id}`, `/feed/update/urn:...`, 89 + `/posts/{slug}`, and `/pulse/{slug}` formats. 90 + - **`Pulse`** -- LinkedIn Pulse article type with title, author, subtitle, 91 + publication date, and body rendered as Markdown. `fetch` retrieves a 92 + public article by URL or slug. Uses `lambdasoup` to parse the article 93 + HTML. Low-level; most users want `Item` instead. 94 + - **`Item`** -- Unified view of any LinkedIn item (Pulse article or short 95 + feed post). `get` fetches by URL and dispatches internally to Pulse or 96 + Post. `feed` walks a member's share feed and returns article+post 97 + summaries interleaved in reverse-chronological order. Includes JSON 98 + codecs for feeding a website or ingestion pipeline.
+263
bin/cli.ml
··· 1 + open Cmdliner 2 + open Tty 3 + 4 + type output_format = Markdown | Html | Json 5 + 6 + (** {1 Cookie persistence and environment wiring} *) 7 + 8 + let li_at_env = Cmd.Env.info "LINKEDIN_LI_AT" ~doc:"LinkedIn li_at cookie." 9 + 10 + let jsessionid_env = 11 + Cmd.Env.info "LINKEDIN_JSESSIONID" ~doc:"LinkedIn JSESSIONID cookie." 12 + 13 + let li_at = 14 + let doc = "LinkedIn li_at cookie (or set LINKEDIN_LI_AT env var)." in 15 + Arg.(value & opt (some string) None & info [ "li-at" ] ~doc ~env:li_at_env) 16 + 17 + let jsessionid = 18 + let doc = 19 + "LinkedIn JSESSIONID cookie (or set LINKEDIN_JSESSIONID env var)." 20 + in 21 + Arg.( 22 + value 23 + & opt (some string) None 24 + & info [ "jsessionid" ] ~doc ~env:jsessionid_env) 25 + 26 + let setup = Vlog.setup "linkedin" 27 + 28 + let format = 29 + let format_enum = 30 + Arg.enum [ ("md", Markdown); ("html", Html); ("json", Json) ] 31 + in 32 + let doc = 33 + "Output format: $(b,md) (default; YAML frontmatter + Markdown body), \ 34 + $(b,html) (raw article body HTML), or $(b,json) (structured record via \ 35 + Item.json codec)." 
36 + in 37 + Arg.( 38 + value & opt format_enum Markdown 39 + & info [ "f"; "format" ] ~doc ~docv:"FORMAT") 40 + 41 + let effective_format ~json_alias fmt = if json_alias then Json else fmt 42 + 43 + let cookie_help = 44 + {|To get your LinkedIn session cookies, either: 45 + 46 + a) Let the CLI read them from Chrome automatically (macOS): 47 + 48 + linkedin login # reads from Chrome and saves locally 49 + 50 + b) Or export them manually (get from browser DevTools > Application > Cookies): 51 + 52 + export LINKEDIN_LI_AT="<li_at value>" 53 + export LINKEDIN_JSESSIONID="<JSESSIONID value>"|} 54 + 55 + let cookies_path xdg = Eio.Path.(Xdge.data_dir xdg / "cookies") 56 + 57 + let save_cookies xdg ~li_at ~jsessionid = 58 + let path = cookies_path xdg in 59 + let data = Fmt.str "%s\n%s\n" li_at jsessionid in 60 + Eio.Path.save ~create:(`Or_truncate 0o600) path data 61 + 62 + let load_cookies xdg = 63 + try 64 + let data = Eio.Path.load (cookies_path xdg) in 65 + match String.split_on_char '\n' data with 66 + | li_at :: jsessionid :: _ when li_at <> "" && jsessionid <> "" -> 67 + Some (li_at, jsessionid) 68 + | _ -> None 69 + with Eio.Io _ -> None 70 + 71 + (** Returns [(li_at, jsessionid, extra_cookies)]. Extra cookies include 72 + Cloudflare tokens when read from Chrome.
*) 73 + let resolve_cookies env li_at jsessionid = 74 + match (li_at, jsessionid) with 75 + | Some a, Some j -> (a, j, []) 76 + | _ -> ( 77 + let fs = Eio.Stdenv.fs env in 78 + let xdg = Xdge.v fs "linkedin" in 79 + match (li_at, jsessionid, load_cookies xdg) with 80 + | None, None, Some (a, j) -> (a, j, []) 81 + | Some a, None, Some (_, j) -> (a, j, []) 82 + | None, Some j, Some (a, _) -> (a, j, []) 83 + | _ -> ( 84 + let proc_mgr = Eio.Stdenv.process_mgr env in 85 + match Linkedin.Chrome_cookies.all_linkedin_cookies proc_mgr fs with 86 + | Ok cookies -> ( 87 + let li_at_v = List.assoc_opt "li_at" cookies in 88 + let jsessionid_v = List.assoc_opt "JSESSIONID" cookies in 89 + match (li_at_v, jsessionid_v) with 90 + | Some a, Some j -> 91 + save_cookies xdg ~li_at:a ~jsessionid:j; 92 + let li_at = match li_at with Some v -> v | None -> a in 93 + let jsessionid = 94 + match jsessionid with Some v -> v | None -> j 95 + in 96 + let extra = 97 + List.filter 98 + (fun (n, _) -> n <> "li_at" && n <> "JSESSIONID") 99 + cookies 100 + in 101 + (li_at, jsessionid, extra) 102 + | _ -> 103 + Fmt.epr 104 + "Error: Chrome cookies missing li_at or JSESSIONID.@.@.%s@." 105 + cookie_help; 106 + exit 1) 107 + | Error e -> 108 + Fmt.epr 109 + "Error: Cookies not provided and could not read from Chrome: \ 110 + %a@.@.%s@." 
111 + Linkedin.Chrome_cookies.pp_error e cookie_help; 112 + exit 1)) 113 + 114 + let run f = 115 + Eio_main.run @@ fun env -> 116 + Eio.Switch.run @@ fun sw -> f ~sw env 117 + 118 + let api ~sw env li_at jsessionid = 119 + let li_at, jsessionid, extra_cookies = resolve_cookies env li_at jsessionid in 120 + let api = Linkedin.Api.v ~sw env ~li_at ~jsessionid in 121 + Linkedin.Api.add_cookies api extra_cookies; 122 + api 123 + 124 + let save_cookies_from_chrome env = 125 + let proc_mgr = Eio.Stdenv.process_mgr env in 126 + let fs = Eio.Stdenv.fs env in 127 + let xdg = Xdge.v fs "linkedin" in 128 + match Linkedin.Chrome_cookies.linkedin_cookies proc_mgr fs with 129 + | Ok (li_at, jsessionid) -> 130 + save_cookies xdg ~li_at ~jsessionid; 131 + Eio.Path.native_exn (cookies_path xdg) 132 + | Error e -> 133 + Fmt.epr "Error: %a@." Linkedin.Chrome_cookies.pp_error e; 134 + exit 1 135 + 136 + (** {1 Display helpers} *) 137 + 138 + let dim = Style.fg Color.bright_black 139 + let bold_cyan = Style.(bold + fg Color.cyan) 140 + let styled s txt = Span.to_string (Span.styled s txt) 141 + let print_error e = Fmt.epr "Error: %a@." 
Linkedin.Api.pp_error e 142 + 143 + let print_profile (p : Linkedin.Profile.t) = 144 + let width = Width.terminal_width () in 145 + ignore width; 146 + let name = Linkedin.Profile.display_name p in 147 + print_string (styled bold_cyan name); 148 + print_char '\n'; 149 + let headline = Linkedin.Profile.headline p in 150 + if headline <> "" then begin 151 + print_string (styled dim " "); 152 + print_string headline; 153 + print_char '\n' 154 + end; 155 + let location = Linkedin.Profile.location p in 156 + if location <> "" then begin 157 + print_string (styled dim " "); 158 + print_string (styled dim location); 159 + print_char '\n' 160 + end; 161 + let summary = Linkedin.Profile.summary p in 162 + if summary <> "" then begin 163 + print_char '\n'; 164 + print_string summary; 165 + print_char '\n' 166 + end; 167 + let pub_id = Linkedin.Profile.public_id p in 168 + if pub_id <> "" then begin 169 + print_string (styled dim (Fmt.str " public_id: %s" pub_id)); 170 + print_char '\n' 171 + end; 172 + let urn = Linkedin.Profile.entity_urn p in 173 + if urn <> "" then begin 174 + print_string (styled dim (Fmt.str " urn: %s" urn)); 175 + print_char '\n' 176 + end 177 + 178 + (** {2 Item output} *) 179 + 180 + let yaml_escape s = 181 + let buf = Buffer.create (String.length s + 2) in 182 + String.iter 183 + (fun c -> 184 + match c with 185 + | '\\' | '"' -> 186 + Buffer.add_char buf '\\'; 187 + Buffer.add_char buf c 188 + | '\n' -> Buffer.add_string buf "\\n" 189 + | c -> Buffer.add_char buf c) 190 + s; 191 + Buffer.contents buf 192 + 193 + let yaml_string s = Fmt.str "\"%s\"" (yaml_escape s) 194 + 195 + let frontmatter_of_item (i : Linkedin.Item.t) = 196 + let kind_str = 197 + match Linkedin.Item.kind i with `Article -> "article" | `Post -> "post" 198 + in 199 + let (y, m, d), _ = Ptime.to_date_time (Linkedin.Item.published i) in 200 + let buf = Buffer.create 256 in 201 + let add_line k v = 202 + Buffer.add_string buf k; 203 + Buffer.add_string buf ": "; 204 + 
Buffer.add_string buf v; 205 + Buffer.add_char buf '\n' 206 + in 207 + Buffer.add_string buf "---\n"; 208 + (match Linkedin.Item.title i with 209 + | Some t when t <> "" -> add_line "title" (yaml_string t) 210 + | _ -> ()); 211 + (match Linkedin.Item.subtitle i with 212 + | Some s when s <> "" -> add_line "subtitle" (yaml_string s) 213 + | _ -> ()); 214 + add_line "author" (yaml_string (Linkedin.Item.author i)); 215 + (match Linkedin.Item.author_id i with 216 + | Some id -> add_line "author_id" (yaml_string id) 217 + | None -> ()); 218 + add_line "date" (Fmt.str "%04d-%02d-%02d" y m d); 219 + add_line "url" (yaml_string (Linkedin.Item.url i)); 220 + add_line "kind" kind_str; 221 + let likes = Linkedin.Item.num_likes i in 222 + let comments = Linkedin.Item.num_comments i in 223 + if likes > 0 then add_line "likes" (string_of_int likes); 224 + if comments > 0 then add_line "comments" (string_of_int comments); 225 + Buffer.add_string buf "---\n\n"; 226 + Buffer.contents buf 227 + 228 + let print_item_md (i : Linkedin.Item.t) = 229 + print_string (frontmatter_of_item i); 230 + let md = Linkedin.Item.markdown i in 231 + print_string md; 232 + let n = String.length md in 233 + if n = 0 || md.[n - 1] <> '\n' then print_char '\n' 234 + 235 + let print_item_html (i : Linkedin.Item.t) = 236 + let html = Linkedin.Item.html i in 237 + if html = "" then print_string (Linkedin.Item.markdown i) 238 + else print_string html; 239 + print_char '\n' 240 + 241 + let print_item_json i = 242 + print_string (Json.to_string Linkedin.Item.json i); 243 + print_char '\n' 244 + 245 + let print_item fmt i = 246 + match fmt with 247 + | Markdown -> print_item_md i 248 + | Html -> print_item_html i 249 + | Json -> print_item_json i 250 + 251 + let print_item_summary (s : Linkedin.Item.summary) = 252 + let title = Option.value ~default:"" s.title in 253 + if title <> "" then begin 254 + print_string (styled bold_cyan title); 255 + print_string " — " 256 + end; 257 + print_string s.url; 258 + 
print_char '\n' 259 + 260 + let print_summaries_json summaries = 261 + let codec = Json.Codec.list Linkedin.Item.summary_json in 262 + print_string (Json.to_string codec summaries); 263 + print_char '\n'
+64
bin/cli.mli
··· 1 + (** Shared helpers for the [linkedin] CLI subcommands. *) 2 + 3 + open Cmdliner 4 + 5 + type output_format = Markdown | Html | Json (** Output format for [get]. *) 6 + 7 + (** {1 Cmdliner terms shared across subcommands} *) 8 + 9 + val setup : unit Term.t 10 + (** [setup] is the vlog setup term: initialises logging for the CLI and also 11 + parses the [--json] flag used as a format alias. *) 12 + 13 + val li_at : string option Term.t 14 + (** [li_at] parses the [--li-at] flag or [LINKEDIN_LI_AT] env var. *) 15 + 16 + val jsessionid : string option Term.t 17 + (** [jsessionid] parses the [--jsessionid] flag or [LINKEDIN_JSESSIONID] env 18 + var. *) 19 + 20 + val format : output_format Term.t 21 + (** [format] parses the [--format md|html|json] flag, default [md]. *) 22 + 23 + val effective_format : json_alias:bool -> output_format -> output_format 24 + (** [effective_format ~json_alias fmt] returns [Json] when vlog's [--json] flag 25 + is set, else [fmt]. *) 26 + 27 + (** {1 Runtime helpers} *) 28 + 29 + val run : (sw:Eio.Switch.t -> Eio_unix.Stdenv.base -> unit) -> unit 30 + (** [run f] boots [Eio_main], opens a top-level switch, and invokes [f]. *) 31 + 32 + val api : 33 + sw:Eio.Switch.t -> 34 + Eio_unix.Stdenv.base -> 35 + string option -> 36 + string option -> 37 + Linkedin.Api.t 38 + (** [api ~sw env li_at jsessionid] resolves cookies (from flags, XDG, or Chrome) 39 + and builds an authenticated API client. Exits with an error message if 40 + cookies cannot be obtained. *) 41 + 42 + val save_cookies_from_chrome : Eio_unix.Stdenv.base -> string 43 + (** [save_cookies_from_chrome env] extracts the session cookies from Chrome, 44 + saves them to XDG, and returns the save path. Exits with a non-zero status 45 + on failure. *) 46 + 47 + (** {1 Display helpers} *) 48 + 49 + val print_error : Linkedin.Api.error -> unit 50 + (** [print_error e] writes a formatted error to stderr. 
*) 51 + 52 + val print_profile : Linkedin.Profile.t -> unit 53 + (** [print_profile p] renders a profile for terminal display. *) 54 + 55 + val print_item : output_format -> Linkedin.Item.t -> unit 56 + (** [print_item fmt i] writes the item to stdout in the chosen format: Markdown 57 + with YAML frontmatter, raw body HTML, or the JSON codec. *) 58 + 59 + val print_item_summary : Linkedin.Item.summary -> unit 60 + (** [print_item_summary s] renders a feed summary as one line. *) 61 + 62 + val print_summaries_json : Linkedin.Item.summary list -> unit 63 + (** [print_summaries_json ss] writes the summaries as a single JSON array using 64 + {!Linkedin.Item.summary_json}. *)
+46
bin/cmd_feed.ml
··· 1 + open Cmdliner 2 + open Tty 3 + 4 + let id_or_url = 5 + let doc = "Profile ID or profile URL." in 6 + Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"ID_OR_URL") 7 + 8 + let count = 9 + let doc = "Number of feed items to walk (articles + posts combined)." in 10 + Arg.(value & opt int 100 & info [ "n"; "count" ] ~doc) 11 + 12 + let run () li_at jsessionid id_or_url count json_alias () = 13 + match Linkedin.Linkedin_url.profile_of_string id_or_url with 14 + | Error msg -> 15 + Fmt.epr "Error: %s@." msg; 16 + exit 1 17 + | Ok profile_id -> ( 18 + Cli.run @@ fun ~sw env -> 19 + let api = Cli.api ~sw env li_at jsessionid in 20 + match Linkedin.Item.feed ~count ~profile_id api with 21 + | Ok [] -> 22 + if json_alias then Cli.print_summaries_json [] 23 + else 24 + print_string 25 + (Span.to_string 26 + (Span.styled (Style.fg Color.bright_black) "No items found.\n")) 27 + | Ok summaries -> 28 + if json_alias then Cli.print_summaries_json summaries 29 + else List.iter Cli.print_item_summary summaries 30 + | Error e -> Cli.print_error e) 31 + 32 + let cmd = 33 + let doc = "Walk a person's recent feed (articles + posts)." in 34 + let man = 35 + [ 36 + `S Manpage.s_examples; 37 + `Pre " linkedin feed johndoe"; 38 + `Pre " linkedin feed -n 500 johndoe"; 39 + `Pre " linkedin feed johndoe --json"; 40 + ] 41 + in 42 + let info = Cmd.info "feed" ~doc ~man in 43 + Cmd.v info 44 + Term.( 45 + const run $ Cli.setup $ Cli.li_at $ Cli.jsessionid $ id_or_url $ count 46 + $ Vlog.json $ const ())
+5
bin/cmd_feed.mli
··· 1 + (** The [linkedin feed <person>] subcommand: walks a member's recent share 2 + feed, returning article and post summaries interleaved. *) 3 + 4 + val cmd : unit Cmdliner.Cmd.t 5 + (** [cmd] is the Cmdliner term for the [feed] subcommand. *)
+36
bin/cmd_get.ml
··· 1 + open Cmdliner 2 + 3 + let url = 4 + let doc = 5 + "LinkedIn item URL (Pulse article or feed post) or bare URN/slug." 6 + in 7 + Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"URL") 8 + 9 + let run () li_at jsessionid url fmt json_alias () = 10 + let fmt = Cli.effective_format ~json_alias fmt in 11 + Cli.run @@ fun ~sw env -> 12 + let api = Cli.api ~sw env li_at jsessionid in 13 + match Linkedin.Item.get api url with 14 + | Ok item -> Cli.print_item fmt item 15 + | Error e -> Cli.print_error e 16 + 17 + let cmd = 18 + let doc = 19 + "Fetch a LinkedIn item (article or post) as Markdown, HTML, or JSON." 20 + in 21 + let man = 22 + [ 23 + `S Manpage.s_examples; 24 + `Pre 25 + " linkedin get \ 26 + https://www.linkedin.com/pulse/my-article-johndoe-abcde/"; 27 + `Pre " linkedin get <url> --format json"; 28 + `Pre " linkedin get <url> --format html > article.html"; 29 + `Pre " linkedin get <url> --json # alias for --format json"; 30 + ] 31 + in 32 + let info = Cmd.info "get" ~doc ~man in 33 + Cmd.v info 34 + Term.( 35 + const run $ Cli.setup $ Cli.li_at $ Cli.jsessionid $ url $ Cli.format 36 + $ Vlog.json $ const ())
+5
bin/cmd_get.mli
··· 1 + (** The [linkedin get <url>] subcommand: fetches a LinkedIn item (Pulse 2 + article or short feed post) as Markdown, raw HTML, or JSON. *) 3 + 4 + val cmd : unit Cmdliner.Cmd.t 5 + (** [cmd] is the Cmdliner term for the [get] subcommand. *)
+43
bin/cmd_login.ml
··· 1 + open Cmdliner 2 + open Tty 3 + 4 + let dim = Style.fg Color.bright_black 5 + let bold_green = Style.(bold + fg Color.green) 6 + let styled s txt = Span.to_string (Span.styled s txt) 7 + 8 + let run () () = 9 + Cli.run @@ fun ~sw env -> 10 + let path = Cli.save_cookies_from_chrome env in 11 + (* Verify the session works by calling /me *) 12 + let api = Cli.api ~sw env None None in 13 + match Linkedin.Api.me api with 14 + | Ok p -> 15 + let name = Linkedin.Profile.display_name p in 16 + Fmt.pr "%s Logged in as %s@." (styled bold_green "✓") 17 + (styled bold_green name); 18 + Fmt.pr " %s@." (styled dim (Fmt.str "cookies saved to %s" path)) 19 + | Error e -> 20 + Fmt.epr 21 + "Warning: cookies saved but session check failed: %a@.Run `linkedin \ 22 + login` again, or make sure you're signed in to LinkedIn in Chrome.@." 23 + Linkedin.Api.pp_error e; 24 + exit 1 25 + 26 + let cmd = 27 + let doc = 28 + "Authenticate the CLI by reading your LinkedIn session from Chrome." 29 + in 30 + let man = 31 + [ 32 + `S Manpage.s_description; 33 + `P 34 + "Extracts the $(b,li_at) and $(b,JSESSIONID) cookies from Chrome (on \ 35 + macOS) using the Keychain to decrypt them, saves them to the XDG data \ 36 + directory, and confirms the session by fetching your profile. Re-run \ 37 + whenever your LinkedIn session expires."; 38 + `S Manpage.s_examples; 39 + `Pre " linkedin login"; 40 + ] 41 + in 42 + let info = Cmd.info "login" ~doc ~man in 43 + Cmd.v info Term.(const run $ Cli.setup $ const ())
+6
bin/cmd_login.mli
··· 1 + (** The [linkedin login] subcommand: extracts the logged-in LinkedIn session 2 + cookies from Chrome, saves them to the XDG data dir, and confirms the 3 + session by calling {!Linkedin.Api.me}. *) 4 + 5 + val cmd : unit Cmdliner.Cmd.t 6 + (** [cmd] is the Cmdliner term for the [login] subcommand. *)
+21
bin/cmd_logout.ml
··· 1 + open Cmdliner 2 + open Tty 3 + 4 + let dim = Style.fg Color.bright_black 5 + let styled s txt = Span.to_string (Span.styled s txt) 6 + 7 + let run () () = 8 + Cli.run @@ fun ~sw:_ env -> 9 + let fs = Eio.Stdenv.fs env in 10 + let xdg = Xdge.v fs "linkedin" in 11 + let path = Eio.Path.(Xdge.data_dir xdg / "cookies") in 12 + match Eio.Path.unlink path with 13 + | () -> 14 + Fmt.pr "Logged out. %s@." 15 + (styled dim (Fmt.str "removed %s" (Eio.Path.native_exn path))) 16 + | exception Eio.Io _ -> Fmt.pr "%s@." (styled dim "Not logged in.") 17 + 18 + let cmd = 19 + let doc = "Remove the saved LinkedIn session cookies." in 20 + let info = Cmd.info "logout" ~doc in 21 + Cmd.v info Term.(const run $ Cli.setup $ const ())
+5
bin/cmd_logout.mli
··· 1 + (** The [linkedin logout] subcommand: removes the saved session cookies from the 2 + XDG data dir. *) 3 + 4 + val cmd : unit Cmdliner.Cmd.t 5 + (** [cmd] is the Cmdliner term for the [logout] subcommand. *)
+14
bin/cmd_me.ml
··· 1 + open Cmdliner 2 + 3 + let run () li_at jsessionid () = 4 + Cli.run @@ fun ~sw env -> 5 + let api = Cli.api ~sw env li_at jsessionid in 6 + match Linkedin.Api.me api with 7 + | Ok p -> Cli.print_profile p 8 + | Error e -> Cli.print_error e 9 + 10 + let cmd = 11 + let doc = "Show your own profile." in 12 + let info = Cmd.info "me" ~doc in 13 + Cmd.v info 14 + Term.(const run $ Cli.setup $ Cli.li_at $ Cli.jsessionid $ const ())
+4
bin/cmd_me.mli
··· 1 + (** The [linkedin me] subcommand: shows the authenticated user's profile. *) 2 + 3 + val cmd : unit Cmdliner.Cmd.t 4 + (** [cmd] is the Cmdliner term for the [me] subcommand. *)
+34
bin/cmd_profile.ml
··· 1 + open Cmdliner 2 + 3 + let id_or_url = 4 + let doc = 5 + "Public ID or profile URL (e.g. $(b,johndoe) or \ 6 + $(b,https://linkedin.com/in/johndoe))." 7 + in 8 + Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"ID_OR_URL") 9 + 10 + let run () li_at jsessionid id_or_url () = 11 + match Linkedin.Linkedin_url.profile_of_string id_or_url with 12 + | Error msg -> 13 + Fmt.epr "Error: %s@." msg; 14 + exit 1 15 + | Ok public_id -> ( 16 + Cli.run @@ fun ~sw env -> 17 + let api = Cli.api ~sw env li_at jsessionid in 18 + match Linkedin.Api.profile ~public_id api with 19 + | Ok p -> Cli.print_profile p 20 + | Error e -> Cli.print_error e) 21 + 22 + let cmd = 23 + let doc = "Show a user's profile." in 24 + let man = 25 + [ 26 + `S Manpage.s_examples; 27 + `Pre " linkedin profile johndoe"; 28 + `Pre " linkedin profile https://www.linkedin.com/in/johndoe"; 29 + ] 30 + in 31 + let info = Cmd.info "profile" ~doc ~man in 32 + Cmd.v info 33 + Term.( 34 + const run $ Cli.setup $ Cli.li_at $ Cli.jsessionid $ id_or_url $ const ())
+4
bin/cmd_profile.mli
··· 1 + (** The [linkedin profile <id>] subcommand: shows a user's profile. *) 2 + 3 + val cmd : unit Cmdliner.Cmd.t 4 + (** [cmd] is the Cmdliner term for the [profile] subcommand. *)
+9 -325
bin/main.ml
··· 1 1 open Cmdliner 2 - open Tty 3 - 4 - let setup = Vlog.setup "linkedin" 5 - let li_at_env = Cmd.Env.info "LINKEDIN_LI_AT" ~doc:"LinkedIn li_at cookie." 6 - 7 - let jsessionid_env = 8 - Cmd.Env.info "LINKEDIN_JSESSIONID" ~doc:"LinkedIn JSESSIONID cookie." 9 - 10 - let li_at_t = 11 - let doc = "LinkedIn li_at cookie (or set LINKEDIN_LI_AT env var)." in 12 - Arg.(value & opt (some string) None & info [ "li-at" ] ~doc ~env:li_at_env) 13 - 14 - let jsessionid_t = 15 - let doc = 16 - "LinkedIn JSESSIONID cookie (or set LINKEDIN_JSESSIONID env var)." 17 - in 18 - Arg.( 19 - value 20 - & opt (some string) None 21 - & info [ "jsessionid" ] ~doc ~env:jsessionid_env) 22 - 23 - let cookie_help = 24 - {|To get your LinkedIn session cookies, either: 25 - 26 - a) Let the CLI read them from Chrome automatically (macOS): 27 - 28 - linkedin cookies # reads from Chrome and saves locally 29 - 30 - b) Or export them manually (get from browser DevTools > Application > Cookies): 31 - 32 - export LINKEDIN_LI_AT="<li_at value>" 33 - export LINKEDIN_JSESSIONID="<JSESSIONID value>"|} 34 - 35 - (** {1 XDG cookie persistence} *) 36 - 37 - let cookies_path xdg = Eio.Path.(Xdge.data_dir xdg / "cookies") 38 - 39 - let save_cookies xdg ~li_at ~jsessionid = 40 - let path = cookies_path xdg in 41 - let data = Fmt.str "%s\n%s\n" li_at jsessionid in 42 - Eio.Path.save ~create:(`Or_truncate 0o600) path data 43 - 44 - let load_cookies xdg = 45 - try 46 - let data = Eio.Path.load (cookies_path xdg) in 47 - match String.split_on_char '\n' data with 48 - | li_at :: jsessionid :: _ when li_at <> "" && jsessionid <> "" -> 49 - Some (li_at, jsessionid) 50 - | _ -> None 51 - with Eio.Io _ -> None 52 - 53 - (** Returns [(li_at, jsessionid, extra_cookies)]. Extra cookies include 54 - Cloudflare tokens when read from Chrome. 
*) 55 - let resolve_cookies env li_at jsessionid = 56 - match (li_at, jsessionid) with 57 - | Some a, Some j -> (a, j, []) 58 - | _ -> ( 59 - let fs = Eio.Stdenv.fs env in 60 - let xdg = Xdge.v fs "linkedin" in 61 - (* Try saved cookies from XDG *) 62 - match (li_at, jsessionid, load_cookies xdg) with 63 - | None, None, Some (a, j) -> (a, j, []) 64 - | Some a, None, Some (_, j) -> (a, j, []) 65 - | None, Some j, Some (a, _) -> (a, j, []) 66 - | _ -> ( 67 - (* Fall back to Chrome — extract ALL cookies *) 68 - let proc_mgr = Eio.Stdenv.process_mgr env in 69 - match Linkedin.Chrome_cookies.all_linkedin_cookies proc_mgr fs with 70 - | Ok cookies -> ( 71 - let li_at_v = List.assoc_opt "li_at" cookies in 72 - let jsessionid_v = List.assoc_opt "JSESSIONID" cookies in 73 - match (li_at_v, jsessionid_v) with 74 - | Some a, Some j -> 75 - save_cookies xdg ~li_at:a ~jsessionid:j; 76 - let li_at = match li_at with Some v -> v | None -> a in 77 - let jsessionid = 78 - match jsessionid with Some v -> v | None -> j 79 - in 80 - let extra = 81 - List.filter 82 - (fun (n, _) -> n <> "li_at" && n <> "JSESSIONID") 83 - cookies 84 - in 85 - (li_at, jsessionid, extra) 86 - | _ -> 87 - Fmt.epr 88 - "Error: Chrome cookies missing li_at or JSESSIONID.@.@.%s@." 89 - cookie_help; 90 - exit 1) 91 - | Error e -> 92 - Fmt.epr 93 - "Error: Cookies not provided and could not read from Chrome: \ 94 - %a@.@.%s@." 
95 - Linkedin.Chrome_cookies.pp_error e cookie_help; 96 - exit 1)) 97 - 98 - let run f = 99 - Eio_main.run @@ fun env -> 100 - Eio.Switch.run @@ fun sw -> f ~sw env 101 - 102 - (** {1 Styles} *) 103 - 104 - let dim = Style.fg Color.bright_black 105 - let bold_cyan = Style.(bold + fg Color.cyan) 106 - let styled s txt = Span.to_string (Span.styled s txt) 107 - 108 - (** {1 Display helpers} *) 109 - 110 - let print_profile (p : Linkedin.Profile.t) = 111 - let width = Width.terminal_width () in 112 - ignore width; 113 - let name = Linkedin.Profile.display_name p in 114 - print_string (styled bold_cyan name); 115 - print_char '\n'; 116 - let headline = Linkedin.Profile.headline p in 117 - if headline <> "" then begin 118 - print_string (styled dim " "); 119 - print_string headline; 120 - print_char '\n' 121 - end; 122 - let location = Linkedin.Profile.location p in 123 - if location <> "" then begin 124 - print_string (styled dim " "); 125 - print_string (styled dim location); 126 - print_char '\n' 127 - end; 128 - let summary = Linkedin.Profile.summary p in 129 - if summary <> "" then begin 130 - print_char '\n'; 131 - print_string summary; 132 - print_char '\n' 133 - end; 134 - let pub_id = Linkedin.Profile.public_id p in 135 - if pub_id <> "" then begin 136 - print_string (styled dim (Fmt.str " public_id: %s" pub_id)); 137 - print_char '\n' 138 - end; 139 - let urn = Linkedin.Profile.entity_urn p in 140 - if urn <> "" then begin 141 - print_string (styled dim (Fmt.str " urn: %s" urn)); 142 - print_char '\n' 143 - end 144 - 145 - let print_post (p : Linkedin.Post.t) = 146 - let author = Linkedin.Post.author_name p in 147 - let text = Linkedin.Post.text p in 148 - let likes = Linkedin.Post.num_likes p in 149 - let comments = Linkedin.Post.num_comments p in 150 - if author <> "" then begin 151 - print_string (styled bold_cyan author); 152 - print_char '\n' 153 - end; 154 - if text <> "" then begin 155 - print_string " "; 156 - print_string text; 157 - print_char '\n' 
158 - end; 159 - if likes > 0 || comments > 0 then begin 160 - print_string (styled dim (Fmt.str " %d likes, %d comments" likes comments)); 161 - print_char '\n' 162 - end; 163 - let urn = Linkedin.Post.urn p in 164 - if urn <> "" then begin 165 - print_string (styled dim (Fmt.str " urn: %s" urn)); 166 - print_char '\n' 167 - end 168 - 169 - let print_error e = Fmt.epr "Error: %a@." Linkedin.Api.pp_error e 170 - 171 - let api ~sw env li_at jsessionid = 172 - let li_at, jsessionid, extra_cookies = resolve_cookies env li_at jsessionid in 173 - let api = Linkedin.Api.v ~sw env ~li_at ~jsessionid in 174 - Linkedin.Api.add_cookies api extra_cookies; 175 - api 176 - 177 - (** {1 Subcommands} *) 178 - 179 - let me_cmd = 180 - let run' () li_at jsessionid () = 181 - run @@ fun ~sw env -> 182 - let api = api ~sw env li_at jsessionid in 183 - match Linkedin.Api.me api with 184 - | Ok p -> print_profile p 185 - | Error e -> print_error e 186 - in 187 - let doc = "Show your own profile." in 188 - let info = Cmd.info "me" ~doc in 189 - Cmd.v info Term.(const run' $ setup $ li_at_t $ jsessionid_t $ const ()) 190 - 191 - let profile_cmd = 192 - let id_or_url_t = 193 - let doc = 194 - "Public ID or profile URL (e.g. $(b,johndoe) or \ 195 - $(b,https://linkedin.com/in/johndoe))." 196 - in 197 - Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"ID_OR_URL") 198 - in 199 - let run' () li_at jsessionid id_or_url () = 200 - match Linkedin.Linkedin_url.profile_of_string id_or_url with 201 - | Error msg -> 202 - Fmt.epr "Error: %s@." msg; 203 - exit 1 204 - | Ok public_id -> ( 205 - run @@ fun ~sw env -> 206 - let api = api ~sw env li_at jsessionid in 207 - match Linkedin.Api.profile ~public_id api with 208 - | Ok p -> print_profile p 209 - | Error e -> print_error e) 210 - in 211 - let doc = "Show a user's profile." 
in 212 - let man = 213 - [ 214 - `S Manpage.s_examples; 215 - `Pre " linkedin profile johndoe"; 216 - `Pre " linkedin profile https://www.linkedin.com/in/johndoe"; 217 - ] 218 - in 219 - let info = Cmd.info "profile" ~doc ~man in 220 - Cmd.v info 221 - Term.(const run' $ setup $ li_at_t $ jsessionid_t $ id_or_url_t $ const ()) 222 - 223 - let posts_cmd = 224 - let id_or_url_t = 225 - let doc = 226 - "Profile ID or profile URL (e.g. $(b,johndoe) or \ 227 - $(b,https://linkedin.com/in/johndoe))." 228 - in 229 - Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"ID_OR_URL") 230 - in 231 - let count_t = 232 - let doc = "Number of posts to fetch." in 233 - Arg.(value & opt int 10 & info [ "n"; "count" ] ~doc) 234 - in 235 - let run' () li_at jsessionid id_or_url count () = 236 - match Linkedin.Linkedin_url.profile_of_string id_or_url with 237 - | Error msg -> 238 - Fmt.epr "Error: %s@." msg; 239 - exit 1 240 - | Ok profile_id -> ( 241 - run @@ fun ~sw env -> 242 - let api = api ~sw env li_at jsessionid in 243 - match Linkedin.Api.posts ~count ~profile_id api with 244 - | Ok posts -> 245 - let n = List.length posts in 246 - List.iteri 247 - (fun i p -> 248 - print_post p; 249 - if i < n - 1 then print_char '\n') 250 - posts; 251 - if n = 0 then print_string (styled dim "No posts found.\n") 252 - | Error e -> print_error e) 253 - in 254 - let doc = "Show feed posts for a profile." 
in 255 - let man = 256 - [ 257 - `S Manpage.s_examples; 258 - `Pre " linkedin posts johndoe"; 259 - `Pre " linkedin posts https://www.linkedin.com/in/johndoe"; 260 - `Pre " linkedin posts -n 5 johndoe"; 261 - ] 262 - in 263 - let info = Cmd.info "posts" ~doc ~man in 264 - Cmd.v info 265 - Term.( 266 - const run' $ setup $ li_at_t $ jsessionid_t $ id_or_url_t $ count_t 267 - $ const ()) 268 - 269 - let cookies_cmd = 270 - let run' () () = 271 - run @@ fun ~sw:_ env -> 272 - let proc_mgr = Eio.Stdenv.process_mgr env in 273 - let fs = Eio.Stdenv.fs env in 274 - let xdg = Xdge.v fs "linkedin" in 275 - match Linkedin.Chrome_cookies.linkedin_cookies proc_mgr fs with 276 - | Ok (li_at, jsessionid) -> 277 - save_cookies xdg ~li_at ~jsessionid; 278 - let path = Eio.Path.native_exn (cookies_path xdg) in 279 - Fmt.pr "Saved LinkedIn cookies to %s@." (styled dim path) 280 - | Error e -> 281 - Fmt.epr "Error: %a@." Linkedin.Chrome_cookies.pp_error e; 282 - exit 1 283 - in 284 - let doc = "Extract LinkedIn cookies from Chrome and save locally (macOS)." in 285 - let info = Cmd.info "cookies" ~doc in 286 - Cmd.v info Term.(const run' $ setup $ const ()) 287 - 288 - let post_cmd = 289 - let urn_or_url_t = 290 - let doc = 291 - "Activity URN or post URL (e.g. $(b,urn:li:activity:123) or \ 292 - $(b,https://linkedin.com/posts/...))." 293 - in 294 - Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"URN_OR_URL") 295 - in 296 - let run' () li_at jsessionid urn_or_url () = 297 - match Linkedin.Linkedin_url.post_of_string urn_or_url with 298 - | Error msg -> 299 - Fmt.epr "Error: %s@." msg; 300 - exit 1 301 - | Ok urn -> ( 302 - run @@ fun ~sw env -> 303 - let api = api ~sw env li_at jsessionid in 304 - match Linkedin.Api.post ~urn api with 305 - | Ok p -> print_post p 306 - | Error e -> print_error e) 307 - in 308 - let doc = "Show a single post." 
in 309 - let man = 310 - [ 311 - `S Manpage.s_examples; 312 - `Pre " linkedin post urn:li:activity:7123456789"; 313 - `Pre 314 - " linkedin post \ 315 - https://www.linkedin.com/feed/update/urn:li:activity:7123456789"; 316 - `Pre 317 - " linkedin post \ 318 - https://www.linkedin.com/posts/johndoe_title-activity-123-abc"; 319 - ] 320 - in 321 - let info = Cmd.info "post" ~doc ~man in 322 - Cmd.v info 323 - Term.(const run' $ setup $ li_at_t $ jsessionid_t $ urn_or_url_t $ const ()) 324 - 325 - (** {1 Main} *) 326 2 327 3 let cmd = 328 4 let doc = "LinkedIn API command-line client." in 329 5 let info = Cmd.info "linkedin" ~version:Monopam_info.version ~doc in 330 - Cmd.group info [ me_cmd; profile_cmd; posts_cmd; post_cmd; cookies_cmd ] 6 + Cmd.group info 7 + [ 8 + Cmd_me.cmd; 9 + Cmd_profile.cmd; 10 + Cmd_feed.cmd; 11 + Cmd_get.cmd; 12 + Cmd_login.cmd; 13 + Cmd_logout.cmd; 14 + ] 331 15 332 16 let () = exit (Cmd.eval cmd)
+4
dune
··· 1 1 (env 2 2 (dev 3 3 (flags :standard %{dune-warnings}))) 4 + 5 + (mdx 6 + (files README.md) 7 + (libraries linkedin eio_main eio eio.core eio.unix fmt))
+4
dune-project
··· 1 1 (lang dune 3.21) 2 + (using mdx 0.4) 2 3 3 4 (name linkedin) 4 5 ··· 29 30 (kdf (>= 0.1)) 30 31 (uri (>= 4.0)) 31 32 (cmdliner (>= 1.2)) 33 + (lambdasoup (>= 1.0)) 34 + (re (>= 1.11)) 35 + (mdx :with-test) 32 36 (alcotest :with-test)))
+43 -18
lib/api.ml
··· 12 12 type error = 13 13 [ `Http_error of int * string 14 14 | `Json_parse of string 15 + | `Parse_error of string 15 16 | `Network_error of string ] 16 17 17 18 let pp_error ppf = function ··· 20 21 (if String.length body > 200 then String.sub body 0 200 ^ "..." 21 22 else body) 22 23 | `Json_parse msg -> Fmt.pf ppf "JSON parse error: %s" msg 24 + | `Parse_error msg -> Fmt.pf ppf "Parse error: %s" msg 23 25 | `Network_error msg -> Fmt.pf ppf "Network error: %s" msg 24 26 25 27 (** {1 API client type} *) 26 28 27 - type t = { session : Requests.t; now : Ptime.t } 29 + type t = { 30 + session : Requests.t; 31 + public_session : Requests.t; 32 + (** cookieless session used for public pages (Pulse articles); LinkedIn 33 + serves a React app shell to authenticated sessions but full SSR HTML 34 + to anonymous ones. *) 35 + now : Ptime.t; 36 + } 28 37 29 38 let user_agent = 30 39 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, \ ··· 63 72 Cookie_jar.add_cookie jar (cookie ~now ~name:"li_at" ~value:li_at); 64 73 Cookie_jar.add_cookie jar 65 74 (cookie ~now ~name:"JSESSIONID" ~value:(Fmt.str "\"%s\"" csrf_token)); 66 - { session; now } 75 + let public_headers = 76 + Requests.Headers.empty 77 + |> Requests.Headers.add_string "user-agent" user_agent 78 + |> Requests.Headers.add_string "accept-language" 79 + "en-AU,en-GB;q=0.9,en-US;q=0.8,en;q=0.7" 80 + in 81 + let public_session = 82 + Requests.v ~sw ~base_url:"https://www.linkedin.com" 83 + ~default_headers:public_headers ~max_redirects:5 ~xsrf_cookie_name:None 84 + env 85 + in 86 + { session; public_session; now } 67 87 68 88 let add_cookies t cookies = 69 89 let jar = Requests.cookies t.session in ··· 76 96 77 97 (** {1 Core request function} *) 78 98 79 - let get t path = 99 + let html_accept = 100 + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" 101 + 102 + let do_get ~session ~accept path = 80 103 try 81 - let jar = Requests.cookies t.session in 82 - let all = 
Cookie_jar.all_cookies jar in 83 - let summary = 84 - List.map 85 - (fun c -> 86 - Fmt.str "%s=<%d chars> (domain=%s, path=%s)" (Cookie.name c) 87 - (String.length (Cookie.value c)) 88 - (Cookie.domain c) (Cookie.path c)) 89 - all 104 + let headers = 105 + Requests.Headers.empty |> Requests.Headers.add_string "accept" accept 90 106 in 91 - Log.debug (fun m -> 92 - m "Cookie jar contains %d cookies: [%s]" (List.length all) 93 - (String.concat "; " summary)); 94 - let resp = Requests.get t.session path in 107 + let resp = Requests.get session ~headers path in 95 108 let status = Requests.Response.status_code resp in 96 109 let body = Requests.Response.text resp in 97 110 if status >= 200 && status < 300 then Ok body ··· 104 117 cookies` to refresh.") 105 118 | exn -> Error (`Network_error (Printexc.to_string exn)) 106 119 120 + let with_accept ~accept t path = do_get ~session:t.session ~accept path 121 + let get t path = do_get ~session:t.public_session ~accept:html_accept path 122 + 107 123 let json t path codec = 108 - match get t path with 124 + match 125 + with_accept ~accept:"application/vnd.linkedin.normalized+json+2.1" t path 126 + with 109 127 | Error _ as e -> e 110 128 | Ok body -> ( 111 129 Log.debug (fun m -> m "Raw response body (%d bytes)" (String.length body)); ··· 148 166 149 167 let post ~urn t = 150 168 let path = Fmt.str "/voyager/api/feed/updates/%s" (url_encode urn) in 151 - json t path Post.normalized_json 169 + match json t path Json.Codec.Value.t with 170 + | Error _ as e -> e 171 + | Ok j -> ( 172 + match Post.of_voyager_response j with 173 + | Some p -> Ok p 174 + | None -> 175 + Error 176 + (`Parse_error "could not find UpdateV2 entity in Voyager response"))
+10
lib/api.mli
··· 12 12 type error = 13 13 [ `Http_error of int * string 14 14 | `Json_parse of string 15 + | `Parse_error of string 15 16 | `Network_error of string ] 16 17 (** API error type. *) 17 18 ··· 57 58 58 59 val post : urn:string -> t -> (Post.t, error) result 59 60 (** [post ~urn t] retrieves a single post by its activity URN. *) 61 + 62 + (** {1 Raw HTTP} *) 63 + 64 + val get : t -> string -> (string, error) result 65 + (** [get t path] performs an HTTP GET on [path] (relative to 66 + [https://www.linkedin.com]) and returns the raw response body. Exposed for 67 + callers that fetch non-JSON responses (e.g. HTML for Pulse articles). 68 + The request is made on the cookieless public session with an HTML 69 + [Accept] header, so session cookies are not sent. *)
+2
lib/dune
··· 11 11 requests 12 12 cookie 13 13 cookie.jar 14 + lambdasoup 15 + re 14 16 ptime 15 17 uri))
+89
lib/html.ml
··· 1 + type node = Soup.general Soup.node 2 + 3 + type 'a t = node -> ('a, string) result 4 + (** A decoder takes a DOM node (subtree root) and produces either a value or an 5 + error message describing what it couldn't find. *) 6 + 7 + let return v _node = Ok v 8 + let map f dec node = match dec node with Ok v -> Ok (f v) | Error _ as e -> e 9 + 10 + let both a b node = 11 + match a node with 12 + | Error _ as e -> e 13 + | Ok av -> ( match b node with Error _ as e -> e | Ok bv -> Ok (av, bv)) 14 + 15 + let apply fd ad node = 16 + match fd node with 17 + | Error _ as e -> e 18 + | Ok f -> ( match ad node with Error _ as e -> e | Ok v -> Ok (f v)) 19 + 20 + let ( let+ ) x f = map f x 21 + let ( and+ ) = both 22 + let default v dec node = match dec node with Ok _ as r -> r | Error _ -> Ok v 23 + 24 + let fallback decs node = 25 + let rec loop = function 26 + | [] -> Error "fallback: all decoders failed" 27 + | dec :: rest -> ( 28 + match dec node with Ok _ as r -> r | Error _ -> loop rest) 29 + in 30 + loop decs 31 + 32 + (** {1 Leaf decoders} *) 33 + 34 + let text node = 35 + match Soup.trimmed_texts node with 36 + | [] -> Ok "" 37 + | parts -> Ok (String.concat " " parts) 38 + 39 + let attr_opt name node = 40 + match Soup.element node with 41 + | None -> Ok None 42 + | Some el -> Ok (Soup.attribute name el) 43 + 44 + let attr name node = 45 + match attr_opt name node with 46 + | Ok (Some v) -> Ok v 47 + | Ok None -> Error (Fmt.str "missing attribute %S" name) 48 + | Error _ as e -> e 49 + 50 + let html node = Ok (Soup.to_string node) 51 + 52 + (** {1 Navigation} *) 53 + 54 + let coerce_soup (el : Soup.element Soup.node) : node = (Soup.coerce el : node) 55 + 56 + let query_opt selector dec node = 57 + match Soup.( $? ) node selector with 58 + | None -> Ok None 59 + | Some el -> ( 60 + match dec (coerce_soup el) with 61 + | Ok v -> Ok (Some v) 62 + | Error e -> Error (Fmt.str "in %S: %s" selector e)) 63 + 64 + let query selector dec node = 65 + match Soup.( $? 
) node selector with 66 + | None -> Error (Fmt.str "no match for selector %S" selector) 67 + | Some el -> ( 68 + match dec (coerce_soup el) with 69 + | Ok _ as r -> r 70 + | Error e -> Error (Fmt.str "in %S: %s" selector e)) 71 + 72 + let query_all selector dec node = 73 + let nodes = Soup.( $$ ) node selector |> Soup.to_list in 74 + let rec loop acc = function 75 + | [] -> Ok (List.rev acc) 76 + | el :: rest -> ( 77 + match dec (coerce_soup el) with 78 + | Error e -> Error (Fmt.str "in %S: %s" selector e) 79 + | Ok v -> loop (v :: acc) rest) 80 + in 81 + loop [] nodes 82 + 83 + (** {1 Running} *) 84 + 85 + let run_on_soup dec (soup : Soup.soup Soup.node) = dec (Soup.coerce soup) 86 + 87 + let of_string dec html = 88 + let soup = Soup.parse html in 89 + run_on_soup dec soup
+75
lib/html.mli
··· 1 + (** Typed HTML decoders over lambdasoup. 2 + 3 + A small, codec-flavoured decoder API for HTML pages: you describe {e how} to 4 + pull a typed value out of a DOM subtree, and then run it with {!of_string}. 5 + Backed by [lambdasoup] for parsing and CSS-selector queries. *) 6 + 7 + type 'a t 8 + (** A decoder producing values of type ['a] from a DOM subtree. *) 9 + 10 + (** {1 Running} *) 11 + 12 + val of_string : 'a t -> string -> ('a, string) result 13 + (** [of_string dec html] parses [html] and runs [dec] on the root. *) 14 + 15 + val run_on_soup : 'a t -> Soup.soup Soup.node -> ('a, string) result 16 + (** [run_on_soup dec soup] runs [dec] on an already-parsed document. Useful when 17 + a caller has an existing [Soup] handle (e.g. to share parsing across several 18 + decoders). *) 19 + 20 + (** {1 Leaf decoders — act on the current node} *) 21 + 22 + val text : string t 23 + (** [text] is the trimmed, whitespace-joined text content of the current element 24 + (recursive). *) 25 + 26 + val attr : string -> string t 27 + (** [attr name] reads [name] on the current element; fails if absent. *) 28 + 29 + val attr_opt : string -> string option t 30 + (** [attr_opt name] reads [name] on the current element, [None] if absent. *) 31 + 32 + val html : string t 33 + (** [html] is the raw HTML of the current element (including its open and close 34 + tags). *) 35 + 36 + (** {1 Navigation — descend into matching descendants} *) 37 + 38 + val query : string -> 'a t -> 'a t 39 + (** [query selector dec] runs [dec] on the first descendant matching [selector]. 40 + Fails if no match. *) 41 + 42 + val query_opt : string -> 'a t -> 'a option t 43 + (** [query_opt selector dec] runs [dec] on the first descendant matching 44 + [selector], or returns [None] if no match. *) 45 + 46 + val query_all : string -> 'a t -> 'a list t 47 + (** [query_all selector dec] runs [dec] on every descendant matching [selector], 48 + in document order. 
*) 49 + 50 + (** {1 Combinators} *) 51 + 52 + val return : 'a -> 'a t 53 + (** [return v] is the decoder that yields [v] without looking at the DOM. *) 54 + 55 + val map : ('a -> 'b) -> 'a t -> 'b t 56 + (** [map f dec] post-processes the decoded value with [f]. *) 57 + 58 + val both : 'a t -> 'b t -> ('a * 'b) t 59 + (** [both a b] runs both decoders on the same node and pairs their results. *) 60 + 61 + val apply : ('a -> 'b) t -> 'a t -> 'b t 62 + (** [apply fd ad] is applicative apply. *) 63 + 64 + val ( let+ ) : 'a t -> ('a -> 'b) -> 'b t 65 + (** [let+] is [map] in let-syntax, enabling record-building with [and+]. *) 66 + 67 + val ( and+ ) : 'a t -> 'b t -> ('a * 'b) t 68 + (** [and+] is [both] in let-syntax. *) 69 + 70 + val default : 'a -> 'a t -> 'a t 71 + (** [default v dec] returns [v] if [dec] would have failed. *) 72 + 73 + val fallback : 'a t list -> 'a t 74 + (** [fallback decs] tries each decoder in turn and returns the first success. 75 + Fails only if every decoder fails. *)
+259
lib/item.ml
··· 1 + let log_src = Logs.Src.create "linkedin.item" 2 + 3 + module Log = (val Logs.src_log log_src : Logs.LOG) 4 + 5 + type kind = [ `Article | `Post ] 6 + 7 + type summary = { 8 + url : string; 9 + kind : kind; 10 + title : string option; 11 + published : Ptime.t option; 12 + } 13 + 14 + type t = { 15 + url : string; 16 + kind : kind; 17 + title : string option; 18 + subtitle : string option; 19 + author : string; 20 + author_id : string option; 21 + published : Ptime.t; 22 + markdown : string; 23 + html : string; 24 + num_likes : int; 25 + num_comments : int; 26 + } 27 + 28 + let url (t : t) = t.url 29 + let kind (t : t) = t.kind 30 + let title (t : t) = t.title 31 + let subtitle (t : t) = t.subtitle 32 + let author (t : t) = t.author 33 + let author_id (t : t) = t.author_id 34 + let published (t : t) = t.published 35 + let markdown (t : t) = t.markdown 36 + let html (t : t) = t.html 37 + let num_likes (t : t) = t.num_likes 38 + let num_comments (t : t) = t.num_comments 39 + 40 + let pp_kind ppf = function 41 + | `Article -> Fmt.pf ppf "Article" 42 + | `Post -> Fmt.pf ppf "Post" 43 + 44 + let pp_date ppf t = 45 + let (y, m, d), _ = Ptime.to_date_time t in 46 + Fmt.pf ppf "%04d-%02d-%02d" y m d 47 + 48 + let pp ppf (t : t) = 49 + let title = Option.value ~default:"(untitled)" t.title in 50 + Fmt.pf ppf "@[<v># %s (%a)@ @ by %s on %a@ %s@ @ %s@]" title pp_kind t.kind 51 + t.author pp_date t.published t.url t.markdown 52 + 53 + (** {1 Conversion from Pulse.t and Post.t} *) 54 + 55 + let of_pulse (p : Pulse.t) : t = 56 + { 57 + url = Pulse.url p; 58 + kind = `Article; 59 + title = Some (Pulse.title p); 60 + subtitle = Pulse.subtitle p; 61 + author = Pulse.author p; 62 + author_id = Pulse.author_id p; 63 + published = Pulse.published p; 64 + markdown = Pulse.markdown p; 65 + html = Pulse.html p; 66 + num_likes = Pulse.num_likes p; 67 + num_comments = Pulse.num_comments p; 68 + } 69 + 70 + let ptime_of_ms ms = 71 + match Ptime.of_float_s (float_of_int ms /. 1000.) 
with 72 + | Some t -> t 73 + | None -> Ptime.epoch 74 + 75 + let url_of_activity_urn urn = 76 + Fmt.str "https://www.linkedin.com/feed/update/%s" urn 77 + 78 + let of_post ~urn (p : Post.t) : t = 79 + { 80 + url = url_of_activity_urn urn; 81 + kind = `Post; 82 + title = None; 83 + subtitle = None; 84 + author = Post.author_name p; 85 + author_id = None; 86 + published = ptime_of_ms (Post.created_time p); 87 + markdown = Post.text p; 88 + html = ""; 89 + num_likes = Post.num_likes p; 90 + num_comments = Post.num_comments p; 91 + } 92 + 93 + (** {1 Fetch} *) 94 + 95 + let is_urn s = String.length s > 16 && String.sub s 0 16 = "urn:li:activity:" 96 + 97 + let get api input = 98 + match Linkedin_url.of_string input with 99 + | Error _ -> 100 + (* bare string: activity URN or Pulse slug *) 101 + if is_urn input then 102 + Result.map (fun p -> of_post ~urn:input p) (Api.post ~urn:input api) 103 + else Result.map of_pulse (Pulse.fetch api input) 104 + | Ok (Linkedin_url.Profile _) -> 105 + Error (`Parse_error "profile URLs are not items; use Api.profile instead") 106 + | Ok (Linkedin_url.Pulse _) -> Result.map of_pulse (Pulse.fetch api input) 107 + | Ok (Linkedin_url.Post urn) -> 108 + Result.map (fun p -> of_post ~urn p) (Api.post ~urn api) 109 + 110 + (** {1 Feed} *) 111 + 112 + let feed_path ~profile_id ~start ~count = 113 + Fmt.str 114 + "/voyager/api/feed/updates?profileId=%s&q=memberShareFeed&moduleKey=member-share&count=%d&start=%d" 115 + profile_id count start 116 + 117 + (** Matches either a Pulse article URL ([linkedin.com/pulse/<slug>]) or an 118 + activity URN ([urn:li:activity:<id>]). Group 1 captures the article slug; 119 + group 2 captures the activity id. Only one group fires per match. 
*) 120 + let re = 121 + let slash = Re.alt [ Re.char '/'; Re.str "\\/" ] in 122 + let slug_char = Re.compl [ Re.set "/?\"'\\ \t\n\r" ] in 123 + let article = 124 + Re.seq 125 + [ 126 + Re.str "linkedin.com"; 127 + slash; 128 + Re.str "pulse"; 129 + slash; 130 + Re.group (Re.rep1 slug_char); 131 + ] 132 + in 133 + let activity = 134 + Re.seq [ Re.str "urn:li:activity:"; Re.group (Re.rep1 Re.digit) ] 135 + in 136 + Re.compile (Re.alt [ article; activity ]) 137 + 138 + let pulse_canonical_url slug = Fmt.str "https://www.linkedin.com/pulse/%s/" slug 139 + 140 + let summary_of_match g = 141 + match Re.Group.test g 1 with 142 + | true -> 143 + let slug = Re.Group.get g 1 in 144 + Some 145 + { 146 + url = pulse_canonical_url slug; 147 + kind = `Article; 148 + title = None; 149 + published = None; 150 + } 151 + | false -> 152 + if Re.Group.test g 2 then 153 + let id = Re.Group.get g 2 in 154 + let urn = Fmt.str "urn:li:activity:%s" id in 155 + Some 156 + { 157 + url = url_of_activity_urn urn; 158 + kind = `Post; 159 + title = None; 160 + published = None; 161 + } 162 + else None 163 + 164 + (** {1 JSON codecs} *) 165 + 166 + let kind_codec = 167 + let open Json.Codec in 168 + string 169 + |> map ~kind:"item_kind" 170 + ~dec:(function "Article" -> `Article | _ -> `Post) 171 + ~enc:(function `Article -> "Article" | `Post -> "Post") 172 + 173 + let ptime_codec = 174 + let open Json.Codec in 175 + string 176 + |> map ~kind:"ptime_rfc3339" 177 + ~dec:(fun s -> 178 + match Ptime.of_rfc3339 s with 179 + | Ok (t, _, _) -> t 180 + | Error _ -> Ptime.epoch) 181 + ~enc:Ptime.to_rfc3339 182 + 183 + let summary_json = 184 + let open Json.Codec in 185 + Object.map ~kind:"item_summary" (fun url kind title published -> 186 + { 187 + url = Option.value ~default:"" url; 188 + kind = Option.value ~default:`Post kind; 189 + title; 190 + published; 191 + }) 192 + |> Object.opt_member "url" string ~enc:(fun (s : summary) -> Some s.url) 193 + |> Object.opt_member "kind" kind_codec ~enc:(fun (s : 
summary) -> Some s.kind) 194 + |> Object.opt_member "title" string ~enc:(fun (s : summary) -> s.title) 195 + |> Object.opt_member "published" ptime_codec ~enc:(fun (s : summary) -> 196 + s.published) 197 + |> Object.skip_unknown |> Object.seal 198 + 199 + let json = 200 + let open Json.Codec in 201 + Object.map ~kind:"item" 202 + (fun 203 + url 204 + kind 205 + title 206 + subtitle 207 + author 208 + author_id 209 + published 210 + markdown 211 + html 212 + num_likes 213 + num_comments 214 + -> 215 + { 216 + url = Option.value ~default:"" url; 217 + kind = Option.value ~default:`Post kind; 218 + title; 219 + subtitle; 220 + author = Option.value ~default:"" author; 221 + author_id; 222 + published = Option.value ~default:Ptime.epoch published; 223 + markdown = Option.value ~default:"" markdown; 224 + html = Option.value ~default:"" html; 225 + num_likes = Option.value ~default:0 num_likes; 226 + num_comments = Option.value ~default:0 num_comments; 227 + }) 228 + |> Object.opt_member "url" string ~enc:(fun i -> Some i.url) 229 + |> Object.opt_member "kind" kind_codec ~enc:(fun i -> Some i.kind) 230 + |> Object.opt_member "title" string ~enc:(fun i -> i.title) 231 + |> Object.opt_member "subtitle" string ~enc:(fun i -> i.subtitle) 232 + |> Object.opt_member "author" string ~enc:(fun i -> Some i.author) 233 + |> Object.opt_member "author_id" string ~enc:(fun i -> i.author_id) 234 + |> Object.opt_member "published" ptime_codec ~enc:(fun i -> Some i.published) 235 + |> Object.opt_member "markdown" string ~enc:(fun i -> Some i.markdown) 236 + |> Object.opt_member "html" string ~enc:(fun i -> Some i.html) 237 + |> Object.opt_member "num_likes" int ~enc:(fun i -> Some i.num_likes) 238 + |> Object.opt_member "num_comments" int ~enc:(fun i -> Some i.num_comments) 239 + |> Object.skip_unknown |> Object.seal 240 + 241 + (** {1 Feed} *) 242 + 243 + let summaries_of_feed body = 244 + let seen = Hashtbl.create 64 in 245 + Re.all re body 246 + |> List.filter_map summary_of_match 
247 + |> List.filter (fun (s : summary) -> 248 + if Hashtbl.mem seen s.url then false 249 + else begin 250 + Hashtbl.add seen s.url (); 251 + true 252 + end) 253 + 254 + let feed ?(start = 0) ?(count = 100) ~profile_id api = 255 + let path = feed_path ~profile_id ~start ~count in 256 + Log.debug (fun m -> m "Walking share feed %s" path); 257 + match Api.get api path with 258 + | Error _ as e -> e 259 + | Ok body -> Ok (summaries_of_feed body)
+104
lib/item.mli
··· 1 + (** LinkedIn items: a unified view of Pulse articles and short feed posts. 2 + 3 + A LinkedIn item is anything a member has published that has a URL, an 4 + author, a publication time, and a body. Pulse articles have rich Markdown 5 + bodies walked from HTML; short posts have plain-text bodies. {!get} 6 + dispatches on the URL type automatically; {!feed} returns a member's recent 7 + items (articles and posts interleaved) in reverse-chronological order. *) 8 + 9 + type kind = [ `Article | `Post ] 10 + (** The kind of item. Pulse articles and newsletters both appear as [`Article]; 11 + short feed posts and reshares as [`Post]. *) 12 + 13 + type summary = { 14 + url : string; 15 + kind : kind; 16 + title : string option; 17 + published : Ptime.t option; 18 + } 19 + (** A lightweight reference to an item, as produced by {!feed}. [title] and 20 + [published] are populated on a best-effort basis from the share feed 21 + response; call {!get} on [url] for the full record. *) 22 + 23 + type t 24 + (** A fully-fetched item. *) 25 + 26 + (** {1 Accessors} *) 27 + 28 + val url : t -> string 29 + (** [url t] is the canonical LinkedIn URL for [t]. *) 30 + 31 + val kind : t -> kind 32 + (** [kind t] is [`Article] for Pulse articles, [`Post] for short feed posts. *) 33 + 34 + val title : t -> string option 35 + (** [title t] is the headline for articles, [None] for short posts (which have 36 + no title — only a body). *) 37 + 38 + val subtitle : t -> string option 39 + (** [subtitle t] is the article subtitle or description when present. *) 40 + 41 + val author : t -> string 42 + (** [author t] is the author's display name. *) 43 + 44 + val author_id : t -> string option 45 + (** [author_id t] is the author's LinkedIn public ID (the [/in/&lt;id&gt;] 46 + segment) when the author is a person. [None] for organization authors or 47 + when the ID cannot be extracted. *) 48 + 49 + val published : t -> Ptime.t 50 + (** [published t] is the item's publication time. 
*) 51 + 52 + val markdown : t -> string 53 + (** [markdown t] is the body rendered as Markdown. For articles, this is the 54 + article body walked from HTML. For posts, it is the post's text. *) 55 + 56 + val html : t -> string 57 + (** [html t] is the raw HTML of the item's body. For articles, this is the 58 + article body element as served by LinkedIn. For posts it is empty (short 59 + posts carry plain text, not HTML). *) 60 + 61 + val num_likes : t -> int 62 + (** [num_likes t] is the total reaction count on the item, or [0] when not 63 + extractable. *) 64 + 65 + val num_comments : t -> int 66 + (** [num_comments t] is the comment count on the item, or [0] when not 67 + extractable. *) 68 + 69 + val pp : t Fmt.t 70 + (** [pp] is a pretty-printer (title/byline/date/URL followed by the body). *) 71 + 72 + (** {1 Fetching} *) 73 + 74 + val get : Api.t -> string -> (t, Api.error) result 75 + (** [get api url] fetches any LinkedIn item by URL. Dispatches on URL type: 76 + - [https://www.linkedin.com/pulse/&lt;slug&gt;] fetches the article publicly 77 + (no auth needed) and renders its body as Markdown; 78 + - [https://www.linkedin.com/feed/update/urn:li:activity:&lt;id&gt;] and 79 + [https://www.linkedin.com/posts/&lt;slug&gt;] fetch the post via Voyager 80 + using the session cookies held by [api]; 81 + - bare Pulse slugs and activity URNs are also accepted. *) 82 + 83 + val feed : 84 + ?start:int -> 85 + ?count:int -> 86 + profile_id:string -> 87 + Api.t -> 88 + (summary list, Api.error) result 89 + (** [feed ~profile_id api] returns a summary of each item in the member's recent 90 + share feed, in reverse-chronological order. Articles and posts interleave. 91 + Pipe each [url] through {!get} for the full item. *) 92 + 93 + val summaries_of_feed : string -> summary list 94 + (** [summaries_of_feed body] extracts item summaries from the body of a Voyager 95 + [memberShareFeed] JSON response. 
Pure: useful for testing and for pipelines 96 + that already have a feed body in hand. *) 97 + 98 + (** {1 JSON codecs} *) 99 + 100 + val json : t Json.codec 101 + (** [json] is a JSON codec for a full item. *) 102 + 103 + val summary_json : summary Json.codec 104 + (** [summary_json] is a JSON codec for a feed summary. *)
+3
lib/linkedin.ml
··· 3 3 module Api = Api 4 4 module Chrome_cookies = Chrome_cookies 5 5 module Linkedin_url = Linkedin_url 6 + module Html = Html 7 + module Pulse = Pulse 8 + module Item = Item
+3
lib/linkedin.mli
··· 9 9 module Api = Api 10 10 module Chrome_cookies = Chrome_cookies 11 11 module Linkedin_url = Linkedin_url 12 + module Html = Html 13 + module Pulse = Pulse 14 + module Item = Item
+14 -1
lib/linkedin_url.ml
··· 1 - type t = Profile of string | Post of string 1 + type t = Profile of string | Post of string | Pulse of string 2 2 3 3 let pp ppf = function 4 4 | Profile id -> Fmt.pf ppf "Profile(%s)" id 5 5 | Post urn -> Fmt.pf ppf "Post(%s)" urn 6 + | Pulse slug -> Fmt.pf ppf "Pulse(%s)" slug 6 7 7 8 let is_linkedin_host h = 8 9 let h = String.lowercase_ascii h in ··· 45 46 match extract_activity_from_slug slug with 46 47 | Some urn -> Ok (Post urn) 47 48 | None -> err_no_activity_id slug) 49 + | [ "pulse"; slug ] -> Ok (Pulse slug) 48 50 | _ -> err_unrecognised_path path) 49 51 50 52 let is_url s = ··· 56 58 match of_string s with 57 59 | Ok (Profile id) -> Ok id 58 60 | Ok (Post _) -> Error "expected a profile URL, got a post URL" 61 + | Ok (Pulse _) -> Error "expected a profile URL, got a Pulse article URL" 59 62 | Error _ as e -> e 60 63 else Ok s 61 64 ··· 64 67 match of_string s with 65 68 | Ok (Post urn) -> Ok urn 66 69 | Ok (Profile _) -> Error "expected a post URL, got a profile URL" 70 + | Ok (Pulse _) -> Error "expected a post URL, got a Pulse article URL" 71 + | Error _ as e -> e 72 + else Ok s 73 + 74 + let pulse_of_string s = 75 + if is_url s then 76 + match of_string s with 77 + | Ok (Pulse slug) -> Ok slug 78 + | Ok (Profile _) -> Error "expected a Pulse URL, got a profile URL" 79 + | Ok (Post _) -> Error "expected a Pulse URL, got a post URL" 67 80 | Error _ as e -> e 68 81 else Ok s
+14 -2
lib/linkedin_url.mli
··· 1 1 (** LinkedIn URL parser. 2 2 3 - Parses LinkedIn profile and post URLs into structured types. *) 3 + Parses LinkedIn profile, post, and Pulse article URLs into structured types. 4 + *) 4 5 5 6 type t = 6 7 | Profile of string (** A profile URL with the public identifier. *) 7 8 | Post of string (** A post URL with the activity URN. *) 9 + | Pulse of string 10 + (** A Pulse article URL with the full slug (e.g. 11 + [how-much-value-we-leaving-orbit-parsimoni-rjugf]). The slug contains 12 + the article title, author public ID, and a trailing hash, concatenated 13 + with hyphens. Split out reliably only by fetching the article. *) 8 14 9 15 val of_string : string -> (t, string) result 10 16 (** [of_string url] parses a LinkedIn URL into a {!t} value. ··· 12 18 Recognised formats: 13 19 - [https://www.linkedin.com/in/{public_id}] 14 20 - [https://www.linkedin.com/feed/update/urn:li:activity:{id}] 15 - - [https://www.linkedin.com/posts/{slug}-activity-{id}-{hash}]. *) 21 + - [https://www.linkedin.com/posts/{slug}-activity-{id}-{hash}] 22 + - [https://www.linkedin.com/pulse/{slug}]. *) 16 23 17 24 val pp : t Fmt.t 18 25 (** [pp] is a pretty-printer for parsed URLs. *) ··· 24 31 val post_of_string : string -> (string, string) result 25 32 (** [post_of_string s] extracts a post URN from [s]. If [s] is a URL, it parses 26 33 it as a post URL; otherwise it returns [s] as-is (treated as a URN). *) 34 + 35 + val pulse_of_string : string -> (string, string) result 36 + (** [pulse_of_string s] extracts a Pulse article slug from [s]. If [s] is a URL, 37 + it parses it as a Pulse URL; otherwise it returns [s] as-is (treated as a 38 + slug). *)
+147 -1
lib/post.ml
··· 29 29 ?(num_comments = 0) () = 30 30 { urn; text; author_name; created_time; num_likes; num_comments } 31 31 32 + (* LinkedIn's API returns [commentary] as either a plain string (simple 33 + posts) or an object whose [text] field carries the body. Depending 34 + on the surface that produced the post (normalised feed, pulse 35 + article, share with mentions), the [text] field is itself either a 36 + string or another nested object. Accept any of those by recursively 37 + unwrapping [text] fields until we hit a string. Any other JSON sort 38 + decodes to the empty string. *) 39 + let commentary_string = 40 + let open Json.Codec in 41 + let rec self = 42 + lazy 43 + (let dec_object = 44 + Object.map ~kind:"commentary" (fun text -> 45 + Option.value ~default:"" text) 46 + |> Object.opt_member "text" (fix self) ~enc:(fun s -> Some s) 47 + |> Object.skip_unknown |> Object.seal 48 + in 49 + any ~kind:"commentary" ~dec_string:string ~dec_object 50 + ~enc:(fun _ -> string) 51 + ()) 52 + in 53 + fix self 54 + 32 55 let json = 33 56 let open Json.Codec in 34 57 Object.map ~kind:"post" ··· 42 65 num_comments = Option.value ~default:0 num_comments; 43 66 }) 44 67 |> Object.opt_member "urn" string ~enc:(fun p -> Some p.urn) 45 - |> Object.opt_member "commentary" string ~enc:(fun p -> Some p.text) 68 + |> Object.opt_member "commentary" commentary_string ~enc:(fun p -> 69 + Some p.text) 46 70 |> Object.opt_member "authorName" string ~enc:(fun p -> Some p.author_name) 47 71 |> Object.opt_member "createdTime" int ~enc:(fun p -> Some p.created_time) 48 72 |> Object.opt_member "numLikes" int ~enc:(fun p -> Some p.num_likes) ··· 64 88 match included with Some (p :: _) -> p | _ -> v ~urn:"" ()) 65 89 |> Object.opt_member "included" (list json) ~enc:(fun p -> Some [ p ]) 66 90 |> Object.skip_unknown |> Object.seal 91 + 92 + (** {1 Voyager normalized-response decoder} 93 + 94 + LinkedIn's [/voyager/api/feed/updates/&lt;urn&gt;] returns a graph of 95 + URN-linked entities under 
[included]. Reconstruct a Post by finding the 96 + [UpdateV2] entity, walking its [commentary]/[actor]/[updateMetadata] 97 + sub-fields, and looking up [SocialActivityCounts] by [shareUrn] for 98 + likes/comments. Returns [None] only if there is no [UpdateV2] in the 99 + response at all. *) 100 + 101 + let obj_members = function Json.Value.Object (ms, _) -> Some ms | _ -> None 102 + let as_string = function Json.Value.String (s, _) -> Some s | _ -> None 103 + 104 + let as_int = function 105 + | Json.Value.Number (n, _) -> Some (int_of_float n) 106 + | _ -> None 107 + 108 + let as_array = function Json.Value.Array (xs, _) -> Some xs | _ -> None 109 + 110 + let mem key ms = 111 + match Json.Value.member_key key ms with Some (_, v) -> Some v | None -> None 112 + 113 + let ( let* ) = Option.bind 114 + 115 + let find_by_type included typ = 116 + List.find_opt 117 + (fun e -> 118 + match obj_members e with 119 + | Some ms -> ( 120 + match mem "$type" ms with 121 + | Some v -> as_string v = Some typ 122 + | None -> false) 123 + | None -> false) 124 + included 125 + 126 + let find_social_counts included share_urn = 127 + let typ = "com.linkedin.voyager.feed.shared.SocialActivityCounts" in 128 + List.find_opt 129 + (fun e -> 130 + match obj_members e with 131 + | None -> false 132 + | Some ms -> ( 133 + match (mem "$type" ms, mem "urn" ms) with 134 + | Some tv, Some uv -> 135 + as_string tv = Some typ && as_string uv = Some share_urn 136 + | _ -> false)) 137 + included 138 + 139 + let nested_text path ms = 140 + let rec loop ms = function 141 + | [] -> None 142 + | [ key ] -> mem key ms |> Option.map as_string |> Option.join 143 + | key :: rest -> 144 + let* sub = mem key ms in 145 + let* sub_ms = obj_members sub in 146 + loop sub_ms rest 147 + in 148 + loop ms path 149 + 150 + (** Derive post creation time from an activity URN. LinkedIn's activity IDs are 151 + snowflake-like: the high bits ([id lsr 22]) are the creation time in 152 + milliseconds since the Unix epoch. 
Returns [0] for non-activity URNs. *) 153 + let created_time_of_urn urn = 154 + let prefix = "urn:li:activity:" in 155 + let pl = String.length prefix in 156 + if String.length urn > pl && String.sub urn 0 pl = prefix then 157 + match Int64.of_string_opt (String.sub urn pl (String.length urn - pl)) with 158 + | Some id -> Int64.to_int (Int64.shift_right_logical id 22) 159 + | None -> 0 160 + else 0 161 + 162 + let of_voyager_response j = 163 + let* root = obj_members j in 164 + let* included_v = mem "included" root in 165 + let* included = as_array included_v in 166 + let* update = 167 + find_by_type included "com.linkedin.voyager.feed.render.UpdateV2" 168 + in 169 + let* upd = obj_members update in 170 + let text = 171 + Option.value ~default:"" (nested_text [ "commentary"; "text"; "text" ] upd) 172 + in 173 + let author_name = 174 + Option.value ~default:"" (nested_text [ "actor"; "name"; "text" ] upd) 175 + in 176 + let meta = 177 + mem "updateMetadata" upd |> Option.map obj_members |> Option.join 178 + in 179 + let urn = 180 + match meta with 181 + | Some m -> Option.value ~default:"" (nested_text [ "urn" ] m) 182 + | None -> "" 183 + in 184 + let share_urn = 185 + match meta with Some m -> nested_text [ "shareUrn" ] m | None -> None 186 + in 187 + let num_likes, num_comments = 188 + match share_urn with 189 + | None -> (0, 0) 190 + | Some su -> ( 191 + match find_social_counts included su with 192 + | None -> (0, 0) 193 + | Some e -> ( 194 + match obj_members e with 195 + | None -> (0, 0) 196 + | Some ms -> 197 + let get k = 198 + match mem k ms with 199 + | Some v -> Option.value ~default:0 (as_int v) 200 + | None -> 0 201 + in 202 + (get "numLikes", get "numComments"))) 203 + in 204 + Some 205 + { 206 + urn; 207 + text; 208 + author_name; 209 + created_time = created_time_of_urn urn; 210 + num_likes; 211 + num_comments; 212 + }
+8
lib/post.mli
··· 47 47 val normalized_json : t Json.codec 48 48 (** [normalized_json] is a JSON codec for a single post from a normalised 49 49 Voyager API response (extracts from the [included] array). *) 50 + 51 + val of_voyager_response : Json.t -> t option 52 + (** [of_voyager_response j] reconstructs a Post from the full Voyager 53 + [/voyager/api/feed/updates/&lt;urn&gt;] response. Walks the [included] array 54 + to find the [UpdateV2] entity, follows its [commentary], [actor] and 55 + [updateMetadata] sub-fields, and resolves [shareUrn] against 56 + [SocialActivityCounts] for likes and comments. Missing text, author, URN or counts default to [""] / [0]. Returns [None] when [j] is not a JSON object, when it has no [included] array, or when no 57 + [UpdateV2] entity is present in that array. *)
+533
lib/pulse.ml
(* LinkedIn Pulse articles: fetch the public article page, extract its
   metadata, and convert the article body from HTML to Markdown. *)

let log_src = Logs.Src.create "linkedin.pulse"

module Log = (val Logs.src_log log_src : Logs.LOG)

type summary = {
  url : string;
  slug : string;
  title : string;
  published : Ptime.t option;
}

type t = {
  url : string;
  slug : string;
  title : string;
  subtitle : string option;
  author : string;
  author_id : string option;
  published : Ptime.t;
  markdown : string;
  html : string;
  num_likes : int;
  num_comments : int;
}

let url (t : t) = t.url
let slug (t : t) = t.slug
let title (t : t) = t.title
let subtitle (t : t) = t.subtitle
let author (t : t) = t.author
let author_id (t : t) = t.author_id
let published (t : t) = t.published
let markdown (t : t) = t.markdown
let html (t : t) = t.html
let num_likes (t : t) = t.num_likes
let num_comments (t : t) = t.num_comments

(** Print only the calendar date ([YYYY-MM-DD]) of a timestamp. *)
let pp_date ppf t =
  let (y, m, d), _ = Ptime.to_date_time t in
  Fmt.pf ppf "%04d-%02d-%02d" y m d

let pp ppf (t : t) =
  Fmt.pf ppf "@[<v># %s@ @ by %s on %a@ %s@ @ %s@]" t.title t.author pp_date
    t.published t.url t.markdown

(** {1 HTML -> Markdown conversion}

    Walks the article body element and emits Markdown. Handles the subset of
    HTML used by LinkedIn Pulse (p, headings, emphasis, links, lists, code,
    blockquote, images, line breaks). Unknown tags are transparent (their
    children are rendered, the wrapper dropped). *)

(** Backslash-escape the characters that would otherwise be read as inline
    Markdown syntax. *)
let escape_inline s =
  let buf = Buffer.create (String.length s) in
  String.iter
    (fun c ->
      match c with
      | '\\' | '*' | '_' | '[' | ']' | '`' ->
          Buffer.add_char buf '\\';
          Buffer.add_char buf c
      | c -> Buffer.add_char buf c)
    s;
  Buffer.contents buf

(** [lang_of_class cls] is the suffix of the first [language-<x>] class in the
    space-separated class list [cls], or [""] when there is none. *)
let lang_of_class cls =
  let prefix = "language-" in
  let pl = String.length prefix in
  let pick c =
    if String.length c > pl && String.sub c 0 pl = prefix then
      Some (String.sub c pl (String.length c - pl))
    else None
  in
  String.split_on_char ' ' cls
  |> List.find_map pick
  |> Option.value ~default:""

(* [in_literal] is true while rendering inside <code>/<pre>, where text must
   be emitted verbatim rather than Markdown-escaped. *)
let rec render_node ~in_literal node =
  match Soup.element node with
  | Some el -> render_element ~in_literal el
  | None -> (
      match Soup.leaf_text node with
      | None -> ""
      | Some s -> if in_literal then s else escape_inline s)

(* Render every child of [el] and concatenate the results. *)
and render_children ~in_literal el =
  Soup.children el |> Soup.to_list
  |> List.map (render_node ~in_literal)
  |> String.concat ""

and render_element ~in_literal el =
  let tag = String.lowercase_ascii (Soup.name el) in
  let children_md () = render_children ~in_literal el in
  match tag with
  | "p" -> children_md () ^ "\n\n"
  | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" ->
      let level = Char.code tag.[1] - Char.code '0' in
      String.make level '#' ^ " " ^ children_md () ^ "\n\n"
  | "strong" | "b" -> "**" ^ children_md () ^ "**"
  | "em" | "i" -> "_" ^ children_md () ^ "_"
  | "a" -> (
      (* An anchor without a target would render as "[text]()", which is a
         dead link; keep just the text in that case. *)
      match Soup.attribute "href" el with
      | None | Some "" -> children_md ()
      | Some href -> "[" ^ children_md () ^ "](" ^ href ^ ")")
  | "br" ->
      (* Markdown hard line break: two trailing spaces, then a newline. *)
      "  \n"
  | "hr" -> "\n---\n\n"
  | "img" ->
      let src = Option.value ~default:"" (Soup.attribute "src" el) in
      let alt = Option.value ~default:"" (Soup.attribute "alt" el) in
      "![" ^ alt ^ "](" ^ src ^ ")"
  | "code" when not in_literal ->
      "`" ^ render_children ~in_literal:true el ^ "`"
  | "pre" ->
      let body = render_children ~in_literal:true el in
      (* The fence language comes from a nested <code class="language-x">. *)
      let lang =
        match Soup.( $? ) el "code" with
        | Some c -> (
            match Soup.attribute "class" c with
            | Some cls -> lang_of_class cls
            | None -> "")
        | None -> ""
      in
      "\n```" ^ lang ^ "\n" ^ body ^ "\n```\n\n"
  | "code" ->
      (* <code> nested inside <pre>: the fence is already open. *)
      children_md ()
  | "ul" -> render_list ~in_literal ~ordered:false el
  | "ol" -> render_list ~in_literal ~ordered:true el
  | "li" -> "- " ^ String.trim (children_md ()) ^ "\n"
  | "blockquote" ->
      let body = String.trim (children_md ()) in
      let lines = String.split_on_char '\n' body in
      (lines |> List.map (fun l -> "> " ^ l) |> String.concat "\n") ^ "\n\n"
  | "figure" -> children_md () ^ "\n\n"
  | "figcaption" -> "_" ^ String.trim (children_md ()) ^ "_\n\n"
  | _ -> children_md ()

(* Render the direct <li> children of a list; other children are ignored.
   Ordered lists are numbered from 1. *)
and render_list ~in_literal ~ordered el =
  let items =
    Soup.children el |> Soup.to_list
    |> List.filter_map (fun n ->
           match Soup.element n with
           | Some e when String.lowercase_ascii (Soup.name e) = "li" -> Some e
           | _ -> None)
  in
  let render_item idx li =
    let marker = if ordered then Fmt.str "%d. " (idx + 1) else "- " in
    marker ^ String.trim (render_children ~in_literal li) ^ "\n"
  in
  "\n" ^ String.concat "" (List.mapi render_item items) ^ "\n"

(** Collapse any run of newlines to at most two, i.e. at most one blank line
    between blocks. *)
let collapse_blank_lines s =
  let buf = Buffer.create (String.length s) in
  let n = String.length s in
  let rec loop i streak =
    if i >= n then ()
    else if s.[i] = '\n' then begin
      if streak < 2 then Buffer.add_char buf '\n';
      loop (i + 1) (streak + 1)
    end
    else begin
      Buffer.add_char buf s.[i];
      loop (i + 1) 0
    end
  in
  loop 0 0;
  Buffer.contents buf

let markdown_of_body el =
  let raw = render_element ~in_literal:false el in
  let collapsed = collapse_blank_lines raw in
  String.trim collapsed ^ "\n"

(** {1 Metadata extraction}

    Meta-tag extraction uses {!Html} decoders; only the JSON-LD [datePublished]
    lookup stays as a raw regex because the field lives inside a [<script>]
    block, not the DOM. *)

let og name = Fmt.str "meta[property=\"%s\"]" name
let meta_name name = Fmt.str "meta[name=\"%s\"]" name

let title_decoder =
  Html.fallback
    [
      Html.query (og "og:title") (Html.attr "content");
      Html.query "title" Html.text;
      Html.return "";
    ]

let subtitle_decoder =
  let open Html in
  query_opt (og "og:description") (attr "content")
  |> map (function Some s when s <> "" -> Some s | _ -> None)

(** Extract the public ID from a [/in/<id>] URL path. Returns [None] when the
    path has no [in] segment or when the ID segment is empty (e.g. [/in/]). *)
let public_id_of_url url =
  let uri = Uri.of_string url in
  match Uri.path uri |> String.split_on_char '/' with
  | (_ :: "in" :: id :: _ | "in" :: id :: _) when id <> "" -> Some id
  | _ -> None

(** Extract the author's public ID from the article:author OpenGraph tag when
    the author is a person. Returns [None] for organization-authored articles —
    the byline fallback is unreliable because a Pulse page contains many
    [/in/...] links in related sections. *)
let author_id_decoder =
  let open Html in
  query_opt (og "article:author") (attr "content")
  |> map (function Some url -> public_id_of_url url | None -> None)

let author_decoder =
  Html.fallback
    [
      Html.query (meta_name "author") (Html.attr "content");
      (* LinkedIn exposes the author name as twitter:data1 with label
         "Written by" on public Pulse pages. *)
      Html.query (meta_name "twitter:data1") (Html.attr "content");
      Html.query "a[href*=\"/in/\"]" Html.text;
      Html.return "";
    ]

(** Matches the ["datePublished":"ISO-8601"] field in JSON-LD. LinkedIn's 2026
    Pulse pages expose this via an embedded schema.org Article block rather than
    OpenGraph meta tags. *)
let date_published_re =
  Re.compile
    (Re.seq
       [
         Re.str "\"datePublished\":\"";
         Re.group (Re.rep1 (Re.compl [ Re.char '"' ]));
       ])

let published_of_string s =
  match Ptime.of_rfc3339 s with Ok (t, _, _) -> Some t | Error _ -> None

(** Publication time of the page, if any. Tries the meta tags first, then the
    JSON-LD [datePublished] field in the raw document. The JSON-LD lookup is
    also used when a meta tag is present but its value is not valid RFC 3339,
    so one malformed tag does not hide a usable date. *)
let extract_published soup raw =
  let meta_decoder =
    Html.fallback
      [
        Html.query (og "article:published_time") (Html.attr "content");
        Html.query (meta_name "pubdate") (Html.attr "content");
      ]
  in
  let from_meta =
    match Html.run_on_soup meta_decoder soup with
    | Ok s -> published_of_string s
    | Error _ -> None
  in
  match from_meta with
  | Some _ as found -> found
  | None -> (
      match Re.exec_opt date_published_re raw with
      | Some g -> published_of_string (Re.Group.get g 1)
      | None -> None)

(** Extract the first integer-like run from a string, tolerating commas used as
    thousands separators. Returns 0 if none found. *)
let first_int s =
  let n = String.length s in
  let buf = Buffer.create 8 in
  let rec scan i started =
    if i < n then
      match s.[i] with
      | '0' .. '9' as c ->
          Buffer.add_char buf c;
          scan (i + 1) true
      | ',' when started -> scan (i + 1) true
      | _ when started -> () (* first non-digit after the run ends it *)
      | _ -> scan (i + 1) false
  in
  scan 0 false;
  Option.value ~default:0 (int_of_string_opt (Buffer.contents buf))

let text_of_node el =
  let parts = Soup.trimmed_texts el in
  String.concat " " parts

(** Largest integer found in attribute [attr] across every element of the page
    that carries it; 0 when there is none. *)
let max_count_from_attr soup attr =
  let nodes = Soup.( $$ ) soup (Fmt.str "[%s]" attr) |> Soup.to_list in
  List.fold_left
    (fun acc el ->
      match Soup.attribute attr el with
      | Some s -> max acc (first_int s)
      | None -> acc)
    0 nodes

(** First positive count found by trying [selectors] in order. For each match
    the [aria-label] is preferred over the element's text. 0 when nothing
    yields a positive number. *)
let extract_count_from_selectors soup selectors =
  List.find_map
    (fun sel ->
      match Soup.( $? ) soup sel with
      | None -> None
      | Some el ->
          let source =
            match Soup.attribute "aria-label" el with
            | Some s when String.length s > 0 -> s
            | _ -> text_of_node el
          in
          let n = first_int source in
          if n > 0 then Some n else None)
    selectors
  |> Option.value ~default:0

(** The article body has several [data-num-reactions] attributes (one per
    social-actions widget, at article top and bottom, plus nested ones inside
    comments). The article's own count is the max across the page. *)
let extract_num_likes soup =
  let from_attr = max_count_from_attr soup "data-num-reactions" in
  if from_attr > 0 then from_attr
  else
    extract_count_from_selectors soup
      [
        "[data-test-id=\"social-actions__reaction-count\"]";
        "[data-test-id=\"social-actions__reactions\"]";
        ".social-details-social-counts__reactions-count";
        "[aria-label*=\"reactions\"]";
        "[aria-label*=\"Reactions\"]";
      ]

let extract_num_comments soup =
  let from_attr = max_count_from_attr soup "data-num-comments" in
  if from_attr > 0 then from_attr
  else
    extract_count_from_selectors soup
      [
        "[data-test-id=\"social-actions__comments\"]";
        ".social-details-social-counts__comments";
        "[aria-label*=\"comments\"]";
        "[aria-label*=\"Comments\"]";
      ]

(** Find the article body element. Tries a ranked list of selectors, starting
    with LinkedIn's current public-Pulse markup and falling back to historical
    shapes. *)
let extract_body soup =
  let selectors =
    [
      (* Current public Pulse (2026): the article body is a sequence of
         <div data-test-id="publishing-text-block"> blocks (one per
         paragraph) wrapped in <div data-test-id="article-content-blocks">.
         Prefer the wrapper so we walk the whole article, not just the
         lede. *)
      "[data-test-id=\"article-content-blocks\"]";
      "article.article-main";
      "[data-test-id=\"publishing-text-block\"]";
      ".article-main__content";
      (* Historical / in-app shapes *)
      "article .reader-article-content";
      "div.reader-article-content";
      "article .article__body";
      "div.article-body";
      "main article";
      "article";
    ]
  in
  List.find_map (fun sel -> Soup.( $? ) soup sel) selectors

(** {1 Fetch} *)

let canonical_url ~slug = Fmt.str "https://www.linkedin.com/pulse/%s/" slug
let fetch_path ~slug = Fmt.str "/pulse/%s/" slug

let login_wall_re =
  Re.compile
    (Re.alt
       [
         Re.str "authwall";
         Re.str "Sign Up | LinkedIn";
         Re.str "Join LinkedIn";
         Re.str "Sign in to LinkedIn";
         Re.str "challenge/l";
       ])

let is_login_wall html = Re.execp login_wall_re html

let of_html ~slug raw =
  let soup = Soup.parse raw in
  match extract_body soup with
  | None ->
      (* The login-wall check runs only after body extraction has failed:
         real article pages may contain the same marker strings in their
         chrome, so it must never reject a page that has a body. *)
      let msg =
        if is_login_wall raw then
          "LinkedIn served a login wall instead of the Pulse article"
        else
          "could not locate Pulse article body element (selectors did not \
           match)"
      in
      Error (`Parse_error msg)
  | Some body ->
      let run_or dec ~default =
        match Html.run_on_soup dec soup with Ok v -> v | Error _ -> default
      in
      let title = run_or title_decoder ~default:"" in
      let subtitle = run_or subtitle_decoder ~default:None in
      let author = run_or author_decoder ~default:"" in
      let author_id = run_or author_id_decoder ~default:None in
      (* [published] is not optional in [t]; the epoch marks "unknown". *)
      let published =
        match extract_published soup raw with
        | Some t -> t
        | None -> Ptime.epoch
      in
      let markdown = markdown_of_body body in
      let html_body = Soup.to_string body in
      let num_likes = extract_num_likes soup in
      let num_comments = extract_num_comments soup in
      Ok
        {
          url = canonical_url ~slug;
          slug;
          title;
          subtitle;
          author;
          author_id;
          published;
          markdown;
          html = html_body;
          num_likes;
          num_comments;
        }

let fetch api input =
  match Linkedin_url.pulse_of_string input with
  | Error msg -> Error (`Parse_error msg)
  | Ok slug -> (
      let path = fetch_path ~slug in
      Log.debug (fun m -> m "Fetching Pulse article at %s" path);
      match Api.get api path with
      | Error _ as e -> e
      | Ok html -> of_html ~slug html)

(** {1 List} *)

let list_path ~profile_id ~start ~count =
  Fmt.str
    "/voyager/api/feed/updates?profileId=%s&q=memberShareFeed&moduleKey=member-share&count=%d&start=%d"
    profile_id count start

(** Matches [linkedin.com/pulse/<slug>/] in either raw form or with JSON-escaped
    slashes (\\/). Captures the slug. *)
let url_re =
  let slash = Re.alt [ Re.char '/'; Re.str "\\/" ] in
  let slug_char = Re.compl [ Re.set "/?\"'\\ \t\n\r" ] in
  Re.compile
    (Re.seq
       [
         Re.str "linkedin.com";
         slash;
         Re.str "pulse";
         slash;
         Re.group (Re.rep1 slug_char);
       ])

let slugs_of_feed body =
  Re.all url_re body
  |> List.map (fun g -> Re.Group.get g 1)
  |> List.filter (fun s -> s <> "")

(** Keep the first element for each distinct [key x], preserving order. *)
let dedup_by key xs =
  let seen = Hashtbl.create 16 in
  List.filter
    (fun x ->
      let k = key x in
      if Hashtbl.mem seen k then false
      else begin
        Hashtbl.add seen k ();
        true
      end)
    xs

(** Build a summary from an [<a>] element whose [href] points at a Pulse
    article. Site-relative hrefs must have the path [/pulse/<slug>]; other
    hrefs are handed to {!Linkedin_url.pulse_of_string}. Returns [None] when
    there is no [href] or it is not a Pulse link. *)
let summary_of_link soup_el =
  match Soup.attribute "href" soup_el with
  | None -> None
  | Some href ->
      let segments =
        Uri.of_string href |> Uri.path
        |> String.split_on_char '/'
        |> List.filter (fun s -> s <> "")
      in
      let slug_opt =
        match segments with
        | [ "pulse"; slug ] -> Some slug
        | _ when String.length href > 0 && href.[0] <> '/' -> (
            match Linkedin_url.pulse_of_string href with
            | Ok slug -> Some slug
            | Error _ -> None)
        | _ -> None
      in
      Option.map
        (fun slug ->
          let title =
            String.trim (Option.value ~default:"" (Soup.leaf_text soup_el))
          in
          ({ url = canonical_url ~slug; slug; title; published = None }
            : summary))
        slug_opt

let summaries_of_html html =
  let soup = Soup.parse html in
  Soup.( $$ ) soup "a[href*=\"/pulse/\"]"
  |> Soup.to_list
  |> List.filter_map summary_of_link
  |> dedup_by (fun (s : summary) -> s.slug)

let list ?(start = 0) ?(count = 100) ~profile_id api =
  let path = list_path ~profile_id ~start ~count in
  Log.debug (fun m -> m "Listing Pulse articles via share feed %s" path);
  match Api.get api path with
  | Error _ as e -> e
  | Ok body ->
      if is_login_wall body then
        Error
          (`Parse_error
             (Fmt.str
                "LinkedIn returned a login wall for %s; session cookies may be \
                 expired"
                path))
      else
        (* The feed JSON only yields slugs; title and date need {!fetch}. *)
        let summaries =
          slugs_of_feed body |> dedup_by Fun.id
          |> List.map (fun slug ->
                 ({
                    url = canonical_url ~slug;
                    slug;
                    title = "";
                    published = None;
                  }
                   : summary))
        in
        Ok summaries
+96
lib/pulse.mli
··· 1 + (** LinkedIn Pulse articles. 2 + 3 + Fetches long-form articles published at [https://www.linkedin.com/pulse/] 4 + and converts them to Markdown. Pulse articles are publicly indexed, so 5 + [fetch] does not require session auth; [list] does use the session cookies 6 + held by {!Api.t} to query a member's article archive. *) 7 + 8 + type t 9 + (** A Pulse article. *) 10 + 11 + type summary = { 12 + url : string; 13 + slug : string; 14 + title : string; 15 + published : Ptime.t option; 16 + } 17 + (** A lightweight summary as returned by {!list}. *) 18 + 19 + (** {1 Accessors} *) 20 + 21 + val url : t -> string 22 + (** [url t] is the canonical Pulse URL for [t]. *) 23 + 24 + val slug : t -> string 25 + (** [slug t] is the full Pulse slug (path component after [/pulse/]). *) 26 + 27 + val title : t -> string 28 + (** [title t] is the article's title. *) 29 + 30 + val subtitle : t -> string option 31 + (** [subtitle t] is the article's subtitle when present. *) 32 + 33 + val author : t -> string 34 + (** [author t] is the display name of the author. *) 35 + 36 + val author_id : t -> string option 37 + (** [author_id t] is the author's LinkedIn public ID (the [/in/&lt;id&gt;] 38 + segment) when it can be extracted from the article's author URL. *) 39 + 40 + val published : t -> Ptime.t 41 + (** [published t] is the publication time, or [Ptime.epoch] when no parseable publication date could be extracted from the page. *) 42 + 43 + val markdown : t -> string 44 + (** [markdown t] is the article body rendered as Markdown. *) 45 + 46 + val html : t -> string 47 + (** [html t] is the raw HTML of the article body element, for callers that want 48 + to re-render, sanitize, or post-process the original markup. *) 49 + 50 + val num_likes : t -> int 51 + (** [num_likes t] is the total reaction count when extractable from the 52 + article's HTML, otherwise [0]. Combines likes, celebrates, supports, loves, 53 + insightfuls, and funnies. *) 54 + 55 + val num_comments : t -> int 56 + (** [num_comments t] is the comment count when extractable, otherwise [0]. 
*) 57 + 58 + val pp : t Fmt.t 59 + (** [pp] is a pretty-printer for articles (title, byline, URL, body). *) 60 + 61 + (** {1 Fetching} *) 62 + 63 + val fetch : Api.t -> string -> (t, Api.error) result 64 + (** [fetch api url_or_slug] fetches a Pulse article. [url_or_slug] may be a full 65 + URL (e.g. [https://www.linkedin.com/pulse/&lt;slug&gt;]) or a bare slug. The 66 + request uses the session held by [api] but Pulse articles are publicly 67 + indexed — session auth is not required for public articles. *) 68 + 69 + val of_html : slug:string -> string -> (t, [> `Parse_error of string ]) result 70 + (** [of_html ~slug html] parses a pre-fetched Pulse article HTML document. 71 + Useful for offline testing and for piping HTML fetched by other means. *) 72 + 73 + val summaries_of_html : string -> summary list 74 + (** [summaries_of_html html] extracts Pulse article summaries from the HTML of 75 + an author's activity/articles listing page. Returns an empty list if no 76 + [/pulse/] links are found. *) 77 + 78 + val slugs_of_feed : string -> string list 79 + (** [slugs_of_feed body] extracts all Pulse article slugs from the body of a 80 + Voyager [memberShareFeed] JSON response. Matches both raw ([/pulse/slug]) 81 + and JSON-escaped ([\\/pulse\\/slug]) URL forms. Duplicates are preserved; 82 + dedup at the call site if needed. *) 83 + 84 + val list : 85 + ?start:int -> 86 + ?count:int -> 87 + profile_id:string -> 88 + Api.t -> 89 + (summary list, Api.error) result 90 + (** [list ~profile_id api] lists Pulse articles that appear in a member's recent 91 + share feed. Calls the Voyager [memberShareFeed] endpoint (same one used by 92 + {!Api.posts}) and extracts all [/pulse/&lt;slug&gt;] URLs from the JSON 93 + response. The returned summaries carry [url] and [slug] only; [title] is 94 + [""] and [published] is [None]. Use {!fetch} on each URL for full metadata. 95 + Articles that were not recently shared will not appear — raise [count] to 96 + walk further back. *)
+3
linkedin.opam
··· 21 21 "kdf" {>= "0.1"} 22 22 "uri" {>= "4.0"} 23 23 "cmdliner" {>= "1.2"} 24 + "lambdasoup" {>= "1.0"} 25 + "re" {>= "1.11"} 26 + "mdx" {with-test} 24 27 "alcotest" {with-test} 25 28 "odoc" {with-doc} 26 29 ]
+3 -1
test/dune
··· 1 1 (test 2 2 (name test) 3 - (libraries linkedin alcotest json fmt)) 3 + (deps 4 + (source_tree fixtures)) 5 + (libraries linkedin alcotest json fmt ptime re))
+50
test/fixtures/pulse_article.html
··· 1 + <!DOCTYPE html> 2 + <html lang="en"> 3 + <head> 4 + <meta charset="utf-8"> 5 + <title>How Much Value Are We Leaving in Orbit? | LinkedIn</title> 6 + <meta property="og:title" content="How Much Value Are We Leaving in Orbit?"> 7 + <meta property="og:description" content="A look at the economics of underused orbital assets."> 8 + <meta property="article:published_time" content="2026-04-24T08:30:00Z"> 9 + <meta property="article:author" content="https://www.linkedin.com/in/parsimoni"> 10 + <meta name="author" content="Parsimoni"> 11 + </head> 12 + <body> 13 + <header> 14 + <a href="https://www.linkedin.com/in/parsimoni" class="author-link">Parsimoni</a> 15 + </header> 16 + <article class="article-main relative flex-grow pulse"> 17 + <div data-test-id="article-content-blocks"> 18 + <p>Satellite operators are leaving significant <strong>economic value</strong> 19 + in orbit. Three patterns recur:</p> 20 + <ul> 21 + <li>Idle capacity during non-operational windows.</li> 22 + <li>Difficulty deploying <em>software</em> securely in orbit.</li> 23 + <li>Missions designed around a handful of initial customers.</li> 24 + </ul> 25 + <h2>A different approach</h2> 26 + <p>Rather than launching more hardware, the opportunity is to extract 27 + value from what is already up there. We call this approach 28 + <a href="https://parsimoni.co/spaceos">SpaceOS</a>: secure 29 + multi-tenant middleware that lets third parties deploy applications 30 + onto existing satellites.</p> 31 + <blockquote> 32 + <p>The best satellite is the one already in orbit.</p> 33 + </blockquote> 34 + <h3>Why now</h3> 35 + <ol> 36 + <li>Launch costs have collapsed.</li> 37 + <li>In-orbit compute is now spare, not scarce.</li> 38 + </ol> 39 + <p>A simple code example:</p> 40 + <pre><code class="language-ocaml">let orbit_value ~capacity ~utilisation = 41 + capacity *. 
utilisation</code></pre> 42 + <p>See <code>orbit_value</code> above.</p> 43 + </div> 44 + </article> 45 + <div class="social-actions"> 46 + <div data-test-id="social-actions__reactions" data-num-reactions="142" aria-label="142 Reactions">Reactions</div> 47 + <div data-test-id="social-actions__comments" data-num-comments="37" aria-label="37 Comments">Comments</div> 48 + </div> 49 + </body> 50 + </html>
+28
test/fixtures/pulse_list.html
··· 1 + <!DOCTYPE html> 2 + <html> 3 + <head><title>Parsimoni - Articles</title></head> 4 + <body> 5 + <main> 6 + <ul class="articles"> 7 + <li> 8 + <a href="https://www.linkedin.com/pulse/how-much-value-we-leaving-orbit-parsimoni-rjugf/"> 9 + How Much Value Are We Leaving in Orbit? 10 + </a> 11 + </li> 12 + <li> 13 + <a href="/pulse/ground-stations-are-the-new-bottleneck-parsimoni-q2a8p"> 14 + Ground Stations Are The New Bottleneck 15 + </a> 16 + </li> 17 + <li> 18 + <a href="/pulse/how-much-value-we-leaving-orbit-parsimoni-rjugf/?utm=share"> 19 + duplicate link should be deduped 20 + </a> 21 + </li> 22 + <li> 23 + <a href="/in/someone-else">Not a pulse article</a> 24 + </li> 25 + </ul> 26 + </main> 27 + </body> 28 + </html>
+2
test/test.ml
··· 6 6 Test_api.suite; 7 7 Test_chrome_cookies.suite; 8 8 Test_linkedin_url.suite; 9 + Test_pulse.suite; 10 + Test_item.suite; 9 11 ]
+120
test/test_item.ml
(* Alcotest cases for [Linkedin.Item]: feed-summary extraction and the JSON
   codecs for summaries and full items. *)

open Linkedin

let pulse_url = "https://www.linkedin.com/pulse/foo-author-abcde/"
let activity_url = "https://www.linkedin.com/feed/update/urn:li:activity:12345"

(* {1 summaries_of_feed} *)

(* Two JSON-escaped Pulse URLs plus one activity URN give three summaries,
   the first of which is an article. *)
let test_mixed_feed () =
  let body =
    {|{"updates":[
      {"actionTarget":"https:\/\/www.linkedin.com\/pulse\/foo-author-abcde\/"},
      {"urn":"urn:li:activity:12345","text":"short post"},
      {"actionTarget":"https:\/\/www.linkedin.com\/pulse\/bar-author-xyz12\/"}
    ]}|}
  in
  let summaries = Item.summaries_of_feed body in
  Alcotest.(check int) "three items" 3 (List.length summaries);
  let kinds = List.map (fun (s : Item.summary) -> s.kind) summaries in
  Alcotest.(check bool)
    "first is article" true
    (match kinds with `Article :: _ -> true | _ -> false)

(* The same slug with and without a query string counts once. *)
let test_dedup () =
  let body =
    {|{"a":"linkedin.com/pulse/same-slug/","b":"linkedin.com/pulse/same-slug/?u=1"}|}
  in
  Alcotest.(check int)
    "dedup to 1" 1
    (List.length (Item.summaries_of_feed body))

let test_empty () =
  let body = {|{"other":"no items here"}|} in
  Alcotest.(check int) "empty" 0 (List.length (Item.summaries_of_feed body))

let test_activity_urn_only () =
  let body = {|{"urn":"urn:li:activity:99999"}|} in
  let summaries = Item.summaries_of_feed body in
  Alcotest.(check int) "one post" 1 (List.length summaries);
  match summaries with
  | [ s ] ->
      Alcotest.(check bool) "kind post" true (s.kind = `Post);
      let n = String.length s.url in
      Alcotest.(check bool)
        "url ends in 99999" true
        (n > 5 && String.sub s.url (n - 5) 5 = "99999")
  | _ -> Alcotest.fail "expected exactly one summary"

(* {1 JSON codecs — summary} *)

let test_summary_roundtrip () =
  let s : Item.summary =
    { url = pulse_url; kind = `Article; title = Some "hi"; published = None }
  in
  let json_str = Json.to_string Item.summary_json s in
  match Json.of_string Item.summary_json json_str with
  | Error e -> Alcotest.failf "decode failed: %s" (Json.Error.to_string e)
  | Ok s' ->
      Alcotest.(check string) "url" s.url s'.url;
      Alcotest.(check (option string)) "title" s.title s'.title;
      Alcotest.(check bool) "kind article" true (s'.kind = `Article)

let test_summary_post_kind () =
  let s : Item.summary =
    { url = activity_url; kind = `Post; title = None; published = None }
  in
  let json_str = Json.to_string Item.summary_json s in
  let has_post = Re.execp (Re.compile (Re.str "\"Post\"")) json_str in
  Alcotest.(check bool) "encoded kind as Post" true has_post

(* {1 JSON codecs — full item} *)

let item_json =
  {|{"url":"https://www.linkedin.com/pulse/foo-author-abcde/","kind":"Article","title":"Hello","author":"Bob","published":"2026-04-24T10:00:00Z","markdown":"# Hello\n\nparagraph"}|}

let test_item_decode () =
  match Json.of_string Item.json item_json with
  | Error e -> Alcotest.failf "decode failed: %s" (Json.Error.to_string e)
  | Ok t ->
      Alcotest.(check string) "url" pulse_url (Item.url t);
      Alcotest.(check (option string)) "title" (Some "Hello") (Item.title t);
      Alcotest.(check string) "author" "Bob" (Item.author t);
      Alcotest.(check bool) "kind article" true (Item.kind t = `Article);
      let (y, m, d), _ = Ptime.to_date_time (Item.published t) in
      Alcotest.(check (triple int int int)) "date" (2026, 4, 24) (y, m, d);
      (* Guard on the full prefix length: [String.sub md 0 7] raises
         [Invalid_argument] on anything shorter than 7 characters, which
         would abort the test instead of failing this check. *)
      let md = Item.markdown t in
      Alcotest.(check bool)
        "markdown contains heading" true
        (String.length md >= 7 && String.sub md 0 7 = "# Hello")

(* decode -> encode -> decode must keep url, markdown and timestamp stable. *)
let test_item_roundtrip () =
  match Json.of_string Item.json item_json with
  | Error e -> Alcotest.failf "decode failed: %s" (Json.Error.to_string e)
  | Ok t1 -> (
      let s1 = Json.to_string Item.json t1 in
      match Json.of_string Item.json s1 with
      | Error e ->
          Alcotest.failf "re-decode failed: %s" (Json.Error.to_string e)
      | Ok t2 ->
          Alcotest.(check string) "url stable" (Item.url t1) (Item.url t2);
          Alcotest.(check string)
            "markdown stable" (Item.markdown t1) (Item.markdown t2);
          Alcotest.(check bool)
            "published stable" true
            (Ptime.equal (Item.published t1) (Item.published t2)))

let suite =
  ( "item",
    [
      Alcotest.test_case "summaries mixed feed" `Quick test_mixed_feed;
      Alcotest.test_case "summaries dedup" `Quick test_dedup;
      Alcotest.test_case "summaries empty" `Quick test_empty;
      Alcotest.test_case "summaries urn only" `Quick test_activity_urn_only;
      Alcotest.test_case "summary json roundtrip" `Quick test_summary_roundtrip;
      Alcotest.test_case "summary json post kind" `Quick test_summary_post_kind;
      Alcotest.test_case "item json decode" `Quick test_item_decode;
      Alcotest.test_case "item json roundtrip" `Quick test_item_roundtrip;
    ] )
+2
test/test_item.mli
··· 1 + val suite : string * unit Alcotest.test_case list 2 + (** Alcotest suite for the {!Linkedin.Item} module: the suite name paired with the list of test cases it groups. *)
+44
test/test_linkedin_url.ml
··· 60 60 "https://www.linkedin.com/posts/johndoe_title-activity-7123456789-abcd?utm=x" 61 61 (Post "urn:li:activity:7123456789") 62 62 63 + (* Pulse URLs *) 64 + 65 + let test_pulse_basic () = 66 + parse_ok 67 + "https://www.linkedin.com/pulse/how-much-value-we-leaving-orbit-parsimoni-rjugf/" 68 + (Pulse "how-much-value-we-leaving-orbit-parsimoni-rjugf") 69 + 70 + let test_pulse_no_trailing_slash () = 71 + parse_ok 72 + "https://www.linkedin.com/pulse/how-much-value-we-leaving-orbit-parsimoni-rjugf" 73 + (Pulse "how-much-value-we-leaving-orbit-parsimoni-rjugf") 74 + 75 + let test_pulse_query () = 76 + parse_ok 77 + "https://www.linkedin.com/pulse/how-much-value-we-leaving-orbit-parsimoni-rjugf/?utm_source=share" 78 + (Pulse "how-much-value-we-leaving-orbit-parsimoni-rjugf") 79 + 80 + (* pulse_of_string *) 81 + 82 + let test_pulse_parse_bare_slug () = 83 + Alcotest.(check (result string string)) 84 + "bare slug" (Ok "how-much-value-we-leaving-orbit-parsimoni-rjugf") 85 + (pulse_of_string "how-much-value-we-leaving-orbit-parsimoni-rjugf") 86 + 87 + let test_pulse_of_string_url () = 88 + Alcotest.(check (result string string)) 89 + "url" (Ok "how-much-value-we-leaving-orbit-parsimoni-rjugf") 90 + (pulse_of_string 91 + "https://www.linkedin.com/pulse/how-much-value-we-leaving-orbit-parsimoni-rjugf/") 92 + 93 + let test_pulse_of_profile_url () = 94 + match pulse_of_string "https://www.linkedin.com/in/johndoe" with 95 + | Error _ -> () 96 + | Ok v -> Alcotest.failf "expected Error, got Ok: %s" v 97 + 63 98 (* Invalid URLs *) 64 99 65 100 let test_invalid_host () = parse_err "https://example.com/in/johndoe" ··· 149 184 Alcotest.test_case "post_of_string url" `Quick test_post_of_string_url; 150 185 Alcotest.test_case "post_of_string profile url" `Quick 151 186 test_post_parse_profile_url; 187 + Alcotest.test_case "pulse basic" `Quick test_pulse_basic; 188 + Alcotest.test_case "pulse no trailing slash" `Quick 189 + test_pulse_no_trailing_slash; 190 + Alcotest.test_case 
"pulse query" `Quick test_pulse_query; 191 + Alcotest.test_case "pulse_of_string bare slug" `Quick 192 + test_pulse_parse_bare_slug; 193 + Alcotest.test_case "pulse_of_string url" `Quick test_pulse_of_string_url; 194 + Alcotest.test_case "pulse_of_string profile url" `Quick 195 + test_pulse_of_profile_url; 152 196 Alcotest.test_case "pp" `Quick test_pp; 153 197 ] )
+27
test/test_post.ml
··· 40 40 Alcotest.(check string) "urn" "urn:li:activity:456" (Linkedin.Post.urn p); 41 41 Alcotest.(check string) "text" "test" (Linkedin.Post.text p) 42 42 43 + (* LinkedIn returns [commentary] as an object when the body has 44 + mentions, links, or other rich formatting. We still want just the 45 + text. *) 46 + let test_commentary_object () = 47 + let json = 48 + {|{"urn":"urn:li:activity:999","commentary":{"text":"Hello rich world","attributes":[{"start":0,"length":5}]},"authorName":"Carol"}|} 49 + in 50 + let p = decode_ok Linkedin.Post.json json in 51 + Alcotest.(check string) "urn" "urn:li:activity:999" (Linkedin.Post.urn p); 52 + Alcotest.(check string) 53 + "text extracted from object" "Hello rich world" (Linkedin.Post.text p); 54 + Alcotest.(check string) "author_name" "Carol" (Linkedin.Post.author_name p) 55 + 56 + (* Normalised Voyager responses nest one more level: [commentary.text] 57 + is itself an object whose [text] field holds the string. *) 58 + let test_commentary_nested_object () = 59 + let json = 60 + {|{"urn":"urn:li:activity:1000","commentary":{"text":{"text":"Deeply nested body","attributes":[]}}}|} 61 + in 62 + let p = decode_ok Linkedin.Post.json json in 63 + Alcotest.(check string) 64 + "text extracted from nested object" "Deeply nested body" 65 + (Linkedin.Post.text p) 66 + 43 67 let test_roundtrip () = 44 68 let p = 45 69 Linkedin.Post.v ~urn:"urn:li:activity:789" ~text:"Some post" ··· 110 134 Alcotest.test_case "decode" `Quick test_decode; 111 135 Alcotest.test_case "minimal" `Quick test_minimal; 112 136 Alcotest.test_case "unknown fields" `Quick test_unknown_fields; 137 + Alcotest.test_case "commentary object" `Quick test_commentary_object; 138 + Alcotest.test_case "commentary nested object" `Quick 139 + test_commentary_nested_object; 113 140 Alcotest.test_case "roundtrip" `Quick test_roundtrip; 114 141 Alcotest.test_case "constructor defaults" `Quick test_constructor_defaults; 115 142 Alcotest.test_case "feed decode" `Quick 
test_feed_decode;
+263
test/test_pulse.ml
open Linkedin

(* [read_fixture name] returns the whole contents of the fixture file [name].

   The file is first looked up under
   [$DUNE_SOURCEROOT/ocaml-linkedin/test/fixtures] (with [.] standing in for
   an unset [DUNE_SOURCEROOT]); if nothing exists there, the relative path
   [fixtures/name] is used instead. Raises [Sys_error] if the chosen path
   cannot be opened. *)
let read_fixture name =
  let path =
    Filename.concat
      (Sys.getenv_opt "DUNE_SOURCEROOT" |> Option.value ~default:".")
      (Filename.concat "ocaml-linkedin/test/fixtures" name)
  in
  (* When running under `dune test`, the fixture directory is copied next to
     the test binary. Fall back to a relative path. *)
  let path =
    if Sys.file_exists path then path else Filename.concat "fixtures" name
  in
  (* Binary mode keeps the length reported by [in_channel_length] equal to the
     number of bytes [really_input_string] can actually read, even on
     platforms that translate line endings in text mode. *)
  let ic = open_in_bin path in
  (* [Fun.protect] guarantees the channel is closed even when reading raises,
     so a failing read does not leak the file descriptor. *)
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> really_input_string ic (in_channel_length ic))

(* [contains s sub] is [true] iff [sub] occurs somewhere in [s]. The empty
   string is contained in every string. *)
let contains s sub =
  let n = String.length s in
  let k = String.length sub in
  if k = 0 then true
  else if k > n then false
  else
    let rec loop i =
      if i + k > n then false
      else if String.sub s i k = sub then true
      else loop (i + 1)
    in
    loop 0

(* {1 Article parsing} *)

(* Slug passed to [Pulse.of_html] when parsing the article fixture. *)
let slug = "how-much-value-we-leaving-orbit-parsimoni-rjugf"

(* Parses the [pulse_article.html] fixture, failing the current test with the
   parser message if [Pulse.of_html] reports a parse error. *)
let parse_fixture () =
  let html = read_fixture "pulse_article.html" in
  match Pulse.of_html ~slug html with
  | Ok a -> a
  | Error (`Parse_error msg) -> Alcotest.failf "parse failed: %s" msg

let test_title () =
  let a = parse_fixture () in
  Alcotest.(check string)
    "title" "How Much Value Are We Leaving in Orbit?" (Pulse.title a)

let test_subtitle () =
  let a = parse_fixture () in
  Alcotest.(check (option string))
    "subtitle" (Some "A look at the economics of underused orbital assets.")
    (Pulse.subtitle a)

let test_author () =
  let a = parse_fixture () in
  Alcotest.(check string) "author" "Parsimoni" (Pulse.author a)

let test_author_id () =
  let a = parse_fixture () in
  Alcotest.(check (option string))
    "author_id" (Some "parsimoni") (Pulse.author_id a)

let test_published () =
  let a = parse_fixture () in
  let (y, m, d), _ = Ptime.to_date_time (Pulse.published a) in
  Alcotest.(check (triple int int int)) "published date" (2026, 4, 24) (y, m, d)

let test_url () =
  let a = parse_fixture () in
  Alcotest.(check string)
    "url"
    "https://www.linkedin.com/pulse/how-much-value-we-leaving-orbit-parsimoni-rjugf/"
    (Pulse.url a)

let test_slug () =
  let a = parse_fixture () in
  Alcotest.(check string) "slug" slug (Pulse.slug a)

(* {1 Markdown conversion} *)

let test_markdown_has_heading () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool)
    "## heading preserved" true
    (contains md "## A different approach")

let test_markdown_has_h3 () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool) "### heading preserved" true (contains md "### Why now")

let test_markdown_bold () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool)
    "bold marker present" true
    (contains md "**economic value**")

let test_markdown_italic () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool) "italic marker" true (contains md "_software_")

let test_markdown_link () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool)
    "link rendered" true
    (contains md "[SpaceOS](https://parsimoni.co/spaceos)")

let test_markdown_unordered_list () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool)
    "bullet item" true
    (contains md "- Idle capacity during non-operational windows.")

let test_markdown_ordered_list () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool)
    "numbered item" true
    (contains md "1. Launch costs have collapsed.")

let test_markdown_blockquote () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool)
    "blockquote line" true
    (contains md "> The best satellite is the one already in orbit.")

let test_markdown_code_block () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool) "fenced code opens" true (contains md "```ocaml");
  Alcotest.(check bool) "fenced code body" true (contains md "let orbit_value")

let test_markdown_inline_code () =
  let a = parse_fixture () in
  let md = Pulse.markdown a in
  Alcotest.(check bool) "inline code" true (contains md "`orbit_value`")

(* {1 Engagement metrics} *)

let test_num_likes () =
  let a = parse_fixture () in
  Alcotest.(check int) "reactions" 142 (Pulse.num_likes a)

let test_num_comments () =
  let a = parse_fixture () in
  Alcotest.(check int) "comments" 37 (Pulse.num_comments a)

(* {1 Pretty-printer} *)

let test_pp_non_empty () =
  let a = parse_fixture () in
  let s = Fmt.str "%a" Pulse.pp a in
  Alcotest.(check bool) "pp non-empty" true (String.length s > 50);
  Alcotest.(check bool)
    "pp mentions title" true
    (contains s "How Much Value Are We Leaving in Orbit?")

(* {1 List parsing} *)

let test_summaries () =
  let html = read_fixture "pulse_list.html" in
  let summaries = Pulse.summaries_of_html html in
  Alcotest.(check int) "two unique summaries" 2 (List.length summaries);
  let slugs = List.map (fun (s : Pulse.summary) -> s.slug) summaries in
  Alcotest.(check bool)
    "first article present" true
    (List.mem "how-much-value-we-leaving-orbit-parsimoni-rjugf" slugs);
  Alcotest.(check bool)
    "second article present" true
    (List.mem "ground-stations-are-the-new-bottleneck-parsimoni-q2a8p" slugs)

let test_summaries_have_titles () =
  let html = read_fixture "pulse_list.html" in
  let summaries = Pulse.summaries_of_html html in
  List.iter
    (fun (s : Pulse.summary) ->
      Alcotest.(check bool)
        (Fmt.str "title non-empty for %s" s.slug)
        true
        (String.length s.title > 0))
    summaries

(* {1 Share-feed slug extraction} *)

let test_slugs_of_feed_raw () =
  let body =
    {|{"actionTarget":"https://www.linkedin.com/pulse/first-article-author-abcde/","x":"https://www.linkedin.com/pulse/second-piece-author-zyx12/?utm=share"}|}
  in
  Alcotest.(check (list string))
    "raw slugs"
    [ "first-article-author-abcde"; "second-piece-author-zyx12" ]
    (Pulse.slugs_of_feed body)

let test_slugs_of_feed_escaped () =
  let body =
    {|{"navigationContext":{"actionTarget":"https:\/\/www.linkedin.com\/pulse\/escaped-slug-author-qqq/"}}|}
  in
  Alcotest.(check (list string))
    "escaped slug"
    [ "escaped-slug-author-qqq" ]
    (Pulse.slugs_of_feed body)

let test_slugs_of_feed_empty () =
  let body = {|{"data":{"other":"no articles here"}}|} in
  Alcotest.(check (list string)) "no slugs" [] (Pulse.slugs_of_feed body)

let test_slugs_of_feed_duplicates () =
  let body =
    {|{"a":"linkedin.com/pulse/foo-bar-abc/","b":"linkedin.com/pulse/foo-bar-abc/?x=1"}|}
  in
  Alcotest.(check int)
    "both occurrences kept" 2
    (List.length (Pulse.slugs_of_feed body))

let test_summaries_canonical_url () =
  let html = read_fixture "pulse_list.html" in
  let summaries = Pulse.summaries_of_html html in
  List.iter
    (fun (s : Pulse.summary) ->
      let expected = Fmt.str "https://www.linkedin.com/pulse/%s/" s.slug in
      Alcotest.(check string) "canonical url" expected s.url)
    summaries

let suite =
  ( "pulse",
    [
      Alcotest.test_case "title" `Quick test_title;
      Alcotest.test_case "subtitle" `Quick test_subtitle;
      Alcotest.test_case "author" `Quick test_author;
      Alcotest.test_case "author_id" `Quick test_author_id;
      Alcotest.test_case "published" `Quick test_published;
      Alcotest.test_case "url" `Quick test_url;
      Alcotest.test_case "slug" `Quick test_slug;
      Alcotest.test_case "markdown h2" `Quick test_markdown_has_heading;
      Alcotest.test_case "markdown h3" `Quick test_markdown_has_h3;
      Alcotest.test_case "markdown bold" `Quick test_markdown_bold;
      Alcotest.test_case "markdown italic" `Quick test_markdown_italic;
      Alcotest.test_case "markdown link" `Quick test_markdown_link;
      Alcotest.test_case "markdown ul" `Quick test_markdown_unordered_list;
      Alcotest.test_case "markdown ol" `Quick test_markdown_ordered_list;
      Alcotest.test_case "markdown blockquote" `Quick test_markdown_blockquote;
      Alcotest.test_case "markdown code block" `Quick test_markdown_code_block;
      Alcotest.test_case "markdown inline code" `Quick test_markdown_inline_code;
      Alcotest.test_case "num_likes" `Quick test_num_likes;
      Alcotest.test_case "num_comments" `Quick test_num_comments;
      Alcotest.test_case "pp" `Quick test_pp_non_empty;
      Alcotest.test_case "summaries dedup" `Quick test_summaries;
      Alcotest.test_case "summary titles" `Quick test_summaries_have_titles;
      Alcotest.test_case "summary canonical url" `Quick
        test_summaries_canonical_url;
      Alcotest.test_case "slugs_of_feed raw" `Quick test_slugs_of_feed_raw;
      Alcotest.test_case "slugs_of_feed escaped" `Quick
        test_slugs_of_feed_escaped;
      Alcotest.test_case "slugs_of_feed empty" `Quick test_slugs_of_feed_empty;
      Alcotest.test_case "slugs_of_feed duplicates" `Quick
        test_slugs_of_feed_duplicates;
    ] )
+2
test/test_pulse.mli
··· 1 + val suite : string * unit Alcotest.test_case list 2 + (** Alcotest suite for the {!Linkedin.Pulse} module: the suite name paired with the list of test cases it groups. *)