OCaml client for the LinkedIn Voyager API
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

linkedin: extract titles from feed via Voyager graph walker

Item.feed previously used a regex to pull /pulse/ URLs and activity URNs
out of the raw share-feed body. That gave us a list of URLs but no titles —
for a "feed on a website" you really want to render an article-share line
as "<title> — <url>", not as a bare URL.

Walk the JSON properly: for each UpdateV2 in [included] and
[elements], inspect [content.$type]. ArticleComponent shares surface
the article URL ([content.navigationContext.actionTarget], with the
query string — and so any tracking params — stripped) plus the article
title ([content.title.text]). Everything else falls back to the
[/feed/update/<urn>] URL with no title. Duplicate URLs are dropped.

The regex scan stays as a safety net for non-graph bodies (error
pages, fixtures used in tests) — switching from "always regex" to
"prefer graph, fall back to regex" leaves all existing tests passing
while populating titles for real feed responses.

+119 -10
+119 -10
lib/item.ml
··· 238 238 |> Object.opt_member "num_comments" int ~enc:(fun i -> Some i.num_comments) 239 239 |> Object.skip_unknown |> Object.seal 240 240 241 - (** {1 Feed} *) 241 + (** {1 Feed} 242 + 243 + The share-feed response is a graph just like the single-post response: 244 + [included] holds [UpdateV2] entries (in document order, which matches 245 + reverse-chronological feed order), and each one's [content.$type] 246 + decides whether the item is a Pulse article share (we surface the 247 + article URL and title) or a plain feed post (we synthesise the 248 + [/feed/update/&lt;urn&gt;] URL). *) 249 + 250 + let obj_members = function Json.Value.Object (ms, _) -> Some ms | _ -> None 251 + let as_string = function Json.Value.String (s, _) -> Some s | _ -> None 252 + let as_array = function Json.Value.Array (xs, _) -> Some xs | _ -> None 253 + 254 + let mem key ms = 255 + match Json.Value.member_key key ms with Some (_, v) -> Some v | None -> None 256 + 257 + let ( let* ) = Option.bind 258 + 259 + let nested_text path ms = 260 + let rec loop ms = function 261 + | [] -> None 262 + | [ key ] -> mem key ms |> Option.map as_string |> Option.join 263 + | key :: rest -> 264 + let* sub = mem key ms in 265 + let* sub_ms = obj_members sub in 266 + loop sub_ms rest 267 + in 268 + loop ms path 269 + 270 + let strip_query url = 271 + match String.index_opt url '?' 
with 272 + | Some i -> String.sub url 0 i 273 + | None -> url 274 + 275 + let post_url_of_activity_urn urn = 276 + Fmt.str "https://www.linkedin.com/feed/update/%s" urn 277 + 278 + let summary_of_updatev2 upd = 279 + let* upd_ms = obj_members upd in 280 + let activity_urn = 281 + let* meta = mem "updateMetadata" upd_ms in 282 + let* meta_ms = obj_members meta in 283 + nested_text [ "urn" ] meta_ms 284 + in 285 + let content_ms = 286 + let* c = mem "content" upd_ms in 287 + obj_members c 288 + in 289 + let content_type = 290 + Option.bind content_ms (fun ms -> nested_text [ "$type" ] ms) 291 + in 292 + match content_type with 293 + | Some "com.linkedin.voyager.feed.render.ArticleComponent" -> ( 294 + let* c_ms = content_ms in 295 + let url = 296 + nested_text [ "navigationContext"; "actionTarget" ] c_ms 297 + |> Option.map strip_query 298 + in 299 + let title = nested_text [ "title"; "text" ] c_ms in 300 + match url with 301 + | Some u -> Some { url = u; kind = `Article; title; published = None } 302 + | None -> None) 303 + | _ -> 304 + let* urn = activity_urn in 305 + Some 306 + { 307 + url = post_url_of_activity_urn urn; 308 + kind = `Post; 309 + title = None; 310 + published = None; 311 + } 242 312 243 - let summaries_of_feed body = 313 + let summaries_of_feed_value v = 314 + let updatev2 = "com.linkedin.voyager.feed.render.UpdateV2" in 244 315 let seen = Hashtbl.create 64 in 245 - Re.all re body 246 - |> List.filter_map summary_of_match 247 - |> List.filter (fun (s : summary) -> 248 - if Hashtbl.mem seen s.url then false 249 - else begin 250 - Hashtbl.add seen s.url (); 251 - true 252 - end) 316 + let result = ref [] in 317 + let consider e = 318 + match obj_members e with 319 + | None -> () 320 + | Some ms -> ( 321 + match nested_text [ "$type" ] ms with 322 + | Some t when t = updatev2 -> ( 323 + match summary_of_updatev2 e with 324 + | None -> () 325 + | Some s -> 326 + if not (Hashtbl.mem seen s.url) then begin 327 + Hashtbl.add seen s.url (); 328 + result 
:= s :: !result 329 + end) 330 + | _ -> ()) 331 + in 332 + let walk_array v = 333 + match as_array v with None -> () | Some xs -> List.iter consider xs 334 + in 335 + (match obj_members v with 336 + | None -> () 337 + | Some root -> 338 + Option.iter walk_array (mem "included" root); 339 + Option.iter walk_array (mem "elements" root)); 340 + List.rev !result 341 + 342 + let summaries_of_feed body = 343 + let from_json = 344 + match Json.of_string Json.Codec.Value.t body with 345 + | Ok v -> summaries_of_feed_value v 346 + | Error _ -> [] 347 + in 348 + if from_json <> [] then from_json 349 + else 350 + (* Fall back to a regex scan when the response has no UpdateV2 graph 351 + to follow (e.g. an error page, or the synthetic JSON used in tests 352 + to exercise pure URL extraction). *) 353 + let seen = Hashtbl.create 64 in 354 + Re.all re body 355 + |> List.filter_map summary_of_match 356 + |> List.filter (fun (s : summary) -> 357 + if Hashtbl.mem seen s.url then false 358 + else begin 359 + Hashtbl.add seen s.url (); 360 + true 361 + end) 253 362 254 363 let feed ?(start = 0) ?(count = 100) ~profile_id api = 255 364 let path = feed_path ~profile_id ~start ~count in