A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 1620 lines 108 kB view raw
1<!DOCTYPE html> 2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head> 3 4<meta charset="utf-8"> 5<meta name="generator" content="quarto-1.7.34"> 6 7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> 8 9<meta name="description" content="Publishing and discovering datasets on the AT Protocol network"> 10 11<title>Atmosphere (ATProto Integration) – atdata</title> 12<style> 13code{white-space: pre-wrap;} 14span.smallcaps{font-variant: small-caps;} 15div.columns{display: flex; gap: min(4vw, 1.5em);} 16div.column{flex: auto; overflow-x: auto;} 17div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} 18ul.task-list{list-style: none;} 19ul.task-list li input[type="checkbox"] { 20 width: 0.8em; 21 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 22 vertical-align: middle; 23} 24/* CSS for syntax highlighting */ 25html { -webkit-text-size-adjust: 100%; } 26pre > code.sourceCode { white-space: pre; position: relative; } 27pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } 28pre > code.sourceCode > span:empty { height: 1.2em; } 29.sourceCode { overflow: visible; } 30code.sourceCode > span { color: inherit; text-decoration: inherit; } 31div.sourceCode { margin: 1em 0; } 32pre.sourceCode { margin: 0; } 33@media screen { 34div.sourceCode { overflow: auto; } 35} 36@media print { 37pre > code.sourceCode { white-space: pre-wrap; } 38pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } 39} 40pre.numberSource code 41 { counter-reset: source-line 0; } 42pre.numberSource code > span 43 { position: relative; left: -4em; counter-increment: source-line; } 44pre.numberSource code > span > a:first-child::before 45 { content: counter(source-line); 46 position: relative; left: -1em; text-align: right; vertical-align: baseline; 47 border: none; display: inline-block; 48 -webkit-touch-callout: none; -webkit-user-select: none; 49 
-khtml-user-select: none; -moz-user-select: none; 50 -ms-user-select: none; user-select: none; 51 padding: 0 4px; width: 4em; 52 } 53pre.numberSource { margin-left: 3em; padding-left: 4px; } 54div.sourceCode 55 { } 56@media screen { 57pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } 58} 59</style> 60 61 62<script src="../site_libs/quarto-nav/quarto-nav.js"></script> 63<script src="../site_libs/quarto-nav/headroom.min.js"></script> 64<script src="../site_libs/clipboard/clipboard.min.js"></script> 65<script src="../site_libs/quarto-search/autocomplete.umd.js"></script> 66<script src="../site_libs/quarto-search/fuse.min.js"></script> 67<script src="../site_libs/quarto-search/quarto-search.js"></script> 68<meta name="quarto:offset" content="../"> 69<script src="../site_libs/quarto-html/quarto.js" type="module"></script> 70<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script> 71<script src="../site_libs/quarto-html/popper.min.js"></script> 72<script src="../site_libs/quarto-html/tippy.umd.min.js"></script> 73<script src="../site_libs/quarto-html/anchor.min.js"></script> 74<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet"> 75<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles"> 76<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles"> 77<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles"> 78<script src="../site_libs/bootstrap/bootstrap.min.js"></script> 79<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet"> 80<link 
href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" data-mode="light"> 81<link href="../site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark"> 82<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light"> 83<script id="quarto-search-options" type="application/json">{ 84 "location": "navbar", 85 "copy-button": false, 86 "collapse-after": 3, 87 "panel-placement": "end", 88 "type": "overlay", 89 "limit": 50, 90 "keyboard-shortcut": [ 91 "f", 92 "/", 93 "s" 94 ], 95 "show-item-context": false, 96 "language": { 97 "search-no-results-text": "No results", 98 "search-matching-documents-text": "matching documents", 99 "search-copy-link-title": "Copy link to search", 100 "search-hide-matches-text": "Hide additional matches", 101 "search-more-match-text": "more match in this document", 102 "search-more-matches-text": "more matches in this document", 103 "search-clear-button-title": "Clear", 104 "search-text-placeholder": "", 105 "search-detached-cancel-button-title": "Cancel", 106 "search-submit-button-title": "Submit", 107 "search-label": "Search" 108 } 109}</script> 110 111 112<link rel="stylesheet" href="../assets/styles.css"> 113</head> 114 115<body class="nav-sidebar docked nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript"> 116 const toggleBodyColorMode = (bsSheetEl) => { 117 const mode = bsSheetEl.getAttribute("data-mode"); 118 const bodyEl = window.document.querySelector("body"); 119 if (mode === "dark") { 120 bodyEl.classList.add("quarto-dark"); 121 bodyEl.classList.remove("quarto-light"); 122 } else { 123 
bodyEl.classList.add("quarto-light"); 124 bodyEl.classList.remove("quarto-dark"); 125 } 126 } 127 const toggleBodyColorPrimary = () => { 128 const bsSheetEl = window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])"); 129 if (bsSheetEl) { 130 toggleBodyColorMode(bsSheetEl); 131 } 132 } 133 const setColorSchemeToggle = (alternate) => { 134 const toggles = window.document.querySelectorAll('.quarto-color-scheme-toggle'); 135 for (let i=0; i < toggles.length; i++) { 136 const toggle = toggles[i]; 137 if (toggle) { 138 if (alternate) { 139 toggle.classList.add("alternate"); 140 } else { 141 toggle.classList.remove("alternate"); 142 } 143 } 144 } 145 }; 146 const toggleColorMode = (alternate) => { 147 // Switch the stylesheets 148 const primaryStylesheets = window.document.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)'); 149 const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate'); 150 manageTransitions('#quarto-margin-sidebar .nav-link', false); 151 if (alternate) { 152 // note: dark is layered on light, we don't disable primary! 
153 enableStylesheet(alternateStylesheets); 154 for (const sheetNode of alternateStylesheets) { 155 if (sheetNode.id === "quarto-bootstrap") { 156 toggleBodyColorMode(sheetNode); 157 } 158 } 159 } else { 160 disableStylesheet(alternateStylesheets); 161 enableStylesheet(primaryStylesheets) 162 toggleBodyColorPrimary(); 163 } 164 manageTransitions('#quarto-margin-sidebar .nav-link', true); 165 // Switch the toggles 166 setColorSchemeToggle(alternate) 167 // Hack to workaround the fact that safari doesn't 168 // properly recolor the scrollbar when toggling (#1455) 169 if (navigator.userAgent.indexOf('Safari') > 0 && navigator.userAgent.indexOf('Chrome') == -1) { 170 manageTransitions("body", false); 171 window.scrollTo(0, 1); 172 setTimeout(() => { 173 window.scrollTo(0, 0); 174 manageTransitions("body", true); 175 }, 40); 176 } 177 } 178 const disableStylesheet = (stylesheets) => { 179 for (let i=0; i < stylesheets.length; i++) { 180 const stylesheet = stylesheets[i]; 181 stylesheet.rel = 'disabled-stylesheet'; 182 } 183 } 184 const enableStylesheet = (stylesheets) => { 185 for (let i=0; i < stylesheets.length; i++) { 186 const stylesheet = stylesheets[i]; 187 if(stylesheet.rel !== 'stylesheet') { // for Chrome, which will still FOUC without this check 188 stylesheet.rel = 'stylesheet'; 189 } 190 } 191 } 192 const manageTransitions = (selector, allowTransitions) => { 193 const els = window.document.querySelectorAll(selector); 194 for (let i=0; i < els.length; i++) { 195 const el = els[i]; 196 if (allowTransitions) { 197 el.classList.remove('notransition'); 198 } else { 199 el.classList.add('notransition'); 200 } 201 } 202 } 203 const isFileUrl = () => { 204 return window.location.protocol === 'file:'; 205 } 206 const hasAlternateSentinel = () => { 207 let styleSentinel = getColorSchemeSentinel(); 208 if (styleSentinel !== null) { 209 return styleSentinel === "alternate"; 210 } else { 211 return false; 212 } 213 } 214 const setStyleSentinel = (alternate) => { 215 
const value = alternate ? "alternate" : "default"; 216 if (!isFileUrl()) { 217 window.localStorage.setItem("quarto-color-scheme", value); 218 } else { 219 localAlternateSentinel = value; 220 } 221 } 222 const getColorSchemeSentinel = () => { 223 if (!isFileUrl()) { 224 const storageValue = window.localStorage.getItem("quarto-color-scheme"); 225 return storageValue != null ? storageValue : localAlternateSentinel; 226 } else { 227 return localAlternateSentinel; 228 } 229 } 230 const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => { 231 const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light'; 232 const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark'; 233 let newTheme = ''; 234 if(authorPrefersDark) { 235 newTheme = isAlternate ? baseTheme : alternateTheme; 236 } else { 237 newTheme = isAlternate ? alternateTheme : baseTheme; 238 } 239 const changeGiscusTheme = () => { 240 // From: https://github.com/giscus/giscus/issues/336 241 const sendMessage = (message) => { 242 const iframe = document.querySelector('iframe.giscus-frame'); 243 if (!iframe) return; 244 iframe.contentWindow.postMessage({ giscus: message }, 'https://giscus.app'); 245 } 246 sendMessage({ 247 setConfig: { 248 theme: newTheme 249 } 250 }); 251 } 252 const isGiscussLoaded = window.document.querySelector('iframe.giscus-frame') !== null; 253 if (isGiscussLoaded) { 254 changeGiscusTheme(); 255 } 256 }; 257 const authorPrefersDark = false; 258 const darkModeDefault = authorPrefersDark; 259 document.querySelector('link#quarto-text-highlighting-styles.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 260 document.querySelector('link#quarto-bootstrap.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 261 let localAlternateSentinel = darkModeDefault ? 
'alternate' : 'default'; 262 // Dark / light mode switch 263 window.quartoToggleColorScheme = () => { 264 // Read the current dark / light value 265 let toAlternate = !hasAlternateSentinel(); 266 toggleColorMode(toAlternate); 267 setStyleSentinel(toAlternate); 268 toggleGiscusIfUsed(toAlternate, darkModeDefault); 269 window.dispatchEvent(new Event('resize')); 270 }; 271 // Switch to dark mode if need be 272 if (hasAlternateSentinel()) { 273 toggleColorMode(true); 274 } else { 275 toggleColorMode(false); 276 } 277 </script> 278 279<div id="quarto-search-results"></div> 280 <header id="quarto-header" class="headroom fixed-top"> 281 <nav class="navbar navbar-expand-lg " data-bs-theme="dark"> 282 <div class="navbar-container container-fluid"> 283 <div class="navbar-brand-container mx-auto"> 284 <a class="navbar-brand" href="../index.html"> 285 <span class="navbar-title">atdata</span> 286 </a> 287 </div> 288 <div id="quarto-search" class="" title="Search"></div> 289 <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" role="menu" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 290 <span class="navbar-toggler-icon"></span> 291</button> 292 <div class="collapse navbar-collapse" id="navbarCollapse"> 293 <ul class="navbar-nav navbar-nav-scroll me-auto"> 294 <li class="nav-item"> 295 <a class="nav-link active" href="../index.html" aria-current="page"> 296<span class="menu-text">Guide</span></a> 297 </li> 298 <li class="nav-item dropdown "> 299 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 300 <span class="menu-text">Tutorials</span> 301 </a> 302 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials"> 303 <li> 304 <a class="dropdown-item" href="../tutorials/quickstart.html"> 305 <span class="dropdown-text">Quick 
Start</span></a> 306 </li> 307 <li> 308 <a class="dropdown-item" href="../tutorials/local-workflow.html"> 309 <span class="dropdown-text">Local Workflow</span></a> 310 </li> 311 <li> 312 <a class="dropdown-item" href="../tutorials/atmosphere.html"> 313 <span class="dropdown-text">Atmosphere Publishing</span></a> 314 </li> 315 <li> 316 <a class="dropdown-item" href="../tutorials/promotion.html"> 317 <span class="dropdown-text">Promotion Workflow</span></a> 318 </li> 319 </ul> 320 </li> 321 <li class="nav-item dropdown "> 322 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 323 <span class="menu-text">Reference</span> 324 </a> 325 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference"> 326 <li> 327 <a class="dropdown-item" href="../reference/architecture.html"> 328 <span class="dropdown-text">Architecture Overview</span></a> 329 </li> 330 <li> 331 <a class="dropdown-item" href="../reference/packable-samples.html"> 332 <span class="dropdown-text">Packable Samples</span></a> 333 </li> 334 <li> 335 <a class="dropdown-item" href="../reference/datasets.html"> 336 <span class="dropdown-text">Datasets</span></a> 337 </li> 338 <li> 339 <a class="dropdown-item" href="../reference/lenses.html"> 340 <span class="dropdown-text">Lenses</span></a> 341 </li> 342 <li> 343 <a class="dropdown-item" href="../reference/local-storage.html"> 344 <span class="dropdown-text">Local Storage</span></a> 345 </li> 346 <li> 347 <a class="dropdown-item" href="../reference/atmosphere.html"> 348 <span class="dropdown-text">Atmosphere</span></a> 349 </li> 350 <li> 351 <a class="dropdown-item" href="../reference/promotion.html"> 352 <span class="dropdown-text">Promotion</span></a> 353 </li> 354 <li> 355 <a class="dropdown-item" href="../reference/load-dataset.html"> 356 <span class="dropdown-text">load_dataset API</span></a> 357 </li> 358 <li> 359 <a class="dropdown-item" href="../reference/protocols.html"> 360 
<span class="dropdown-text">Protocols</span></a> 361 </li> 362 <li> 363 <a class="dropdown-item" href="../reference/uri-spec.html"> 364 <span class="dropdown-text">URI Specification</span></a> 365 </li> 366 <li> 367 <a class="dropdown-item" href="../reference/troubleshooting.html"> 368 <span class="dropdown-text">Troubleshooting &amp; FAQ</span></a> 369 </li> 370 <li> 371 <a class="dropdown-item" href="../reference/deployment.html"> 372 <span class="dropdown-text">Deployment Guide</span></a> 373 </li> 374 </ul> 375 </li> 376 <li class="nav-item"> 377 <a class="nav-link" href="../api/index.html"> 378<span class="menu-text">API</span></a> 379 </li> 380</ul> 381 <ul class="navbar-nav navbar-nav-scroll ms-auto"> 382 <li class="nav-item compact"> 383 <a class="nav-link" href="https://github.com/your-org/atdata"> <i class="bi bi-github" role="img"> 384</i> 385<span class="menu-text"></span></a> 386 </li> 387</ul> 388 </div> <!-- /navcollapse --> 389 <div class="quarto-navbar-tools"> 390 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a> 391</div> 392 </div> <!-- /container-fluid --> 393 </nav> 394 <nav class="quarto-secondary-nav"> 395 <div class="container-fluid d-flex"> 396 <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 397 <i class="bi bi-layout-text-sidebar-reverse"></i> 398 </button> 399 <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/atmosphere.html">Atmosphere (ATProto Integration)</a></li></ol></nav> 400 <a 
class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 401 </a> 402 </div> 403 </nav> 404</header> 405<!-- content --> 406<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar"> 407<!-- sidebar --> 408 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto"> 409 <div class="sidebar-menu-container"> 410 <ul class="list-unstyled mt-1"> 411 <li class="sidebar-item"> 412 <div class="sidebar-item-container"> 413 <a href="../index.html" class="sidebar-item-text sidebar-link"> 414 <span class="menu-text">atdata</span></a> 415 </div> 416</li> 417 <li class="sidebar-item sidebar-item-section"> 418 <div class="sidebar-item-container"> 419 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true"> 420 <span class="menu-text">Getting Started</span></a> 421 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section"> 422 <i class="bi bi-chevron-right ms-2"></i> 423 </a> 424 </div> 425 <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show"> 426 <li class="sidebar-item"> 427 <div class="sidebar-item-container"> 428 <a href="../tutorials/quickstart.html" class="sidebar-item-text sidebar-link"> 429 <span class="menu-text">Quick Start</span></a> 430 </div> 431</li> 432 <li class="sidebar-item"> 433 <div class="sidebar-item-container"> 434 <a href="../tutorials/local-workflow.html" class="sidebar-item-text sidebar-link"> 435 <span class="menu-text">Local 
Workflow</span></a> 436 </div> 437</li> 438 <li class="sidebar-item"> 439 <div class="sidebar-item-container"> 440 <a href="../tutorials/atmosphere.html" class="sidebar-item-text sidebar-link"> 441 <span class="menu-text">Atmosphere Publishing</span></a> 442 </div> 443</li> 444 <li class="sidebar-item"> 445 <div class="sidebar-item-container"> 446 <a href="../tutorials/promotion.html" class="sidebar-item-text sidebar-link"> 447 <span class="menu-text">Promotion Workflow</span></a> 448 </div> 449</li> 450 </ul> 451 </li> 452 <li class="sidebar-item sidebar-item-section"> 453 <div class="sidebar-item-container"> 454 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true"> 455 <span class="menu-text">Reference</span></a> 456 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section"> 457 <i class="bi bi-chevron-right ms-2"></i> 458 </a> 459 </div> 460 <ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show"> 461 <li class="sidebar-item"> 462 <div class="sidebar-item-container"> 463 <a href="../reference/architecture.html" class="sidebar-item-text sidebar-link"> 464 <span class="menu-text">Architecture Overview</span></a> 465 </div> 466</li> 467 <li class="sidebar-item"> 468 <div class="sidebar-item-container"> 469 <a href="../reference/packable-samples.html" class="sidebar-item-text sidebar-link"> 470 <span class="menu-text">Packable Samples</span></a> 471 </div> 472</li> 473 <li class="sidebar-item"> 474 <div class="sidebar-item-container"> 475 <a href="../reference/datasets.html" class="sidebar-item-text sidebar-link"> 476 <span class="menu-text">Datasets</span></a> 477 </div> 478</li> 479 <li class="sidebar-item"> 480 <div class="sidebar-item-container"> 481 <a href="../reference/lenses.html" 
class="sidebar-item-text sidebar-link"> 482 <span class="menu-text">Lenses</span></a> 483 </div> 484</li> 485 <li class="sidebar-item"> 486 <div class="sidebar-item-container"> 487 <a href="../reference/local-storage.html" class="sidebar-item-text sidebar-link"> 488 <span class="menu-text">Local Storage</span></a> 489 </div> 490</li> 491 <li class="sidebar-item"> 492 <div class="sidebar-item-container"> 493 <a href="../reference/atmosphere.html" class="sidebar-item-text sidebar-link active"> 494 <span class="menu-text">Atmosphere (ATProto Integration)</span></a> 495 </div> 496</li> 497 <li class="sidebar-item"> 498 <div class="sidebar-item-container"> 499 <a href="../reference/promotion.html" class="sidebar-item-text sidebar-link"> 500 <span class="menu-text">Promotion Workflow</span></a> 501 </div> 502</li> 503 <li class="sidebar-item"> 504 <div class="sidebar-item-container"> 505 <a href="../reference/load-dataset.html" class="sidebar-item-text sidebar-link"> 506 <span class="menu-text">load_dataset API</span></a> 507 </div> 508</li> 509 <li class="sidebar-item"> 510 <div class="sidebar-item-container"> 511 <a href="../reference/protocols.html" class="sidebar-item-text sidebar-link"> 512 <span class="menu-text">Protocols</span></a> 513 </div> 514</li> 515 <li class="sidebar-item"> 516 <div class="sidebar-item-container"> 517 <a href="../reference/uri-spec.html" class="sidebar-item-text sidebar-link"> 518 <span class="menu-text">URI Specification</span></a> 519 </div> 520</li> 521 <li class="sidebar-item"> 522 <div class="sidebar-item-container"> 523 <a href="../reference/troubleshooting.html" class="sidebar-item-text sidebar-link"> 524 <span class="menu-text">Troubleshooting &amp; FAQ</span></a> 525 </div> 526</li> 527 <li class="sidebar-item"> 528 <div class="sidebar-item-container"> 529 <a href="../reference/deployment.html" class="sidebar-item-text sidebar-link"> 530 <span class="menu-text">Deployment Guide</span></a> 531 </div> 532</li> 533 </ul> 534 </li> 
535 </ul> 536 </div> 537</nav> 538<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div> 539<!-- margin-sidebar --> 540 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar"> 541 <nav id="TOC" role="doc-toc" class="toc-active"> 542 <h2 id="toc-title">On this page</h2> 543 544 <ul> 545 <li><a href="#installation" id="toc-installation" class="nav-link active" data-scroll-target="#installation">Installation</a></li> 546 <li><a href="#overview" id="toc-overview" class="nav-link" data-scroll-target="#overview">Overview</a></li> 547 <li><a href="#atmosphereclient" id="toc-atmosphereclient" class="nav-link" data-scroll-target="#atmosphereclient">AtmosphereClient</a> 548 <ul class="collapse"> 549 <li><a href="#session-management" id="toc-session-management" class="nav-link" data-scroll-target="#session-management">Session Management</a></li> 550 <li><a href="#custom-pds" id="toc-custom-pds" class="nav-link" data-scroll-target="#custom-pds">Custom PDS</a></li> 551 </ul></li> 552 <li><a href="#pdsblobstore" id="toc-pdsblobstore" class="nav-link" data-scroll-target="#pdsblobstore">PDSBlobStore</a> 553 <ul class="collapse"> 554 <li><a href="#size-limits" id="toc-size-limits" class="nav-link" data-scroll-target="#size-limits">Size Limits</a></li> 555 </ul></li> 556 <li><a href="#blobsource" id="toc-blobsource" class="nav-link" data-scroll-target="#blobsource">BlobSource</a></li> 557 <li><a href="#atmosphereindex" id="toc-atmosphereindex" class="nav-link" data-scroll-target="#atmosphereindex">AtmosphereIndex</a> 558 <ul class="collapse"> 559 <li><a href="#publishing-schemas" id="toc-publishing-schemas" class="nav-link" data-scroll-target="#publishing-schemas">Publishing Schemas</a></li> 560 <li><a href="#publishing-datasets" id="toc-publishing-datasets" class="nav-link" data-scroll-target="#publishing-datasets">Publishing Datasets</a></li> 561 <li><a 
href="#listing-and-retrieving" id="toc-listing-and-retrieving" class="nav-link" data-scroll-target="#listing-and-retrieving">Listing and Retrieving</a></li> 562 </ul></li> 563 <li><a href="#lower-level-publishers" id="toc-lower-level-publishers" class="nav-link" data-scroll-target="#lower-level-publishers">Lower-Level Publishers</a> 564 <ul class="collapse"> 565 <li><a href="#schemapublisher" id="toc-schemapublisher" class="nav-link" data-scroll-target="#schemapublisher">SchemaPublisher</a></li> 566 <li><a href="#datasetpublisher" id="toc-datasetpublisher" class="nav-link" data-scroll-target="#datasetpublisher">DatasetPublisher</a></li> 567 <li><a href="#lenspublisher" id="toc-lenspublisher" class="nav-link" data-scroll-target="#lenspublisher">LensPublisher</a></li> 568 </ul></li> 569 <li><a href="#lower-level-loaders" id="toc-lower-level-loaders" class="nav-link" data-scroll-target="#lower-level-loaders">Lower-Level Loaders</a> 570 <ul class="collapse"> 571 <li><a href="#schemaloader" id="toc-schemaloader" class="nav-link" data-scroll-target="#schemaloader">SchemaLoader</a></li> 572 <li><a href="#datasetloader" id="toc-datasetloader" class="nav-link" data-scroll-target="#datasetloader">DatasetLoader</a></li> 573 <li><a href="#lensloader" id="toc-lensloader" class="nav-link" data-scroll-target="#lensloader">LensLoader</a></li> 574 </ul></li> 575 <li><a href="#at-uris" id="toc-at-uris" class="nav-link" data-scroll-target="#at-uris">AT URIs</a></li> 576 <li><a href="#supported-field-types" id="toc-supported-field-types" class="nav-link" data-scroll-target="#supported-field-types">Supported Field Types</a></li> 577 <li><a href="#complete-example" id="toc-complete-example" class="nav-link" data-scroll-target="#complete-example">Complete Example</a></li> 578 <li><a href="#related" id="toc-related" class="nav-link" data-scroll-target="#related">Related</a></li> 579 </ul> 580<div class="toc-actions"><ul><li><a 
href="https://github.com/your-org/atdata/edit/main/reference/atmosphere.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav> 581 </div> 582<!-- main --> 583<main class="content" id="quarto-document-content"> 584 585 586<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/atmosphere.html">Atmosphere (ATProto Integration)</a></li></ol></nav> 587<div class="quarto-title"> 588<h1 class="title">Atmosphere (ATProto Integration)</h1> 589</div> 590 591<div> 592 <div class="description"> 593 Publishing and discovering datasets on the AT Protocol network 594 </div> 595</div> 596 597 598<div class="quarto-title-meta"> 599 600 601 602 603 </div> 604 605 606 607</header> 608 609 610<p>The atmosphere module enables publishing and discovering datasets on the ATProto network, creating a federated ecosystem for typed datasets.</p> 611<section id="installation" class="level2"> 612<h2 class="anchored" data-anchor-id="installation">Installation</h2> 613<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install atdata<span class="pp">[</span><span class="ss">atmosphere</span><span class="pp">]</span></span> 614<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="co"># or</span></span> 615<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install atproto</span></code><button title="Copy to Clipboard" 
class="code-copy-button"><i class="bi"></i></button></pre></div> 616</section> 617<section id="overview" class="level2"> 618<h2 class="anchored" data-anchor-id="overview">Overview</h2> 619<p>ATProto integration publishes datasets, schemas, and lenses as records in the <code>ac.foundation.dataset.*</code> namespace. This enables:</p> 620<ul> 621<li><strong>Discovery</strong> through the ATProto network</li> 622<li><strong>Federation</strong> across different hosts</li> 623<li><strong>Verifiability</strong> through content-addressable records</li> 624</ul> 625</section> 626<section id="atmosphereclient" class="level2"> 627<h2 class="anchored" data-anchor-id="atmosphereclient">AtmosphereClient</h2> 628<p>The client handles authentication and record operations:</p> 629<div id="a55b5480" class="cell"> 630<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient</span> 631<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span> 632<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> 633<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a></span> 634<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Login with app-specific password (not your main password!)</span></span> 635<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"alice.bsky.social"</span>, <span class="st">"app-password"</span>)</span> 636<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a></span> 637<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(client.did) <span class="co"># 'did:plc:...'</span></span> 
638<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(client.handle) <span class="co"># 'alice.bsky.social'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 639</div> 640<div class="callout callout-style-default callout-warning callout-titled"> 641<div class="callout-header d-flex align-content-center"> 642<div class="callout-icon-container"> 643<i class="callout-icon"></i> 644</div> 645<div class="callout-title-container flex-fill"> 646Warning 647</div> 648</div> 649<div class="callout-body-container callout-body"> 650<p>Always use an app-specific password, not your main Bluesky password. Create app passwords at <a href="https://bsky.app/settings/app-passwords">bsky.app/settings/app-passwords</a>.</p> 651</div> 652</div> 653<section id="session-management" class="level3"> 654<h3 class="anchored" data-anchor-id="session-management">Session Management</h3> 655<p>Save and restore sessions to avoid re-authentication:</p> 656<div id="546ad519" class="cell"> 657<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Export session for later</span></span> 658<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>session_string <span class="op">=</span> client.export_session()</span> 659<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span> 660<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Later: restore session</span></span> 661<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a>new_client <span class="op">=</span> AtmosphereClient()</span> 662<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a>new_client.login_with_session(session_string)</span></code><button title="Copy to 
Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 663</div> 664</section> 665<section id="custom-pds" class="level3"> 666<h3 class="anchored" data-anchor-id="custom-pds">Custom PDS</h3> 667<p>Connect to a custom PDS instead of bsky.social:</p> 668<div id="02895206" class="cell"> 669<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient(base_url<span class="op">=</span><span class="st">"https://pds.example.com"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 670</div> 671</section> 672</section> 673<section id="pdsblobstore" class="level2"> 674<h2 class="anchored" data-anchor-id="pdsblobstore">PDSBlobStore</h2> 675<p>Store dataset shards as ATProto blobs for fully decentralized storage:</p> 676<div id="ddcafefe" class="cell"> 677<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, PDSBlobStore</span> 678<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span> 679<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> 680<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"handle.bsky.social"</span>, <span class="st">"app-password"</span>)</span> 681<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a></span> 682<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span> 683<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" 
tabindex="-1"></a></span> 684<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Write shards as blobs</span></span> 685<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a>urls <span class="op">=</span> store.write_shards(dataset, prefix<span class="op">=</span><span class="st">"my-data/v1"</span>)</span> 686<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Returns: ['at://did:plc:.../blob/bafyrei...', ...]</span></span> 687<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a></span> 688<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Transform AT URIs to HTTP URLs for reading</span></span> 689<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a>http_url <span class="op">=</span> store.read_url(urls[<span class="dv">0</span>])</span> 690<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Returns: 'https://pds.example.com/xrpc/com.atproto.sync.getBlob?...'</span></span> 691<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a></span> 692<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a BlobSource for streaming</span></span> 693<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> store.create_source(urls)</span> 694<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[MySample](source)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 695</div> 696<section id="size-limits" class="level3"> 697<h3 class="anchored" data-anchor-id="size-limits">Size Limits</h3> 698<p>PDS blobs typically have size limits (often 50MB-5GB depending on the PDS). 
Use the <code>maxcount</code> and <code>maxsize</code> parameters to control shard sizes:</p> 699<div id="8d86e8e3" class="cell"> 700<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>urls <span class="op">=</span> store.write_shards(</span> 701<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> dataset,</span> 702<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> prefix<span class="op">=</span><span class="st">"large-data/v1"</span>,</span> 703<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> maxcount<span class="op">=</span><span class="dv">5000</span>, <span class="co"># Max 5000 samples per shard</span></span> 704<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> maxsize<span class="op">=</span><span class="fl">50e6</span>, <span class="co"># Max 50MB per shard</span></span> 705<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 706</div> 707</section> 708</section> 709<section id="blobsource" class="level2"> 710<h2 class="anchored" data-anchor-id="blobsource">BlobSource</h2> 711<p>Read datasets stored as PDS blobs:</p> 712<div id="bc781aa8" class="cell"> 713<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> BlobSource</span> 714<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span> 715<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co"># From blob references</span></span> 716<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" 
tabindex="-1"></a>source <span class="op">=</span> BlobSource.from_refs([</span> 717<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> {<span class="st">"did"</span>: <span class="st">"did:plc:abc123"</span>, <span class="st">"cid"</span>: <span class="st">"bafyrei111"</span>},</span> 718<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> {<span class="st">"did"</span>: <span class="st">"did:plc:abc123"</span>, <span class="st">"cid"</span>: <span class="st">"bafyrei222"</span>},</span> 719<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>])</span> 720<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a></span> 721<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Or from PDSBlobStore</span></span> 722<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> store.create_source(urls)</span> 723<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a></span> 724<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Use with Dataset</span></span> 725<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[MySample](source)</span> 726<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> ds.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 727<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 728</div> 729</section> 730<section id="atmosphereindex" class="level2"> 731<h2 class="anchored" data-anchor-id="atmosphereindex">AtmosphereIndex</h2> 732<p>The unified interface for ATProto operations, implementing the AbstractIndex protocol:</p> 
733<div id="5a53b688" class="cell"> 734<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, AtmosphereIndex, PDSBlobStore</span> 735<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span> 736<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> 737<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"handle.bsky.social"</span>, <span class="st">"app-password"</span>)</span> 738<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a></span> 739<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Without blob storage (use external URLs)</span></span> 740<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client)</span> 741<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a></span> 742<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a><span class="co"># With PDS blob storage (recommended for full decentralization)</span></span> 743<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span> 744<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client, data_store<span class="op">=</span>store)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 745</div> 746<section id="publishing-schemas" class="level3"> 747<h3 class="anchored" data-anchor-id="publishing-schemas">Publishing Schemas</h3> 748<div id="582e7fb0" class="cell"> 749<div 
class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 750<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 751<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a></span> 752<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 753<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ImageSample:</span> 754<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> image: NDArray</span> 755<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span> 756<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a> confidence: <span class="bu">float</span></span> 757<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a></span> 758<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish schema</span></span> 759<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a>schema_uri <span class="op">=</span> index.publish_schema(</span> 760<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a> ImageSample,</span> 761<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a> version<span class="op">=</span><span class="st">"1.0.0"</span>,</span> 762<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Image classification sample"</span>,</span> 763<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a>)</span> 764<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a><span 
class="co"># Returns: "at://did:plc:.../ac.foundation.dataset.sampleSchema/..."</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 765</div> 766</section> 767<section id="publishing-datasets" class="level3"> 768<h3 class="anchored" data-anchor-id="publishing-datasets">Publishing Datasets</h3> 769<div id="f658a5cc" class="cell"> 770<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-{000000..000009}.tar"</span>)</span> 771<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span> 772<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span> 773<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> dataset,</span> 774<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"imagenet-subset"</span>,</span> 775<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri, <span class="co"># Optional - auto-publishes if omitted</span></span> 776<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"ImageNet subset"</span>,</span> 777<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"images"</span>, <span class="st">"classification"</span>],</span> 778<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a> license<span class="op">=</span><span class="st">"MIT"</span>,</span> 779<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a>)</span> 780<span id="cb10-11"><a 
href="#cb10-11" aria-hidden="true" tabindex="-1"></a></span> 781<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(entry.uri) <span class="co"># AT URI of the record</span></span> 782<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(entry.data_urls) <span class="co"># WebDataset URLs</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 783</div> 784</section> 785<section id="listing-and-retrieving" class="level3"> 786<h3 class="anchored" data-anchor-id="listing-and-retrieving">Listing and Retrieving</h3> 787<div id="bc1fd369" class="cell"> 788<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># List your datasets</span></span> 789<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> entry <span class="kw">in</span> index.list_datasets():</span> 790<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>entry<span class="sc">.</span>name<span class="sc">}</span><span class="ss">: </span><span class="sc">{</span>entry<span class="sc">.</span>schema_ref<span class="sc">}</span><span class="ss">"</span>)</span> 791<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a></span> 792<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a><span class="co"># List from another user</span></span> 793<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> entry <span class="kw">in</span> index.list_datasets(repo<span class="op">=</span><span class="st">"did:plc:other-user"</span>):</span> 794<span id="cb11-7"><a href="#cb11-7" 
aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(entry.name)</span> 795<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a></span> 796<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Get specific dataset</span></span> 797<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.get_dataset(<span class="st">"at://did:plc:.../ac.foundation.dataset.record/..."</span>)</span> 798<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a></span> 799<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a><span class="co"># List schemas</span></span> 800<span id="cb11-13"><a href="#cb11-13" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> schema <span class="kw">in</span> index.list_schemas():</span> 801<span id="cb11-14"><a href="#cb11-14" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>schema[<span class="st">'name'</span>]<span class="sc">}</span><span class="ss"> v</span><span class="sc">{</span>schema[<span class="st">'version'</span>]<span class="sc">}</span><span class="ss">"</span>)</span> 802<span id="cb11-15"><a href="#cb11-15" aria-hidden="true" tabindex="-1"></a></span> 803<span id="cb11-16"><a href="#cb11-16" aria-hidden="true" tabindex="-1"></a><span class="co"># Decode schema to Python type</span></span> 804<span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a>SampleType <span class="op">=</span> index.decode_schema(schema_uri)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 805</div> 806</section> 807</section> 808<section id="lower-level-publishers" class="level2"> 809<h2 class="anchored" data-anchor-id="lower-level-publishers">Lower-Level Publishers</h2> 810<p>For more control, use the individual publisher 
classes:</p> 811<section id="schemapublisher" class="level3"> 812<h3 class="anchored" data-anchor-id="schemapublisher">SchemaPublisher</h3> 813<div id="b4deca81" class="cell"> 814<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> SchemaPublisher</span> 815<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span> 816<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> SchemaPublisher(client)</span> 817<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a></span> 818<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish(</span> 819<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a> ImageSample,</span> 820<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"ImageSample"</span>,</span> 821<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a> version<span class="op">=</span><span class="st">"1.0.0"</span>,</span> 822<span id="cb12-9"><a href="#cb12-9" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Image with label"</span>,</span> 823<span id="cb12-10"><a href="#cb12-10" aria-hidden="true" tabindex="-1"></a> metadata<span class="op">=</span>{<span class="st">"source"</span>: <span class="st">"training"</span>},</span> 824<span id="cb12-11"><a href="#cb12-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 825</div> 826</section> 827<section id="datasetpublisher" class="level3"> 828<h3 class="anchored" 
data-anchor-id="datasetpublisher">DatasetPublisher</h3> 829<div id="626a0256" class="cell"> 830<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetPublisher</span> 831<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a></span> 832<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> DatasetPublisher(client)</span> 833<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a></span> 834<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish(</span> 835<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a> dataset,</span> 836<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"training-images"</span>,</span> 837<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a> schema_uri<span class="op">=</span>schema_uri, <span class="co"># Required if auto_publish_schema=False</span></span> 838<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a> auto_publish_schema<span class="op">=</span><span class="va">True</span>, <span class="co"># Publish schema automatically</span></span> 839<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Training images"</span>,</span> 840<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"training"</span>, <span class="st">"images"</span>],</span> 841<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a> license<span class="op">=</span><span class="st">"MIT"</span>,</span> 
842<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 843</div> 844<section id="blob-storage" class="level4"> 845<h4 class="anchored" data-anchor-id="blob-storage">Blob Storage</h4> 846<p>There are two approaches to storing data as ATProto blobs:</p> 847<p><strong>Approach 1: PDSBlobStore (Recommended)</strong></p> 848<p>Use <code>PDSBlobStore</code> with <code>AtmosphereIndex</code> for automatic shard management:</p> 849<div id="ff58eaa6" class="cell"> 850<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> PDSBlobStore, AtmosphereIndex</span> 851<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a></span> 852<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span> 853<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client, data_store<span class="op">=</span>store)</span> 854<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a></span> 855<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Dataset shards are automatically uploaded as blobs</span></span> 856<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span> 857<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a> dataset,</span> 858<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"my-dataset"</span>,</span> 859<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" 
tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri,</span> 860<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a>)</span> 861<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a></span> 862<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Later: load using BlobSource</span></span> 863<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> store.create_source(entry.data_urls)</span> 864<span id="cb14-15"><a href="#cb14-15" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[MySample](source)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 865</div> 866<p><strong>Approach 2: Manual Blob Publishing</strong></p> 867<p>For more control, use <code>DatasetPublisher.publish_with_blobs()</code> directly:</p> 868<div id="46c8ca86" class="cell"> 869<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> io</span> 870<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 871<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a></span> 872<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Create tar data in memory</span></span> 873<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a>tar_buffer <span class="op">=</span> io.BytesIO()</span> 874<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.TarWriter(tar_buffer) <span class="im">as</span> sink:</span> 875<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a> 
<span class="cf">for</span> i, sample <span class="kw">in</span> <span class="bu">enumerate</span>(samples):</span> 876<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>sample.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span> 877<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a></span> 878<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish with blob storage</span></span> 879<span id="cb15-11"><a href="#cb15-11" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish_with_blobs(</span> 880<span id="cb15-12"><a href="#cb15-12" aria-hidden="true" tabindex="-1"></a> blobs<span class="op">=</span>[tar_buffer.getvalue()],</span> 881<span id="cb15-13"><a href="#cb15-13" aria-hidden="true" tabindex="-1"></a> schema_uri<span class="op">=</span>schema_uri,</span> 882<span id="cb15-14"><a href="#cb15-14" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"small-dataset"</span>,</span> 883<span id="cb15-15"><a href="#cb15-15" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Dataset stored in ATProto blobs"</span>,</span> 884<span id="cb15-16"><a href="#cb15-16" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"small"</span>, <span class="st">"demo"</span>],</span> 885<span id="cb15-17"><a href="#cb15-17" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 886</div> 887<p><strong>Loading Blob-Stored Datasets</strong></p> 888<div id="01f518ca" class="cell"> 889<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a 
href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetLoader</span> 890<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> BlobSource</span> 891<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span> 892<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> DatasetLoader(client)</span> 893<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a></span> 894<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Check storage type</span></span> 895<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a>storage_type <span class="op">=</span> loader.get_storage_type(uri) <span class="co"># "external" or "blobs"</span></span> 896<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a></span> 897<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span> storage_type <span class="op">==</span> <span class="st">"blobs"</span>:</span> 898<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a> <span class="co"># Get blob URLs and create BlobSource</span></span> 899<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a> blob_urls <span class="op">=</span> loader.get_blob_urls(uri)</span> 900<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a> <span class="co"># Parse to blob refs for BlobSource</span></span> 901<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a> <span class="co"># Or use loader.to_dataset() which handles this automatically</span></span> 902<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a></span> 903<span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a><span 
class="co"># to_dataset() handles both storage types automatically</span></span> 904<span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> loader.to_dataset(uri, MySample)</span> 905<span id="cb16-17"><a href="#cb16-17" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 906<span id="cb16-18"><a href="#cb16-18" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 907</div> 908</section> 909</section> 910<section id="lenspublisher" class="level3"> 911<h3 class="anchored" data-anchor-id="lenspublisher">LensPublisher</h3> 912<div id="7733653d" class="cell"> 913<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> LensPublisher</span> 914<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a></span> 915<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> LensPublisher(client)</span> 916<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a></span> 917<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a><span class="co"># With code references</span></span> 918<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish(</span> 919<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"simplify"</span>,</span> 920<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a> source_schema<span 
class="op">=</span>full_schema_uri,</span> 921<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a> target_schema<span class="op">=</span>simple_schema_uri,</span> 922<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Extract label only"</span>,</span> 923<span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a> getter_code<span class="op">=</span>{</span> 924<span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"repository"</span>: <span class="st">"https://github.com/org/repo"</span>,</span> 925<span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a> <span class="st">"commit"</span>: <span class="st">"abc123def..."</span>,</span> 926<span id="cb17-14"><a href="#cb17-14" aria-hidden="true" tabindex="-1"></a> <span class="st">"path"</span>: <span class="st">"transforms/simplify.py:simplify_getter"</span>,</span> 927<span id="cb17-15"><a href="#cb17-15" aria-hidden="true" tabindex="-1"></a> },</span> 928<span id="cb17-16"><a href="#cb17-16" aria-hidden="true" tabindex="-1"></a> putter_code<span class="op">=</span>{</span> 929<span id="cb17-17"><a href="#cb17-17" aria-hidden="true" tabindex="-1"></a> <span class="st">"repository"</span>: <span class="st">"https://github.com/org/repo"</span>,</span> 930<span id="cb17-18"><a href="#cb17-18" aria-hidden="true" tabindex="-1"></a> <span class="st">"commit"</span>: <span class="st">"abc123def..."</span>,</span> 931<span id="cb17-19"><a href="#cb17-19" aria-hidden="true" tabindex="-1"></a> <span class="st">"path"</span>: <span class="st">"transforms/simplify.py:simplify_putter"</span>,</span> 932<span id="cb17-20"><a href="#cb17-20" aria-hidden="true" tabindex="-1"></a> },</span> 933<span id="cb17-21"><a href="#cb17-21" aria-hidden="true" tabindex="-1"></a>)</span> 934<span id="cb17-22"><a href="#cb17-22" aria-hidden="true" tabindex="-1"></a></span> 
935<span id="cb17-23"><a href="#cb17-23" aria-hidden="true" tabindex="-1"></a><span class="co"># Or publish from a Lens object</span></span> 936<span id="cb17-24"><a href="#cb17-24" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.lens <span class="im">import</span> lens</span> 937<span id="cb17-25"><a href="#cb17-25" aria-hidden="true" tabindex="-1"></a></span> 938<span id="cb17-26"><a href="#cb17-26" aria-hidden="true" tabindex="-1"></a><span class="at">@lens</span></span> 939<span id="cb17-27"><a href="#cb17-27" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> simplify(src: FullSample) <span class="op">-&gt;</span> SimpleSample:</span> 940<span id="cb17-28"><a href="#cb17-28" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> SimpleSample(label<span class="op">=</span>src.label)</span> 941<span id="cb17-29"><a href="#cb17-29" aria-hidden="true" tabindex="-1"></a></span> 942<span id="cb17-30"><a href="#cb17-30" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish_from_lens(</span> 943<span id="cb17-31"><a href="#cb17-31" aria-hidden="true" tabindex="-1"></a> simplify,</span> 944<span id="cb17-32"><a href="#cb17-32" aria-hidden="true" tabindex="-1"></a> source_schema<span class="op">=</span>full_schema_uri,</span> 945<span id="cb17-33"><a href="#cb17-33" aria-hidden="true" tabindex="-1"></a> target_schema<span class="op">=</span>simple_schema_uri,</span> 946<span id="cb17-34"><a href="#cb17-34" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 947</div> 948</section> 949</section> 950<section id="lower-level-loaders" class="level2"> 951<h2 class="anchored" data-anchor-id="lower-level-loaders">Lower-Level Loaders</h2> 952<p>For direct access to records, use the loader classes:</p> 953<section id="schemaloader" class="level3"> 954<h3 class="anchored" 
data-anchor-id="schemaloader">SchemaLoader</h3> 955<div id="7ea59338" class="cell"> 956<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> SchemaLoader</span> 957<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a></span> 958<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> SchemaLoader(client)</span> 959<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a></span> 960<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Get a specific schema</span></span> 961<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a>schema <span class="op">=</span> loader.get(<span class="st">"at://did:plc:abc/ac.foundation.dataset.sampleSchema/xyz"</span>)</span> 962<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(schema[<span class="st">"name"</span>], schema[<span class="st">"version"</span>])</span> 963<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a></span> 964<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a><span class="co"># List all schemas from a repository</span></span> 965<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> schema <span class="kw">in</span> loader.list_all(repo<span class="op">=</span><span class="st">"did:plc:other-user"</span>):</span> 966<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(schema[<span class="st">"name"</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 967</div> 968</section> 969<section 
id="datasetloader" class="level3"> 970<h3 class="anchored" data-anchor-id="datasetloader">DatasetLoader</h3> 971<div id="18fc9b5a" class="cell"> 972<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetLoader</span> 973<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a></span> 974<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> DatasetLoader(client)</span> 975<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a></span> 976<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Get a specific dataset record</span></span> 977<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a>record <span class="op">=</span> loader.get(<span class="st">"at://did:plc:abc/ac.foundation.dataset.record/xyz"</span>)</span> 978<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a></span> 979<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Check storage type</span></span> 980<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a>storage_type <span class="op">=</span> loader.get_storage_type(uri) <span class="co"># "external" or "blobs"</span></span> 981<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a></span> 982<span id="cb19-11"><a href="#cb19-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Get URLs based on storage type</span></span> 983<span id="cb19-12"><a href="#cb19-12" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span> storage_type <span class="op">==</span> <span class="st">"external"</span>:</span> 984<span id="cb19-13"><a href="#cb19-13" aria-hidden="true" tabindex="-1"></a> 
urls <span class="op">=</span> loader.get_urls(uri)</span> 985<span id="cb19-14"><a href="#cb19-14" aria-hidden="true" tabindex="-1"></a><span class="cf">else</span>:</span> 986<span id="cb19-15"><a href="#cb19-15" aria-hidden="true" tabindex="-1"></a> urls <span class="op">=</span> loader.get_blob_urls(uri)</span> 987<span id="cb19-16"><a href="#cb19-16" aria-hidden="true" tabindex="-1"></a></span> 988<span id="cb19-17"><a href="#cb19-17" aria-hidden="true" tabindex="-1"></a><span class="co"># Get metadata</span></span> 989<span id="cb19-18"><a href="#cb19-18" aria-hidden="true" tabindex="-1"></a>metadata <span class="op">=</span> loader.get_metadata(uri)</span> 990<span id="cb19-19"><a href="#cb19-19" aria-hidden="true" tabindex="-1"></a></span> 991<span id="cb19-20"><a href="#cb19-20" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a Dataset object directly</span></span> 992<span id="cb19-21"><a href="#cb19-21" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> loader.to_dataset(uri, MySampleType)</span> 993<span id="cb19-22"><a href="#cb19-22" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 994<span id="cb19-23"><a href="#cb19-23" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 995</div> 996</section> 997<section id="lensloader" class="level3"> 998<h3 class="anchored" data-anchor-id="lensloader">LensLoader</h3> 999<div id="fa26f4fb" class="cell"> 1000<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> LensLoader</span> 1001<span id="cb20-2"><a href="#cb20-2" 
aria-hidden="true" tabindex="-1"></a></span> 1002<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> LensLoader(client)</span> 1003<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a></span> 1004<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Get a specific lens record</span></span> 1005<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a>lens <span class="op">=</span> loader.get(<span class="st">"at://did:plc:abc/ac.foundation.dataset.lens/xyz"</span>)</span> 1006<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(lens[<span class="st">"name"</span>])</span> 1007<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(lens[<span class="st">"sourceSchema"</span>], <span class="st">"-&gt;"</span>, lens[<span class="st">"targetSchema"</span>])</span> 1008<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a></span> 1009<span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a><span class="co"># List all lenses from a repository</span></span> 1010<span id="cb20-11"><a href="#cb20-11" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> lens <span class="kw">in</span> loader.list_all():</span> 1011<span id="cb20-12"><a href="#cb20-12" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(lens[<span class="st">"name"</span>])</span> 1012<span id="cb20-13"><a href="#cb20-13" aria-hidden="true" tabindex="-1"></a></span> 1013<span id="cb20-14"><a href="#cb20-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Find lenses by schema</span></span> 1014<span id="cb20-15"><a href="#cb20-15" aria-hidden="true" tabindex="-1"></a>lenses <span class="op">=</span> loader.find_by_schemas(</span> 1015<span id="cb20-16"><a href="#cb20-16" aria-hidden="true" tabindex="-1"></a> 
source_schema_uri<span class="op">=</span><span class="st">"at://did:plc:abc/ac.foundation.dataset.sampleSchema/source"</span>,</span> 1016<span id="cb20-17"><a href="#cb20-17" aria-hidden="true" tabindex="-1"></a> target_schema_uri<span class="op">=</span><span class="st">"at://did:plc:abc/ac.foundation.dataset.sampleSchema/target"</span>,</span> 1017<span id="cb20-18"><a href="#cb20-18" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 1018</div> 1019</section> 1020</section> 1021<section id="at-uris" class="level2"> 1022<h2 class="anchored" data-anchor-id="at-uris">AT URIs</h2> 1023<p>ATProto records are identified by AT URIs:</p> 1024<div id="892f4f44" class="cell"> 1025<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtUri</span> 1026<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a></span> 1027<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Parse an AT URI</span></span> 1028<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> AtUri.parse(<span class="st">"at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz"</span>)</span> 1029<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a></span> 1030<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(uri.authority) <span class="co"># 'did:plc:abc123'</span></span> 1031<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(uri.collection) <span class="co"># 'ac.foundation.dataset.sampleSchema'</span></span> 1032<span id="cb21-8"><a href="#cb21-8" 
aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(uri.rkey) <span class="co"># 'xyz'</span></span> 1033<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a></span> 1034<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Format back to string</span></span> 1035<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="bu">str</span>(uri)) <span class="co"># 'at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 1036</div> 1037</section> 1038<section id="supported-field-types" class="level2"> 1039<h2 class="anchored" data-anchor-id="supported-field-types">Supported Field Types</h2> 1040<p>Schemas support these field types:</p> 1041<table class="caption-top table"> 1042<thead> 1043<tr class="header"> 1044<th>Python Type</th> 1045<th>ATProto Type</th> 1046</tr> 1047</thead> 1048<tbody> 1049<tr class="odd"> 1050<td><code>str</code></td> 1051<td><code>primitive/str</code></td> 1052</tr> 1053<tr class="even"> 1054<td><code>int</code></td> 1055<td><code>primitive/int</code></td> 1056</tr> 1057<tr class="odd"> 1058<td><code>float</code></td> 1059<td><code>primitive/float</code></td> 1060</tr> 1061<tr class="even"> 1062<td><code>bool</code></td> 1063<td><code>primitive/bool</code></td> 1064</tr> 1065<tr class="odd"> 1066<td><code>bytes</code></td> 1067<td><code>primitive/bytes</code></td> 1068</tr> 1069<tr class="even"> 1070<td><code>NDArray</code></td> 1071<td><code>ndarray</code> (default dtype: float32)</td> 1072</tr> 1073<tr class="odd"> 1074<td><code>NDArray[np.float64]</code></td> 1075<td><code>ndarray</code> (dtype: float64)</td> 1076</tr> 1077<tr class="even"> 1078<td><code>list[str]</code></td> 1079<td><code>array</code> with items</td> 1080</tr> 1081<tr class="odd"> 1082<td><code>T | None</code></td> 
1083<td>Optional field</td> 1084</tr> 1085</tbody> 1086</table> 1087</section> 1088<section id="complete-example" class="level2"> 1089<h2 class="anchored" data-anchor-id="complete-example">Complete Example</h2> 1090<p>This example shows the full workflow using <code>PDSBlobStore</code> for decentralized storage:</p> 1091<div id="8ea58305" class="cell"> 1092<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 1093<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 1094<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 1095<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, AtmosphereIndex, PDSBlobStore</span> 1096<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 1097<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a></span> 1098<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a><span class="co"># 1. 
Define and create samples</span></span> 1099<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 1100<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> FeatureSample:</span> 1101<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a> features: NDArray</span> 1102<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">int</span></span> 1103<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a> source: <span class="bu">str</span></span> 1104<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a></span> 1105<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a>samples <span class="op">=</span> [</span> 1106<span id="cb22-15"><a href="#cb22-15" aria-hidden="true" tabindex="-1"></a> FeatureSample(</span> 1107<span id="cb22-16"><a href="#cb22-16" aria-hidden="true" tabindex="-1"></a> features<span class="op">=</span>np.random.randn(<span class="dv">128</span>).astype(np.float32),</span> 1108<span id="cb22-17"><a href="#cb22-17" aria-hidden="true" tabindex="-1"></a> label<span class="op">=</span>i <span class="op">%</span> <span class="dv">10</span>,</span> 1109<span id="cb22-18"><a href="#cb22-18" aria-hidden="true" tabindex="-1"></a> source<span class="op">=</span><span class="st">"synthetic"</span>,</span> 1110<span id="cb22-19"><a href="#cb22-19" aria-hidden="true" tabindex="-1"></a> )</span> 1111<span id="cb22-20"><a href="#cb22-20" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">1000</span>)</span> 1112<span id="cb22-21"><a href="#cb22-21" aria-hidden="true" tabindex="-1"></a>]</span> 1113<span id="cb22-22"><a href="#cb22-22" aria-hidden="true" tabindex="-1"></a></span> 1114<span id="cb22-23"><a href="#cb22-23" aria-hidden="true" 
tabindex="-1"></a><span class="co"># 2. Write to tar</span></span> 1115<span id="cb22-24"><a href="#cb22-24" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.TarWriter(<span class="st">"features.tar"</span>) <span class="im">as</span> sink:</span> 1116<span id="cb22-25"><a href="#cb22-25" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i, s <span class="kw">in</span> <span class="bu">enumerate</span>(samples):</span> 1117<span id="cb22-26"><a href="#cb22-26" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>s.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span> 1118<span id="cb22-27"><a href="#cb22-27" aria-hidden="true" tabindex="-1"></a></span> 1119<span id="cb22-28"><a href="#cb22-28" aria-hidden="true" tabindex="-1"></a><span class="co"># 3. Authenticate and set up blob storage</span></span> 1120<span id="cb22-29"><a href="#cb22-29" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> 1121<span id="cb22-30"><a href="#cb22-30" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"myhandle.bsky.social"</span>, <span class="st">"app-password"</span>)</span> 1122<span id="cb22-31"><a href="#cb22-31" aria-hidden="true" tabindex="-1"></a></span> 1123<span id="cb22-32"><a href="#cb22-32" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span> 1124<span id="cb22-33"><a href="#cb22-33" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client, data_store<span class="op">=</span>store)</span> 1125<span id="cb22-34"><a href="#cb22-34" aria-hidden="true" tabindex="-1"></a></span> 1126<span id="cb22-35"><a href="#cb22-35" aria-hidden="true" tabindex="-1"></a><span class="co"># 4. 
Publish schema</span></span> 1127<span id="cb22-36"><a href="#cb22-36" aria-hidden="true" tabindex="-1"></a>schema_uri <span class="op">=</span> index.publish_schema(</span> 1128<span id="cb22-37"><a href="#cb22-37" aria-hidden="true" tabindex="-1"></a> FeatureSample,</span> 1129<span id="cb22-38"><a href="#cb22-38" aria-hidden="true" tabindex="-1"></a> version<span class="op">=</span><span class="st">"1.0.0"</span>,</span> 1130<span id="cb22-39"><a href="#cb22-39" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Feature vectors with labels"</span>,</span> 1131<span id="cb22-40"><a href="#cb22-40" aria-hidden="true" tabindex="-1"></a>)</span> 1132<span id="cb22-41"><a href="#cb22-41" aria-hidden="true" tabindex="-1"></a></span> 1133<span id="cb22-42"><a href="#cb22-42" aria-hidden="true" tabindex="-1"></a><span class="co"># 5. Publish dataset (shards uploaded as blobs)</span></span> 1134<span id="cb22-43"><a href="#cb22-43" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[FeatureSample](<span class="st">"features.tar"</span>)</span> 1135<span id="cb22-44"><a href="#cb22-44" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span> 1136<span id="cb22-45"><a href="#cb22-45" aria-hidden="true" tabindex="-1"></a> dataset,</span> 1137<span id="cb22-46"><a href="#cb22-46" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"synthetic-features-v1"</span>,</span> 1138<span id="cb22-47"><a href="#cb22-47" aria-hidden="true" tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri,</span> 1139<span id="cb22-48"><a href="#cb22-48" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"features"</span>, <span class="st">"synthetic"</span>],</span> 1140<span id="cb22-49"><a href="#cb22-49" aria-hidden="true" tabindex="-1"></a>)</span> 1141<span id="cb22-50"><a href="#cb22-50" 
aria-hidden="true" tabindex="-1"></a></span> 1142<span id="cb22-51"><a href="#cb22-51" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Published: </span><span class="sc">{</span>entry<span class="sc">.</span>uri<span class="sc">}</span><span class="ss">"</span>)</span> 1143<span id="cb22-52"><a href="#cb22-52" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Blob URLs: </span><span class="sc">{</span>entry<span class="sc">.</span>data_urls<span class="sc">}</span><span class="ss">"</span>)</span> 1144<span id="cb22-53"><a href="#cb22-53" aria-hidden="true" tabindex="-1"></a></span> 1145<span id="cb22-54"><a href="#cb22-54" aria-hidden="true" tabindex="-1"></a><span class="co"># 6. Later: discover and load from blobs</span></span> 1146<span id="cb22-55"><a href="#cb22-55" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> dataset_entry <span class="kw">in</span> index.list_datasets():</span> 1147<span id="cb22-56"><a href="#cb22-56" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"Found: </span><span class="sc">{</span>dataset_entry<span class="sc">.</span>name<span class="sc">}</span><span class="ss">"</span>)</span> 1148<span id="cb22-57"><a href="#cb22-57" aria-hidden="true" tabindex="-1"></a></span> 1149<span id="cb22-58"><a href="#cb22-58" aria-hidden="true" tabindex="-1"></a> <span class="co"># Reconstruct type from schema</span></span> 1150<span id="cb22-59"><a href="#cb22-59" aria-hidden="true" tabindex="-1"></a> SampleType <span class="op">=</span> index.decode_schema(dataset_entry.schema_ref)</span> 1151<span id="cb22-60"><a href="#cb22-60" aria-hidden="true" tabindex="-1"></a></span> 1152<span id="cb22-61"><a href="#cb22-61" aria-hidden="true" tabindex="-1"></a> <span class="co"># Create source from blob URLs</span></span> 1153<span id="cb22-62"><a href="#cb22-62" aria-hidden="true" tabindex="-1"></a> source <span 
class="op">=</span> store.create_source(dataset_entry.data_urls)</span> 1154<span id="cb22-63"><a href="#cb22-63" aria-hidden="true" tabindex="-1"></a></span> 1155<span id="cb22-64"><a href="#cb22-64" aria-hidden="true" tabindex="-1"></a> <span class="co"># Load dataset from blobs</span></span> 1156<span id="cb22-65"><a href="#cb22-65" aria-hidden="true" tabindex="-1"></a> ds <span class="op">=</span> atdata.Dataset[SampleType](source)</span> 1157<span id="cb22-66"><a href="#cb22-66" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> batch <span class="kw">in</span> ds.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 1158<span id="cb22-67"><a href="#cb22-67" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(batch.features.shape)</span> 1159<span id="cb22-68"><a href="#cb22-68" aria-hidden="true" tabindex="-1"></a> <span class="cf">break</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 1160</div> 1161<p>For external URL storage (without <code>PDSBlobStore</code>):</p> 1162<div id="1f55ae9a" class="cell"> 1163<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Use AtmosphereIndex without data_store</span></span> 1164<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client)</span> 1165<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a></span> 1166<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Dataset URLs will be stored as-is (external references)</span></span> 1167<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span> 1168<span id="cb23-6"><a href="#cb23-6" 
aria-hidden="true" tabindex="-1"></a> dataset,</span> 1169<span id="cb23-7"><a href="#cb23-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"external-features"</span>,</span> 1170<span id="cb23-8"><a href="#cb23-8" aria-hidden="true" tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri,</span> 1171<span id="cb23-9"><a href="#cb23-9" aria-hidden="true" tabindex="-1"></a>)</span> 1172<span id="cb23-10"><a href="#cb23-10" aria-hidden="true" tabindex="-1"></a></span> 1173<span id="cb23-11"><a href="#cb23-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Load using standard URL source</span></span> 1174<span id="cb23-12"><a href="#cb23-12" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[FeatureSample](entry.data_urls[<span class="dv">0</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 1175</div> 1176</section> 1177<section id="related" class="level2"> 1178<h2 class="anchored" data-anchor-id="related">Related</h2> 1179<ul> 1180<li><a href="../reference/local-storage.html">Local Storage</a> - Redis + S3 backend</li> 1181<li><a href="../reference/promotion.html">Promotion</a> - Promoting local datasets to ATProto</li> 1182<li><a href="../reference/protocols.html">Protocols</a> - AbstractIndex interface</li> 1183<li><a href="../reference/packable-samples.html">Packable Samples</a> - Defining sample types</li> 1184</ul> 1185 1186 1187</section> 1188 1189</main> <!-- /main --> 1190<script id="quarto-html-after-body" type="application/javascript"> 1191 window.document.addEventListener("DOMContentLoaded", function (event) { 1192 // Ensure there is a toggle, if there isn't float one in the top right 1193 if (window.document.querySelector('.quarto-color-scheme-toggle') === null) { 1194 const a = window.document.createElement('a'); 1195 a.classList.add('top-right'); 1196 a.classList.add('quarto-color-scheme-toggle'); 
1197 a.href = ""; 1198 a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; }; 1199 const i = window.document.createElement("i"); 1200 i.classList.add('bi'); 1201 a.appendChild(i); 1202 window.document.body.appendChild(a); 1203 } 1204 setColorSchemeToggle(hasAlternateSentinel()) 1205 const icon = ""; 1206 const anchorJS = new window.AnchorJS(); 1207 anchorJS.options = { 1208 placement: 'right', 1209 icon: icon 1210 }; 1211 anchorJS.add('.anchored'); 1212 const isCodeAnnotation = (el) => { 1213 for (const clz of el.classList) { 1214 if (clz.startsWith('code-annotation-')) { 1215 return true; 1216 } 1217 } 1218 return false; 1219 } 1220 const onCopySuccess = function(e) { 1221 // button target 1222 const button = e.trigger; 1223 // don't keep focus 1224 button.blur(); 1225 // flash "checked" 1226 button.classList.add('code-copy-button-checked'); 1227 var currentTitle = button.getAttribute("title"); 1228 button.setAttribute("title", "Copied!"); 1229 let tooltip; 1230 if (window.bootstrap) { 1231 button.setAttribute("data-bs-toggle", "tooltip"); 1232 button.setAttribute("data-bs-placement", "left"); 1233 button.setAttribute("data-bs-title", "Copied!"); 1234 tooltip = new bootstrap.Tooltip(button, 1235 { trigger: "manual", 1236 customClass: "code-copy-button-tooltip", 1237 offset: [0, -8]}); 1238 tooltip.show(); 1239 } 1240 setTimeout(function() { 1241 if (tooltip) { 1242 tooltip.hide(); 1243 button.removeAttribute("data-bs-title"); 1244 button.removeAttribute("data-bs-toggle"); 1245 button.removeAttribute("data-bs-placement"); 1246 } 1247 button.setAttribute("title", currentTitle); 1248 button.classList.remove('code-copy-button-checked'); 1249 }, 1000); 1250 // clear code selection 1251 e.clearSelection(); 1252 } 1253 const getTextToCopy = function(trigger) { 1254 const codeEl = trigger.previousElementSibling.cloneNode(true); 1255 for (const childEl of codeEl.children) { 1256 if (isCodeAnnotation(childEl)) { 1257 childEl.remove(); 
}
}
return codeEl.innerText;
}
// The lines above close a helper whose head lies before this section
// (presumably getTextToCopy, which is referenced just below).

// Copy-to-clipboard for code blocks outside the embedded-source modal.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);

// Copy buttons inside the embedded-source modal get their own instance,
// scoped to the modal element via the `container` option.
if (window.document.getElementById('quarto-embedded-source-code-modal')) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: window.document.getElementById('quarto-embedded-source-code-modal')
  });
  clipboardModal.on('success', onCopySuccess);
}

// Patterns for links that are treated as internal to the site.
// Regex literals are used on purpose: building the repository pattern from a
// string literal drops the backslashes, which turns "\." into a wildcard ".".
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
var filterRegex = /https:\/\/github\.com\/your-org\/atdata/;

// Returns true when `href` points at the project repository, at localhost,
// or is a mailto: link.
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}

// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (var i=0; i<links.length; i++) {
  const link = links[i];
  if (!isInternal(link.href)) {
    // undo the damage that might have been done by quarto-nav.js in the case of
    // links that we want to consider external
    if (link.dataset.originalHref !== undefined) {
      link.href = link.dataset.originalHref;
    }
  }
}

// Attaches a tippy tooltip to `el`.
//   contentFn     - optional; used as the tooltip `content` option
//   onTriggerFn   - optional; used as the `onTrigger` option
//   onUntriggerFn - optional; used as the `onUntrigger` option
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // Mount the tooltip next to the reference element rather than on <body>.
    appendTo: function(el) {
      return el.parentElement;
    },
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  if (contentFn) {
    config.content = contentFn;
  }
  if (onTriggerFn) {
    config.onTrigger = onTriggerFn;
  }
  if (onUntriggerFn) {
    config.onUntrigger = onUntriggerFn;
  }
  window.tippy(el, config);
}

// Footnote references: the tooltip shows the HTML of the referenced note.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
  const ref = noterefs[i];
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // Absolute URLs are reduced to their hash; relative values make the URL
    // constructor throw and are used unchanged.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    if (note) {
      return note.innerHTML;
    } else {
      return "";
    }
  });
}

const xrefs = window.document.querySelectorAll('a.quarto-xref');

// Builds the tooltip HTML for a cross-reference target.
//   id   - target element id, or null when a whole page was fetched
//   note - the target element; it is modified in place, so callers pass a
//          clone or a node from a separately parsed document
// Returns an HTML string.
const processXRef = (id, note) => {
  // Strip column container classes
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  }
  stripColumnClz(note)
  if (id === null || id.startsWith('sec-')) {
    // Special case sections, only their first couple elements
    const container = document.createElement("div");
    if (note.children && note.children.length > 2) {
      container.appendChild(note.children[0].cloneNode(true));
      // After the first child, keep only the first one that is not an empty <p>.
      for (let i = 1; i < note.children.length; i++) {
        const child = note.children[i];
        if (child.tagName === "P" && child.innerText === "") {
          continue;
        } else {
          container.appendChild(child.cloneNode(true));
          break;
        }
      }
      if (window.Quarto?.typesetMath) {
        window.Quarto.typesetMath(container);
      }
      return container.innerHTML
    } else {
      if (window.Quarto?.typesetMath) {
        window.Quarto.typesetMath(note);
      }
      return note.innerHTML;
    }
  } else {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(note);
    }
    // Callouts keep their wrapper element; everything else contributes only
    // its inner markup.
    if (note.classList.contains("callout")) {
      return note.outerHTML;
    } else {
      return note.innerHTML;
    }
  }
}

// Cross-references: content is resolved when the tooltip is triggered. The
// instance is disabled while resolving and re-enabled and shown afterwards.
for (var i=0; i<xrefs.length; i++) {
  const xref = xrefs[i];
  tippyHover(xref, undefined, function(instance) {
    instance.disable();
    let url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      try { hash = new URL(url).hash; } catch {}
    }
    if (hash) {
      const id = hash.replace(/^#\/?/, "");
      const note = window.document.getElementById(id);
      if (note !== null) {
        // Target is in this document: work on a clone so the page is untouched.
        try {
          const html = processXRef(id, note.cloneNode(true));
          instance.setContent(html);
        } finally {
          instance.enable();
          instance.show();
        }
      } else {
        // See if we can fetch this
        fetch(url.split('#')[0])
          .then(res => res.text())
          .then(html => {
            const parser = new DOMParser();
            const htmlDoc = parser.parseFromString(html, "text/html");
            const note = htmlDoc.getElementById(id);
            if (note !== null) {
              const html = processXRef(id, note);
              instance.setContent(html);
            }
          }).finally(() => {
            instance.enable();
            instance.show();
          });
      }
    } else {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetch(url)
        .then(res => res.text())
        .then(html => {
          const parser = new DOMParser();
          const htmlDoc = parser.parseFromString(html, "text/html");
          const note = htmlDoc.querySelector('main.content');
          if (note !== null) {
            // This should only happen for chapter cross references
            // (since there is no id in the URL)
            // remove the first header
            if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
              note.children[0].remove();
            }
            const html = processXRef(null, note);
            instance.setContent(html);
          }
        }).finally(() => {
          instance.enable();
          instance.show();
        });
    }
  }, function(instance) {
  });
}

// The annotation <dt> whose code lines are currently highlighted, if any.
let selectedAnnoteEl;

// Builds the selector for the span that carries the line list of one
// annotation inside one code cell.
const selectorForAnnotation = ( cell, annotation) => {
  let cellAttr = 'data-code-cell="' + cell + '"';
  let lineAttr = 'data-code-annotation="' + annotation + '"';
  const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
  return selector;
}

// Highlights the code lines referenced by `annoteEl` (an element carrying
// data-target-cell and data-target-annotation) and records it as the current
// selection.
// Returns true when a highlight was placed. Returns false, leaving the
// selection untouched, when the annotation span or its line elements cannot
// be found.
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan?.getAttribute("data-code-lines");
  if (!codeLines) {
    return false;
  }
  const lineIds = codeLines.split(",").map((line) => {
    return targetCell + "-" + line;
  });

  // The highlight spans from the top of the first listed line to the bottom
  // of the last listed line.
  const firstEl = window.document.getElementById(lineIds[0]);
  const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
  const parent = firstEl?.parentElement?.parentElement;
  if (!firstEl || !lastEl || !parent) {
    return false;
  }
  const top = firstEl.offsetTop;
  let height = firstEl.offsetHeight;
  if (lineIds.length > 1) {
    const bottom = lastEl.offsetTop + lastEl.offsetHeight;
    height = bottom - top;
  }

  // cook up a div (if necessary) and position it
  let div = window.document.getElementById("code-annotation-line-highlight");
  if (div === null) {
    div = window.document.createElement("div");
    div.setAttribute("id", "code-annotation-line-highlight");
    div.style.position = 'absolute';
    parent.appendChild(div);
  }
  div.style.top = top - 2 + "px";
  div.style.height = height + 4 + "px";
  div.style.left = 0;

  // Matching marker in the annotation gutter; skipped when the cell has no
  // gutter element.
  let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
  if (gutterDiv === null) {
    const codeCell = window.document.getElementById(targetCell);
    const gutter = codeCell?.querySelector('.code-annotation-gutter');
    if (gutter) {
      gutterDiv = window.document.createElement("div");
      gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
      gutterDiv.style.position = 'absolute';
      gutter.appendChild(gutterDiv);
    }
  }
  if (gutterDiv !== null) {
    gutterDiv.style.top = top - 2 + "px";
    gutterDiv.style.height = height + 4 + "px";
  }

  selectedAnnoteEl = annoteEl;
  return true;
};

// Removes both highlight elements and clears the current selection.
const unselectCodeLines = () => {
  const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  elementsIds.forEach((elId) => {
    const div = window.document.getElementById(elId);
    if (div) {
      div.remove();
    }
  });
  selectedAnnoteEl = undefined;
};

// Handle positioning of the toggle
window.addEventListener(
  "resize",
  throttle(() => {
    // NOTE(review): no declaration of `elRect` is visible in this section; if
    // none exists above, this assignment creates an implicit global - confirm.
    elRect = undefined;
    if (selectedAnnoteEl) {
      selectCodeLines(selectedAnnoteEl);
    }
  }, 10)
);

// Wraps `fn` so the first call runs immediately; later calls are collapsed
// into one trailing call that runs `ms` milliseconds after the most recent
// call, after which the wrapper is re-armed.
function throttle(fn, ms) {
  let throttle = false;
  let timer;
  return (...args) => {
    if(!throttle) { // first call gets through
      fn.apply(this, args);
      throttle = true;
    } else { // all the others get throttled
      if(timer) clearTimeout(timer); // cancel #2
      timer = setTimeout(() => {
        fn.apply(this, args);
        timer = throttle = false;
      }, ms);
    }
  };
}

// Attach click handler to the DT
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', (event) => {
    // currentTarget is the <dt> the listener is bound to; target could be a
    // descendant that lacks the data-target-* attributes.
    const clickedEl = event.currentTarget;
    if (clickedEl !== selectedAnnoteEl) {
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      // Only mark the entry active when its lines were actually highlighted.
      if (selectCodeLines(clickedEl)) {
        clickedEl.classList.add('code-annotation-active');
      }
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}

// Walks up from `el` to the first element whose parent carries a data-cites
// attribute. Returns { el, cites } for that element, with `cites` split on
// spaces, or undefined when no ancestor has the attribute.
const findCites = (el) => {
  const parentEl = el.parentElement;
  if (parentEl) {
    const cites = parentEl.dataset.cites;
    if (cites) {
      return {
        el,
        cites: cites.split(' ')
      };
    } else {
      return findCites(el.parentElement)
    }
  } else {
    return undefined;
  }
};

// Bibliography references: the tooltip lists the entry of every cited key.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
  const ref = bibliorefs[i];
  const citeInfo = findCites(ref);
  if (citeInfo) {
    tippyHover(citeInfo.el, function() {
      var popup = window.document.createElement('div');
      citeInfo.cites.forEach(function(cite) {
        var citeDiv = window.document.createElement('div');
        citeDiv.classList.add('hanging-indent');
        citeDiv.classList.add('csl-entry');
        // Entries are looked up by the id 'ref-' + cite key; a missing entry
        // leaves an empty placeholder div.
        var biblioDiv = window.document.getElementById('ref-' + cite);
        if (biblioDiv) {
          citeDiv.innerHTML = biblioDiv.innerHTML;
        }
        popup.appendChild(citeDiv);
      });
      return popup.innerHTML;
    });
  }
}
});
</script>
</div> <!-- /content -->
<footer class="footer">
  <div class="nav-footer">
    <div class="nav-footer-left">
      <p>Built with <a href="https://quarto.org/">Quarto</a></p>
    </div>
    <div class="nav-footer-center">
      &nbsp;
      <div class="toc-actions d-sm-block d-md-none">
        <ul>
          <li><a href="https://github.com/your-org/atdata/edit/main/reference/atmosphere.qmd" class="toc-action"><i class="bi bi-github" aria-hidden="true"></i>Edit this page</a></li>
          <li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty" aria-hidden="true"></i>Report an issue</a></li>
        </ul>
      </div>
    </div>
    <div class="nav-footer-right">
      <p>MIT License</p>
    </div>
  </div>
</footer>

</body></html>