A loose federation of distributed, typed datasets
1<!DOCTYPE html>
2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
3
4<meta charset="utf-8">
5<meta name="generator" content="quarto-1.7.34">
6
7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
8
9<meta name="description" content="Publishing and discovering datasets on the AT Protocol network">
10
11<title>Atmosphere (ATProto Integration) – atdata</title>
12<style>
13code{white-space: pre-wrap;}
14span.smallcaps{font-variant: small-caps;}
15div.columns{display: flex; gap: min(4vw, 1.5em);}
16div.column{flex: auto; overflow-x: auto;}
17div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
18ul.task-list{list-style: none;}
19ul.task-list li input[type="checkbox"] {
20 width: 0.8em;
21 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
22 vertical-align: middle;
23}
24/* CSS for syntax highlighting */
25html { -webkit-text-size-adjust: 100%; }
26pre > code.sourceCode { white-space: pre; position: relative; }
27pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
28pre > code.sourceCode > span:empty { height: 1.2em; }
29.sourceCode { overflow: visible; }
30code.sourceCode > span { color: inherit; text-decoration: inherit; }
31div.sourceCode { margin: 1em 0; }
32pre.sourceCode { margin: 0; }
33@media screen {
34div.sourceCode { overflow: auto; }
35}
36@media print {
37pre > code.sourceCode { white-space: pre-wrap; }
38pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
39}
40pre.numberSource code
41 { counter-reset: source-line 0; }
42pre.numberSource code > span
43 { position: relative; left: -4em; counter-increment: source-line; }
44pre.numberSource code > span > a:first-child::before
45 { content: counter(source-line);
46 position: relative; left: -1em; text-align: right; vertical-align: baseline;
47 border: none; display: inline-block;
48 -webkit-touch-callout: none; -webkit-user-select: none;
49 -khtml-user-select: none; -moz-user-select: none;
50 -ms-user-select: none; user-select: none;
51 padding: 0 4px; width: 4em;
52 }
53pre.numberSource { margin-left: 3em; padding-left: 4px; }
54div.sourceCode
55 { }
56@media screen {
57pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
58}
59</style>
60
61
62<script src="../site_libs/quarto-nav/quarto-nav.js"></script>
63<script src="../site_libs/quarto-nav/headroom.min.js"></script>
64<script src="../site_libs/clipboard/clipboard.min.js"></script>
65<script src="../site_libs/quarto-search/autocomplete.umd.js"></script>
66<script src="../site_libs/quarto-search/fuse.min.js"></script>
67<script src="../site_libs/quarto-search/quarto-search.js"></script>
68<meta name="quarto:offset" content="../">
69<script src="../site_libs/quarto-html/quarto.js" type="module"></script>
70<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
71<script src="../site_libs/quarto-html/popper.min.js"></script>
72<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
73<script src="../site_libs/quarto-html/anchor.min.js"></script>
74<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
75<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles">
76<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles">
77<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles">
78<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
79<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
80<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" data-mode="light">
81<link href="../site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark">
82<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light">
83<script id="quarto-search-options" type="application/json">{
84 "location": "navbar",
85 "copy-button": false,
86 "collapse-after": 3,
87 "panel-placement": "end",
88 "type": "overlay",
89 "limit": 50,
90 "keyboard-shortcut": [
91 "f",
92 "/",
93 "s"
94 ],
95 "show-item-context": false,
96 "language": {
97 "search-no-results-text": "No results",
98 "search-matching-documents-text": "matching documents",
99 "search-copy-link-title": "Copy link to search",
100 "search-hide-matches-text": "Hide additional matches",
101 "search-more-match-text": "more match in this document",
102 "search-more-matches-text": "more matches in this document",
103 "search-clear-button-title": "Clear",
104 "search-text-placeholder": "",
105 "search-detached-cancel-button-title": "Cancel",
106 "search-submit-button-title": "Submit",
107 "search-label": "Search"
108 }
109}</script>
110
111
112<link rel="stylesheet" href="../assets/styles.css">
113</head>
114
115<body class="nav-sidebar docked nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript">
// Mirror the colour mode of a Bootstrap stylesheet <link> onto <body>:
// data-mode="dark" selects the "quarto-dark" class, any other value "quarto-light".
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const bodyClasses = window.document.querySelector("body").classList;
  bodyClasses.add(isDark ? "quarto-dark" : "quarto-light");
  bodyClasses.remove(isDark ? "quarto-light" : "quarto-dark");
}
// Sync the <body> class with whichever #quarto-bootstrap stylesheet is
// currently enabled; does nothing when no enabled sheet is found.
const toggleBodyColorPrimary = () => {
  const activeSheet = window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])");
  if (!activeSheet) {
    return;
  }
  toggleBodyColorMode(activeSheet);
}
// Add (alternate truthy) or remove (alternate falsy) the "alternate" class
// on every .quarto-color-scheme-toggle element.
const setColorSchemeToggle = (alternate) => {
  const classOp = alternate ? "add" : "remove";
  for (const toggleEl of window.document.querySelectorAll('.quarto-color-scheme-toggle')) {
    toggleEl.classList[classOp]("alternate");
  }
};
// Activate the alternate (alternate truthy) or primary colour scheme:
// swaps stylesheets, updates the <body> class and the toggle controls,
// with transitions on the margin-sidebar links suppressed during the swap.
const toggleColorMode = (alternate) => {
  const marginNavLinks = '#quarto-margin-sidebar .nav-link';

  // Switch the stylesheets
  const primaryStylesheets = window.document.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)');
  const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate');

  manageTransitions(marginNavLinks, false);
  if (!alternate) {
    disableStylesheet(alternateStylesheets);
    enableStylesheet(primaryStylesheets);
    toggleBodyColorPrimary();
  } else {
    // note: dark is layered on light, we don't disable primary!
    enableStylesheet(alternateStylesheets);
    for (const sheetNode of alternateStylesheets) {
      if (sheetNode.id !== "quarto-bootstrap") {
        continue;
      }
      toggleBodyColorMode(sheetNode);
    }
  }
  manageTransitions(marginNavLinks, true);

  // Switch the toggles
  setColorSchemeToggle(alternate);

  // Hack to workaround the fact that safari doesn't
  // properly recolor the scrollbar when toggling (#1455)
  const userAgent = navigator.userAgent;
  const looksLikeSafari = userAgent.indexOf('Safari') > 0 && userAgent.indexOf('Chrome') == -1;
  if (looksLikeSafari) {
    manageTransitions("body", false);
    window.scrollTo(0, 1);
    setTimeout(() => {
      window.scrollTo(0, 0);
      manageTransitions("body", true);
    }, 40);
  }
}
// Switch off each given <link> by setting its rel to 'disabled-stylesheet'.
const disableStylesheet = (stylesheets) => {
  for (const sheet of stylesheets) {
    sheet.rel = 'disabled-stylesheet';
  }
}
// Switch on each given <link> by setting its rel to 'stylesheet'.
const enableStylesheet = (stylesheets) => {
  for (const sheet of stylesheets) {
    // for Chrome, which will still FOUC without this check
    if (sheet.rel === 'stylesheet') {
      continue;
    }
    sheet.rel = 'stylesheet';
  }
}
// Add the 'notransition' class to every element matching selector, or
// remove it again when allowTransitions is truthy.
const manageTransitions = (selector, allowTransitions) => {
  const classOp = allowTransitions ? 'remove' : 'add';
  window.document.querySelectorAll(selector).forEach((el) => {
    el.classList[classOp]('notransition');
  });
}
// True when the page was opened from the local filesystem (file: protocol).
const isFileUrl = () => window.location.protocol === 'file:';
// True only when the stored colour-scheme sentinel is exactly "alternate";
// a null sentinel counts as false.
const hasAlternateSentinel = () => {
  const sentinel = getColorSchemeSentinel();
  return sentinel === "alternate";
}
// Remember the chosen scheme as "alternate" or "default": in localStorage
// under "quarto-color-scheme", or in the in-memory localAlternateSentinel
// when the page is served from a file: URL.
const setStyleSentinel = (alternate) => {
  const value = alternate ? "alternate" : "default";
  if (isFileUrl()) {
    localAlternateSentinel = value;
    return;
  }
  window.localStorage.setItem("quarto-color-scheme", value);
}
// Read the remembered scheme: the "quarto-color-scheme" localStorage entry
// when present, otherwise (or on file: URLs) the in-memory localAlternateSentinel.
const getColorSchemeSentinel = () => {
  if (isFileUrl()) {
    return localAlternateSentinel;
  }
  const stored = window.localStorage.getItem("quarto-color-scheme");
  return stored ?? localAlternateSentinel;
}
// Tell an embedded giscus comment frame, if one is present, which theme to use.
//   isAlternate     - truthy when the alternate colour scheme is being activated
//   darkModeDefault - truthy when the site's default scheme is the dark one;
//                     when omitted, the page-level authorPrefersDark is used
// Theme names are read from #giscus-base-theme / #giscus-alt-theme and default
// to 'light' / 'dark' when those elements are absent.
const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => {
  const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light';
  const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark';

  // Honour the argument the caller passed instead of silently ignoring it;
  // fall back to the global only for callers that pass a single argument.
  const defaultIsDark = darkModeDefault !== undefined ? darkModeDefault : authorPrefersDark;

  // When the default scheme is dark the base/alternate roles are swapped,
  // so the alternate theme applies exactly when the two flags differ.
  const newTheme = Boolean(defaultIsDark) !== Boolean(isAlternate) ? alternateTheme : baseTheme;

  // From: https://github.com/giscus/giscus/issues/336
  const iframe = document.querySelector('iframe.giscus-frame');
  if (!iframe) {
    return; // giscus is not loaded on this page
  }
  iframe.contentWindow.postMessage(
    { giscus: { setConfig: { theme: newTheme } } },
    'https://giscus.app'
  );
};
const authorPrefersDark = false;
const darkModeDefault = authorPrefersDark;

// Switch off the "-extra" copies of the highlighting and Bootstrap stylesheets.
for (const extraSheetSelector of [
  'link#quarto-text-highlighting-styles.quarto-color-scheme-extra',
  'link#quarto-bootstrap.quarto-color-scheme-extra',
]) {
  document.querySelector(extraSheetSelector).rel = 'disabled-stylesheet';
}

// In-memory fallback for the scheme sentinel (used instead of localStorage on file: URLs).
let localAlternateSentinel = darkModeDefault ? 'alternate' : 'default';

// Dark / light mode switch
window.quartoToggleColorScheme = () => {
  // Flip relative to the currently stored dark / light value
  const toAlternate = !hasAlternateSentinel();
  toggleColorMode(toAlternate);
  setStyleSentinel(toAlternate);
  toggleGiscusIfUsed(toAlternate, darkModeDefault);
  window.dispatchEvent(new Event('resize'));
};

// Apply the stored scheme on load (switches to dark mode if need be)
toggleColorMode(hasAlternateSentinel());
277 </script>
278
279<div id="quarto-search-results"></div>
280 <header id="quarto-header" class="headroom fixed-top">
281 <nav class="navbar navbar-expand-lg " data-bs-theme="dark">
282 <div class="navbar-container container-fluid">
283 <div class="navbar-brand-container mx-auto">
284 <a class="navbar-brand" href="../index.html">
285 <span class="navbar-title">atdata</span>
286 </a>
287 </div>
288 <div id="quarto-search" class="" title="Search"></div>
<!-- Collapse toggle for #navbarCollapse. No ARIA role is set: role="menu" would replace the native button semantics and promise menu items that do not exist. -->
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
  <span class="navbar-toggler-icon"></span>
</button>
292 <div class="collapse navbar-collapse" id="navbarCollapse">
293 <ul class="navbar-nav navbar-nav-scroll me-auto">
294 <li class="nav-item">
295 <a class="nav-link active" href="../index.html" aria-current="page">
296<span class="menu-text">Guide</span></a>
297 </li>
298 <li class="nav-item dropdown ">
<!-- Dropdown trigger: href="#" opens the Tutorials menu in place rather than navigating, so it is exposed as a button, not a link. -->
<a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="button" data-bs-toggle="dropdown" aria-expanded="false">
  <span class="menu-text">Tutorials</span>
</a>
302 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials">
303 <li>
304 <a class="dropdown-item" href="../tutorials/quickstart.html">
305 <span class="dropdown-text">Quick Start</span></a>
306 </li>
307 <li>
308 <a class="dropdown-item" href="../tutorials/local-workflow.html">
309 <span class="dropdown-text">Local Workflow</span></a>
310 </li>
311 <li>
312 <a class="dropdown-item" href="../tutorials/atmosphere.html">
313 <span class="dropdown-text">Atmosphere Publishing</span></a>
314 </li>
315 <li>
316 <a class="dropdown-item" href="../tutorials/promotion.html">
317 <span class="dropdown-text">Promotion Workflow</span></a>
318 </li>
319 </ul>
320 </li>
321 <li class="nav-item dropdown ">
<!-- Dropdown trigger: href="#" opens the Reference menu in place rather than navigating, so it is exposed as a button, not a link. -->
<a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="button" data-bs-toggle="dropdown" aria-expanded="false">
  <span class="menu-text">Reference</span>
</a>
325 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference">
326 <li>
327 <a class="dropdown-item" href="../reference/architecture.html">
328 <span class="dropdown-text">Architecture Overview</span></a>
329 </li>
330 <li>
331 <a class="dropdown-item" href="../reference/packable-samples.html">
332 <span class="dropdown-text">Packable Samples</span></a>
333 </li>
334 <li>
335 <a class="dropdown-item" href="../reference/datasets.html">
336 <span class="dropdown-text">Datasets</span></a>
337 </li>
338 <li>
339 <a class="dropdown-item" href="../reference/lenses.html">
340 <span class="dropdown-text">Lenses</span></a>
341 </li>
342 <li>
343 <a class="dropdown-item" href="../reference/local-storage.html">
344 <span class="dropdown-text">Local Storage</span></a>
345 </li>
346 <li>
347 <a class="dropdown-item" href="../reference/atmosphere.html">
348 <span class="dropdown-text">Atmosphere</span></a>
349 </li>
350 <li>
351 <a class="dropdown-item" href="../reference/promotion.html">
352 <span class="dropdown-text">Promotion</span></a>
353 </li>
354 <li>
355 <a class="dropdown-item" href="../reference/load-dataset.html">
356 <span class="dropdown-text">load_dataset API</span></a>
357 </li>
358 <li>
359 <a class="dropdown-item" href="../reference/protocols.html">
360 <span class="dropdown-text">Protocols</span></a>
361 </li>
362 <li>
363 <a class="dropdown-item" href="../reference/uri-spec.html">
364 <span class="dropdown-text">URI Specification</span></a>
365 </li>
366 <li>
367 <a class="dropdown-item" href="../reference/troubleshooting.html">
<span class="dropdown-text">Troubleshooting &amp; FAQ</span></a>
369 </li>
370 <li>
371 <a class="dropdown-item" href="../reference/deployment.html">
372 <span class="dropdown-text">Deployment Guide</span></a>
373 </li>
374 </ul>
375 </li>
376 <li class="nav-item">
377 <a class="nav-link" href="../api/index.html">
378<span class="menu-text">API</span></a>
379 </li>
380</ul>
381 <ul class="navbar-nav navbar-nav-scroll ms-auto">
382 <li class="nav-item compact">
<!-- Icon-only link: the menu-text span is empty, so the link is named explicitly and the decorative glyph is hidden from assistive technology. -->
<a class="nav-link" href="https://github.com/your-org/atdata" aria-label="GitHub repository"> <i class="bi bi-github" aria-hidden="true"></i>
<span class="menu-text"></span></a>
386 </li>
387</ul>
388 </div> <!-- /navcollapse -->
389 <div class="quarto-navbar-tools">
390 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a>
391</div>
392 </div> <!-- /container-fluid -->
393 </nav>
394 <nav class="quarto-secondary-nav">
395 <div class="container-fluid d-flex">
396 <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
397 <i class="bi bi-layout-text-sidebar-reverse"></i>
398 </button>
399 <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/atmosphere.html">Atmosphere (ATProto Integration)</a></li></ol></nav>
400 <a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
401 </a>
402 </div>
403 </nav>
404</header>
405<!-- content -->
406<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
407<!-- sidebar -->
408 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
409 <div class="sidebar-menu-container">
410 <ul class="list-unstyled mt-1">
411 <li class="sidebar-item">
412 <div class="sidebar-item-container">
413 <a href="../index.html" class="sidebar-item-text sidebar-link">
414 <span class="menu-text">atdata</span></a>
415 </div>
416</li>
417 <li class="sidebar-item sidebar-item-section">
418 <div class="sidebar-item-container">
419 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
420 <span class="menu-text">Getting Started</span></a>
421 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
422 <i class="bi bi-chevron-right ms-2"></i>
423 </a>
424 </div>
425 <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
426 <li class="sidebar-item">
427 <div class="sidebar-item-container">
428 <a href="../tutorials/quickstart.html" class="sidebar-item-text sidebar-link">
429 <span class="menu-text">Quick Start</span></a>
430 </div>
431</li>
432 <li class="sidebar-item">
433 <div class="sidebar-item-container">
434 <a href="../tutorials/local-workflow.html" class="sidebar-item-text sidebar-link">
435 <span class="menu-text">Local Workflow</span></a>
436 </div>
437</li>
438 <li class="sidebar-item">
439 <div class="sidebar-item-container">
440 <a href="../tutorials/atmosphere.html" class="sidebar-item-text sidebar-link">
441 <span class="menu-text">Atmosphere Publishing</span></a>
442 </div>
443</li>
444 <li class="sidebar-item">
445 <div class="sidebar-item-container">
446 <a href="../tutorials/promotion.html" class="sidebar-item-text sidebar-link">
447 <span class="menu-text">Promotion Workflow</span></a>
448 </div>
449</li>
450 </ul>
451 </li>
452 <li class="sidebar-item sidebar-item-section">
453 <div class="sidebar-item-container">
454 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true">
455 <span class="menu-text">Reference</span></a>
456 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section">
457 <i class="bi bi-chevron-right ms-2"></i>
458 </a>
459 </div>
460 <ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
461 <li class="sidebar-item">
462 <div class="sidebar-item-container">
463 <a href="../reference/architecture.html" class="sidebar-item-text sidebar-link">
464 <span class="menu-text">Architecture Overview</span></a>
465 </div>
466</li>
467 <li class="sidebar-item">
468 <div class="sidebar-item-container">
469 <a href="../reference/packable-samples.html" class="sidebar-item-text sidebar-link">
470 <span class="menu-text">Packable Samples</span></a>
471 </div>
472</li>
473 <li class="sidebar-item">
474 <div class="sidebar-item-container">
475 <a href="../reference/datasets.html" class="sidebar-item-text sidebar-link">
476 <span class="menu-text">Datasets</span></a>
477 </div>
478</li>
479 <li class="sidebar-item">
480 <div class="sidebar-item-container">
481 <a href="../reference/lenses.html" class="sidebar-item-text sidebar-link">
482 <span class="menu-text">Lenses</span></a>
483 </div>
484</li>
485 <li class="sidebar-item">
486 <div class="sidebar-item-container">
487 <a href="../reference/local-storage.html" class="sidebar-item-text sidebar-link">
488 <span class="menu-text">Local Storage</span></a>
489 </div>
490</li>
491 <li class="sidebar-item">
492 <div class="sidebar-item-container">
493 <a href="../reference/atmosphere.html" class="sidebar-item-text sidebar-link active">
494 <span class="menu-text">Atmosphere (ATProto Integration)</span></a>
495 </div>
496</li>
497 <li class="sidebar-item">
498 <div class="sidebar-item-container">
499 <a href="../reference/promotion.html" class="sidebar-item-text sidebar-link">
500 <span class="menu-text">Promotion Workflow</span></a>
501 </div>
502</li>
503 <li class="sidebar-item">
504 <div class="sidebar-item-container">
505 <a href="../reference/load-dataset.html" class="sidebar-item-text sidebar-link">
506 <span class="menu-text">load_dataset API</span></a>
507 </div>
508</li>
509 <li class="sidebar-item">
510 <div class="sidebar-item-container">
511 <a href="../reference/protocols.html" class="sidebar-item-text sidebar-link">
512 <span class="menu-text">Protocols</span></a>
513 </div>
514</li>
515 <li class="sidebar-item">
516 <div class="sidebar-item-container">
517 <a href="../reference/uri-spec.html" class="sidebar-item-text sidebar-link">
518 <span class="menu-text">URI Specification</span></a>
519 </div>
520</li>
521 <li class="sidebar-item">
522 <div class="sidebar-item-container">
523 <a href="../reference/troubleshooting.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Troubleshooting &amp; FAQ</span></a>
525 </div>
526</li>
527 <li class="sidebar-item">
528 <div class="sidebar-item-container">
529 <a href="../reference/deployment.html" class="sidebar-item-text sidebar-link">
530 <span class="menu-text">Deployment Guide</span></a>
531 </div>
532</li>
533 </ul>
534 </li>
535 </ul>
536 </div>
537</nav>
538<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
539<!-- margin-sidebar -->
540 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
541 <nav id="TOC" role="doc-toc" class="toc-active">
542 <h2 id="toc-title">On this page</h2>
543
544 <ul>
545 <li><a href="#installation" id="toc-installation" class="nav-link active" data-scroll-target="#installation">Installation</a></li>
546 <li><a href="#overview" id="toc-overview" class="nav-link" data-scroll-target="#overview">Overview</a></li>
547 <li><a href="#atmosphereclient" id="toc-atmosphereclient" class="nav-link" data-scroll-target="#atmosphereclient">AtmosphereClient</a>
548 <ul class="collapse">
549 <li><a href="#session-management" id="toc-session-management" class="nav-link" data-scroll-target="#session-management">Session Management</a></li>
550 <li><a href="#custom-pds" id="toc-custom-pds" class="nav-link" data-scroll-target="#custom-pds">Custom PDS</a></li>
551 </ul></li>
552 <li><a href="#pdsblobstore" id="toc-pdsblobstore" class="nav-link" data-scroll-target="#pdsblobstore">PDSBlobStore</a>
553 <ul class="collapse">
554 <li><a href="#size-limits" id="toc-size-limits" class="nav-link" data-scroll-target="#size-limits">Size Limits</a></li>
555 </ul></li>
556 <li><a href="#blobsource" id="toc-blobsource" class="nav-link" data-scroll-target="#blobsource">BlobSource</a></li>
557 <li><a href="#atmosphereindex" id="toc-atmosphereindex" class="nav-link" data-scroll-target="#atmosphereindex">AtmosphereIndex</a>
558 <ul class="collapse">
559 <li><a href="#publishing-schemas" id="toc-publishing-schemas" class="nav-link" data-scroll-target="#publishing-schemas">Publishing Schemas</a></li>
560 <li><a href="#publishing-datasets" id="toc-publishing-datasets" class="nav-link" data-scroll-target="#publishing-datasets">Publishing Datasets</a></li>
561 <li><a href="#listing-and-retrieving" id="toc-listing-and-retrieving" class="nav-link" data-scroll-target="#listing-and-retrieving">Listing and Retrieving</a></li>
562 </ul></li>
563 <li><a href="#lower-level-publishers" id="toc-lower-level-publishers" class="nav-link" data-scroll-target="#lower-level-publishers">Lower-Level Publishers</a>
564 <ul class="collapse">
565 <li><a href="#schemapublisher" id="toc-schemapublisher" class="nav-link" data-scroll-target="#schemapublisher">SchemaPublisher</a></li>
566 <li><a href="#datasetpublisher" id="toc-datasetpublisher" class="nav-link" data-scroll-target="#datasetpublisher">DatasetPublisher</a></li>
567 <li><a href="#lenspublisher" id="toc-lenspublisher" class="nav-link" data-scroll-target="#lenspublisher">LensPublisher</a></li>
568 </ul></li>
569 <li><a href="#lower-level-loaders" id="toc-lower-level-loaders" class="nav-link" data-scroll-target="#lower-level-loaders">Lower-Level Loaders</a>
570 <ul class="collapse">
571 <li><a href="#schemaloader" id="toc-schemaloader" class="nav-link" data-scroll-target="#schemaloader">SchemaLoader</a></li>
572 <li><a href="#datasetloader" id="toc-datasetloader" class="nav-link" data-scroll-target="#datasetloader">DatasetLoader</a></li>
573 <li><a href="#lensloader" id="toc-lensloader" class="nav-link" data-scroll-target="#lensloader">LensLoader</a></li>
574 </ul></li>
575 <li><a href="#at-uris" id="toc-at-uris" class="nav-link" data-scroll-target="#at-uris">AT URIs</a></li>
576 <li><a href="#supported-field-types" id="toc-supported-field-types" class="nav-link" data-scroll-target="#supported-field-types">Supported Field Types</a></li>
577 <li><a href="#complete-example" id="toc-complete-example" class="nav-link" data-scroll-target="#complete-example">Complete Example</a></li>
578 <li><a href="#related" id="toc-related" class="nav-link" data-scroll-target="#related">Related</a></li>
579 </ul>
580<div class="toc-actions"><ul><li><a href="https://github.com/your-org/atdata/edit/main/reference/atmosphere.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav>
581 </div>
582<!-- main -->
583<main class="content" id="quarto-document-content">
584
585
586<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/atmosphere.html">Atmosphere (ATProto Integration)</a></li></ol></nav>
587<div class="quarto-title">
588<h1 class="title">Atmosphere (ATProto Integration)</h1>
589</div>
590
591<div>
592 <div class="description">
593 Publishing and discovering datasets on the AT Protocol network
594 </div>
595</div>
596
597
598<div class="quarto-title-meta">
599
600
601
602
603 </div>
604
605
606
607</header>
608
609
610<p>The atmosphere module enables publishing and discovering datasets on the ATProto network, creating a federated ecosystem for typed datasets.</p>
611<section id="installation" class="level2">
612<h2 class="anchored" data-anchor-id="installation">Installation</h2>
613<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install atdata<span class="pp">[</span><span class="ss">atmosphere</span><span class="pp">]</span></span>
614<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="co"># or</span></span>
615<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install atproto</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
616</section>
617<section id="overview" class="level2">
618<h2 class="anchored" data-anchor-id="overview">Overview</h2>
619<p>ATProto integration publishes datasets, schemas, and lenses as records in the <code>ac.foundation.dataset.*</code> namespace. This enables:</p>
620<ul>
621<li><strong>Discovery</strong> through the ATProto network</li>
622<li><strong>Federation</strong> across different hosts</li>
623<li><strong>Verifiability</strong> through content-addressable records</li>
624</ul>
625</section>
626<section id="atmosphereclient" class="level2">
627<h2 class="anchored" data-anchor-id="atmosphereclient">AtmosphereClient</h2>
628<p>The client handles authentication and record operations:</p>
629<div id="a55b5480" class="cell">
630<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient</span>
631<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span>
632<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span>
633<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a></span>
634<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Login with app-specific password (not your main password!)</span></span>
635<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"alice.bsky.social"</span>, <span class="st">"app-password"</span>)</span>
636<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a></span>
637<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(client.did) <span class="co"># 'did:plc:...'</span></span>
638<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(client.handle) <span class="co"># 'alice.bsky.social'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
639</div>
640<div class="callout callout-style-default callout-warning callout-titled">
641<div class="callout-header d-flex align-content-center">
642<div class="callout-icon-container">
643<i class="callout-icon"></i>
644</div>
645<div class="callout-title-container flex-fill">
646Warning
647</div>
648</div>
649<div class="callout-body-container callout-body">
650<p>Always use an app-specific password, not your main Bluesky password. Create app passwords at <a href="https://bsky.app/settings/app-passwords">bsky.app/settings/app-passwords</a>.</p>
651</div>
652</div>
653<section id="session-management" class="level3">
654<h3 class="anchored" data-anchor-id="session-management">Session Management</h3>
655<p>Save and restore sessions to avoid re-authentication:</p>
656<div id="546ad519" class="cell">
657<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Export session for later</span></span>
658<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>session_string <span class="op">=</span> client.export_session()</span>
659<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span>
660<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Later: restore session</span></span>
661<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a>new_client <span class="op">=</span> AtmosphereClient()</span>
662<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a>new_client.login_with_session(session_string)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
663</div>
664</section>
665<section id="custom-pds" class="level3">
666<h3 class="anchored" data-anchor-id="custom-pds">Custom PDS</h3>
667<p>Connect to a custom PDS instead of bsky.social:</p>
668<div id="02895206" class="cell">
669<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient(base_url<span class="op">=</span><span class="st">"https://pds.example.com"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
670</div>
671</section>
672</section>
673<section id="pdsblobstore" class="level2">
674<h2 class="anchored" data-anchor-id="pdsblobstore">PDSBlobStore</h2>
675<p>Store dataset shards as ATProto blobs for fully decentralized storage:</p>
676<div id="ddcafefe" class="cell">
677<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, PDSBlobStore</span>
678<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span>
679<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span>
680<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"handle.bsky.social"</span>, <span class="st">"app-password"</span>)</span>
681<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a></span>
682<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span>
683<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a></span>
684<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Write shards as blobs</span></span>
685<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a>urls <span class="op">=</span> store.write_shards(dataset, prefix<span class="op">=</span><span class="st">"my-data/v1"</span>)</span>
686<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Returns: ['at://did:plc:.../blob/bafyrei...', ...]</span></span>
687<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a></span>
688<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Transform AT URIs to HTTP URLs for reading</span></span>
689<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a>http_url <span class="op">=</span> store.read_url(urls[<span class="dv">0</span>])</span>
690<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Returns: 'https://pds.example.com/xrpc/com.atproto.sync.getBlob?...'</span></span>
691<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a></span>
692<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a BlobSource for streaming</span></span>
693<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> store.create_source(urls)</span>
694<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[MySample](source)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
695</div>
696<section id="size-limits" class="level3">
697<h3 class="anchored" data-anchor-id="size-limits">Size Limits</h3>
698<p>PDS blobs typically have size limits (often between 50 MB and 5 GB, depending on the PDS). Pass the <code>maxcount</code> and <code>maxsize</code> parameters to <code>write_shards()</code> to control shard sizes:</p>
699<div id="8d86e8e3" class="cell">
700<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>urls <span class="op">=</span> store.write_shards(</span>
701<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> dataset,</span>
702<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> prefix<span class="op">=</span><span class="st">"large-data/v1"</span>,</span>
703<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> maxcount<span class="op">=</span><span class="dv">5000</span>, <span class="co"># Max 5000 samples per shard</span></span>
704<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> maxsize<span class="op">=</span><span class="fl">50e6</span>, <span class="co"># Max 50MB per shard</span></span>
705<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
706</div>
707</section>
708</section>
709<section id="blobsource" class="level2">
710<h2 class="anchored" data-anchor-id="blobsource">BlobSource</h2>
711<p>Read datasets stored as PDS blobs:</p>
712<div id="bc781aa8" class="cell">
713<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> BlobSource</span>
714<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span>
715<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co"># From blob references</span></span>
716<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> BlobSource.from_refs([</span>
717<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> {<span class="st">"did"</span>: <span class="st">"did:plc:abc123"</span>, <span class="st">"cid"</span>: <span class="st">"bafyrei111"</span>},</span>
718<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> {<span class="st">"did"</span>: <span class="st">"did:plc:abc123"</span>, <span class="st">"cid"</span>: <span class="st">"bafyrei222"</span>},</span>
719<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>])</span>
720<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a></span>
721<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Or from PDSBlobStore</span></span>
722<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> store.create_source(urls)</span>
723<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a></span>
724<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Use with Dataset</span></span>
725<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[MySample](source)</span>
726<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> ds.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span>
727<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
728</div>
729</section>
730<section id="atmosphereindex" class="level2">
731<h2 class="anchored" data-anchor-id="atmosphereindex">AtmosphereIndex</h2>
732<p><code>AtmosphereIndex</code> is the unified interface for ATProto operations and implements the <code>AbstractIndex</code> protocol:</p>
733<div id="5a53b688" class="cell">
734<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, AtmosphereIndex, PDSBlobStore</span>
735<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span>
736<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span>
737<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"handle.bsky.social"</span>, <span class="st">"app-password"</span>)</span>
738<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a></span>
739<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Without blob storage (use external URLs)</span></span>
740<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client)</span>
741<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a></span>
742<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a><span class="co"># With PDS blob storage (recommended for full decentralization)</span></span>
743<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span>
744<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client, data_store<span class="op">=</span>store)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
745</div>
746<section id="publishing-schemas" class="level3">
747<h3 class="anchored" data-anchor-id="publishing-schemas">Publishing Schemas</h3>
748<div id="582e7fb0" class="cell">
749<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span>
750<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span>
751<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a></span>
752<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span>
753<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ImageSample:</span>
754<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> image: NDArray</span>
755<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span>
756<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a> confidence: <span class="bu">float</span></span>
757<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a></span>
758<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish schema</span></span>
759<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a>schema_uri <span class="op">=</span> index.publish_schema(</span>
760<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a> ImageSample,</span>
761<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a> version<span class="op">=</span><span class="st">"1.0.0"</span>,</span>
762<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Image classification sample"</span>,</span>
763<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a>)</span>
764<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a><span class="co"># Returns: "at://did:plc:.../ac.foundation.dataset.sampleSchema/..."</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
765</div>
766</section>
767<section id="publishing-datasets" class="level3">
768<h3 class="anchored" data-anchor-id="publishing-datasets">Publishing Datasets</h3>
769<div id="f658a5cc" class="cell">
770<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-{000000..000009}.tar"</span>)</span>
771<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span>
772<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span>
773<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> dataset,</span>
774<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"imagenet-subset"</span>,</span>
775<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri, <span class="co"># Optional - auto-publishes if omitted</span></span>
776<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"ImageNet subset"</span>,</span>
777<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"images"</span>, <span class="st">"classification"</span>],</span>
778<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a> license<span class="op">=</span><span class="st">"MIT"</span>,</span>
779<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a>)</span>
780<span id="cb10-11"><a href="#cb10-11" aria-hidden="true" tabindex="-1"></a></span>
781<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(entry.uri) <span class="co"># AT URI of the record</span></span>
782<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(entry.data_urls) <span class="co"># WebDataset URLs</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
783</div>
784</section>
785<section id="listing-and-retrieving" class="level3">
786<h3 class="anchored" data-anchor-id="listing-and-retrieving">Listing and Retrieving</h3>
787<div id="bc1fd369" class="cell">
788<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># List your datasets</span></span>
789<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> entry <span class="kw">in</span> index.list_datasets():</span>
790<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>entry<span class="sc">.</span>name<span class="sc">}</span><span class="ss">: </span><span class="sc">{</span>entry<span class="sc">.</span>schema_ref<span class="sc">}</span><span class="ss">"</span>)</span>
791<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a></span>
792<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a><span class="co"># List from another user</span></span>
793<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> entry <span class="kw">in</span> index.list_datasets(repo<span class="op">=</span><span class="st">"did:plc:other-user"</span>):</span>
794<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(entry.name)</span>
795<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a></span>
796<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Get specific dataset</span></span>
797<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.get_dataset(<span class="st">"at://did:plc:.../ac.foundation.dataset.record/..."</span>)</span>
798<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a></span>
799<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a><span class="co"># List schemas</span></span>
800<span id="cb11-13"><a href="#cb11-13" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> schema <span class="kw">in</span> index.list_schemas():</span>
801<span id="cb11-14"><a href="#cb11-14" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>schema[<span class="st">'name'</span>]<span class="sc">}</span><span class="ss"> v</span><span class="sc">{</span>schema[<span class="st">'version'</span>]<span class="sc">}</span><span class="ss">"</span>)</span>
802<span id="cb11-15"><a href="#cb11-15" aria-hidden="true" tabindex="-1"></a></span>
803<span id="cb11-16"><a href="#cb11-16" aria-hidden="true" tabindex="-1"></a><span class="co"># Decode schema to Python type</span></span>
804<span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a>SampleType <span class="op">=</span> index.decode_schema(schema_uri)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
805</div>
806</section>
807</section>
808<section id="lower-level-publishers" class="level2">
809<h2 class="anchored" data-anchor-id="lower-level-publishers">Lower-Level Publishers</h2>
810<p>For more control, use the individual publisher classes:</p>
811<section id="schemapublisher" class="level3">
812<h3 class="anchored" data-anchor-id="schemapublisher">SchemaPublisher</h3>
813<div id="b4deca81" class="cell">
814<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> SchemaPublisher</span>
815<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span>
816<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> SchemaPublisher(client)</span>
817<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a></span>
818<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish(</span>
819<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a> ImageSample,</span>
820<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"ImageSample"</span>,</span>
821<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a> version<span class="op">=</span><span class="st">"1.0.0"</span>,</span>
822<span id="cb12-9"><a href="#cb12-9" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Image with label"</span>,</span>
823<span id="cb12-10"><a href="#cb12-10" aria-hidden="true" tabindex="-1"></a> metadata<span class="op">=</span>{<span class="st">"source"</span>: <span class="st">"training"</span>},</span>
824<span id="cb12-11"><a href="#cb12-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
825</div>
826</section>
827<section id="datasetpublisher" class="level3">
828<h3 class="anchored" data-anchor-id="datasetpublisher">DatasetPublisher</h3>
829<div id="626a0256" class="cell">
830<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetPublisher</span>
831<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a></span>
832<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> DatasetPublisher(client)</span>
833<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a></span>
834<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish(</span>
835<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a> dataset,</span>
836<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"training-images"</span>,</span>
837<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a> schema_uri<span class="op">=</span>schema_uri, <span class="co"># Optional here; required only if auto_publish_schema=False</span></span>
838<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a> auto_publish_schema<span class="op">=</span><span class="va">True</span>, <span class="co"># Publish schema automatically</span></span>
839<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Training images"</span>,</span>
840<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"training"</span>, <span class="st">"images"</span>],</span>
841<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a> license<span class="op">=</span><span class="st">"MIT"</span>,</span>
842<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
843</div>
844<section id="blob-storage" class="level4">
845<h4 class="anchored" data-anchor-id="blob-storage">Blob Storage</h4>
846<p>There are two approaches to storing data as ATProto blobs:</p>
847<p><strong>Approach 1: PDSBlobStore (Recommended)</strong></p>
848<p>Use <code>PDSBlobStore</code> with <code>AtmosphereIndex</code> for automatic shard management:</p>
849<div id="ff58eaa6" class="cell">
850<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> PDSBlobStore, AtmosphereIndex</span>
851<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a></span>
852<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span>
853<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client, data_store<span class="op">=</span>store)</span>
854<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a></span>
855<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Dataset shards are automatically uploaded as blobs</span></span>
856<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span>
857<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a> dataset,</span>
858<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"my-dataset"</span>,</span>
859<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri,</span>
860<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a>)</span>
861<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a></span>
862<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Later: load using BlobSource</span></span>
863<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> store.create_source(entry.data_urls)</span>
864<span id="cb14-15"><a href="#cb14-15" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[MySample](source)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
865</div>
866<p><strong>Approach 2: Manual Blob Publishing</strong></p>
867<p>For more control, use <code>DatasetPublisher.publish_with_blobs()</code> directly:</p>
868<div id="46c8ca86" class="cell">
869<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> io</span>
870<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span>
871<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a></span>
872<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Create tar data in memory</span></span>
873<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a>tar_buffer <span class="op">=</span> io.BytesIO()</span>
874<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.TarWriter(tar_buffer) <span class="im">as</span> sink:</span>
875<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i, sample <span class="kw">in</span> <span class="bu">enumerate</span>(samples):</span>
876<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>sample.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span>
877<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a></span>
878<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish with blob storage</span></span>
879<span id="cb15-11"><a href="#cb15-11" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish_with_blobs(</span>
880<span id="cb15-12"><a href="#cb15-12" aria-hidden="true" tabindex="-1"></a> blobs<span class="op">=</span>[tar_buffer.getvalue()],</span>
881<span id="cb15-13"><a href="#cb15-13" aria-hidden="true" tabindex="-1"></a> schema_uri<span class="op">=</span>schema_uri,</span>
882<span id="cb15-14"><a href="#cb15-14" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"small-dataset"</span>,</span>
883<span id="cb15-15"><a href="#cb15-15" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Dataset stored in ATProto blobs"</span>,</span>
884<span id="cb15-16"><a href="#cb15-16" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"small"</span>, <span class="st">"demo"</span>],</span>
885<span id="cb15-17"><a href="#cb15-17" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
886</div>
887<p><strong>Loading Blob-Stored Datasets</strong></p>
888<div id="01f518ca" class="cell">
889<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetLoader</span>
890<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> BlobSource</span>
891<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span>
892<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> DatasetLoader(client)</span>
893<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a></span>
894<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Check storage type</span></span>
895<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a>storage_type <span class="op">=</span> loader.get_storage_type(uri) <span class="co"># "external" or "blobs"</span></span>
896<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a></span>
897<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span> storage_type <span class="op">==</span> <span class="st">"blobs"</span>:</span>
898<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a> <span class="co"># Get blob URLs and create BlobSource</span></span>
899<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a> blob_urls <span class="op">=</span> loader.get_blob_urls(uri)</span>
900<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a> <span class="co"># Parse to blob refs for BlobSource</span></span>
901<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a> <span class="co"># Or use loader.to_dataset() which handles this automatically</span></span>
902<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a></span>
903<span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a><span class="co"># to_dataset() handles both storage types automatically</span></span>
904<span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> loader.to_dataset(uri, MySample)</span>
905<span id="cb16-17"><a href="#cb16-17" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span>
906<span id="cb16-18"><a href="#cb16-18" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
907</div>
908</section>
909</section>
910<section id="lenspublisher" class="level3">
911<h3 class="anchored" data-anchor-id="lenspublisher">LensPublisher</h3>
912<div id="7733653d" class="cell">
913<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> LensPublisher</span>
914<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a></span>
915<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> LensPublisher(client)</span>
916<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a></span>
917<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a><span class="co"># With code references</span></span>
918<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish(</span>
919<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"simplify"</span>,</span>
920<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a> source_schema<span class="op">=</span>full_schema_uri,</span>
921<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a> target_schema<span class="op">=</span>simple_schema_uri,</span>
922<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Extract label only"</span>,</span>
923<span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a> getter_code<span class="op">=</span>{</span>
924<span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"repository"</span>: <span class="st">"https://github.com/org/repo"</span>,</span>
925<span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a> <span class="st">"commit"</span>: <span class="st">"abc123def..."</span>,</span>
926<span id="cb17-14"><a href="#cb17-14" aria-hidden="true" tabindex="-1"></a> <span class="st">"path"</span>: <span class="st">"transforms/simplify.py:simplify_getter"</span>,</span>
927<span id="cb17-15"><a href="#cb17-15" aria-hidden="true" tabindex="-1"></a> },</span>
928<span id="cb17-16"><a href="#cb17-16" aria-hidden="true" tabindex="-1"></a> putter_code<span class="op">=</span>{</span>
929<span id="cb17-17"><a href="#cb17-17" aria-hidden="true" tabindex="-1"></a> <span class="st">"repository"</span>: <span class="st">"https://github.com/org/repo"</span>,</span>
930<span id="cb17-18"><a href="#cb17-18" aria-hidden="true" tabindex="-1"></a> <span class="st">"commit"</span>: <span class="st">"abc123def..."</span>,</span>
931<span id="cb17-19"><a href="#cb17-19" aria-hidden="true" tabindex="-1"></a> <span class="st">"path"</span>: <span class="st">"transforms/simplify.py:simplify_putter"</span>,</span>
932<span id="cb17-20"><a href="#cb17-20" aria-hidden="true" tabindex="-1"></a> },</span>
933<span id="cb17-21"><a href="#cb17-21" aria-hidden="true" tabindex="-1"></a>)</span>
934<span id="cb17-22"><a href="#cb17-22" aria-hidden="true" tabindex="-1"></a></span>
935<span id="cb17-23"><a href="#cb17-23" aria-hidden="true" tabindex="-1"></a><span class="co"># Or publish from a Lens object</span></span>
936<span id="cb17-24"><a href="#cb17-24" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.lens <span class="im">import</span> lens</span>
937<span id="cb17-25"><a href="#cb17-25" aria-hidden="true" tabindex="-1"></a></span>
938<span id="cb17-26"><a href="#cb17-26" aria-hidden="true" tabindex="-1"></a><span class="at">@lens</span></span>
939<span id="cb17-27"><a href="#cb17-27" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> simplify(src: FullSample) <span class="op">-></span> SimpleSample:</span>
940<span id="cb17-28"><a href="#cb17-28" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> SimpleSample(label<span class="op">=</span>src.label)</span>
941<span id="cb17-29"><a href="#cb17-29" aria-hidden="true" tabindex="-1"></a></span>
942<span id="cb17-30"><a href="#cb17-30" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> publisher.publish_from_lens(</span>
943<span id="cb17-31"><a href="#cb17-31" aria-hidden="true" tabindex="-1"></a> simplify,</span>
944<span id="cb17-32"><a href="#cb17-32" aria-hidden="true" tabindex="-1"></a> source_schema<span class="op">=</span>full_schema_uri,</span>
945<span id="cb17-33"><a href="#cb17-33" aria-hidden="true" tabindex="-1"></a> target_schema<span class="op">=</span>simple_schema_uri,</span>
946<span id="cb17-34"><a href="#cb17-34" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
947</div>
948</section>
949</section>
950<section id="lower-level-loaders" class="level2">
951<h2 class="anchored" data-anchor-id="lower-level-loaders">Lower-Level Loaders</h2>
952<p>For direct access to records, use the loader classes:</p>
953<section id="schemaloader" class="level3">
954<h3 class="anchored" data-anchor-id="schemaloader">SchemaLoader</h3>
955<div id="7ea59338" class="cell">
956<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> SchemaLoader</span>
957<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a></span>
958<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> SchemaLoader(client)</span>
959<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a></span>
960<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Get a specific schema</span></span>
961<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a>schema <span class="op">=</span> loader.get(<span class="st">"at://did:plc:abc/ac.foundation.dataset.sampleSchema/xyz"</span>)</span>
962<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(schema[<span class="st">"name"</span>], schema[<span class="st">"version"</span>])</span>
963<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a></span>
964<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a><span class="co"># List all schemas from a repository</span></span>
965<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> schema <span class="kw">in</span> loader.list_all(repo<span class="op">=</span><span class="st">"did:plc:other-user"</span>):</span>
966<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(schema[<span class="st">"name"</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
967</div>
968</section>
969<section id="datasetloader" class="level3">
970<h3 class="anchored" data-anchor-id="datasetloader">DatasetLoader</h3>
971<div id="18fc9b5a" class="cell">
972<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetLoader</span>
973<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a></span>
974<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> DatasetLoader(client)</span>
975<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a></span>
976<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Get a specific dataset record</span></span>
977<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a>record <span class="op">=</span> loader.get(<span class="st">"at://did:plc:abc/ac.foundation.dataset.record/xyz"</span>)</span>
978<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a></span>
979<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Check storage type</span></span>
980<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a>storage_type <span class="op">=</span> loader.get_storage_type(uri) <span class="co"># "external" or "blobs"</span></span>
981<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a></span>
982<span id="cb19-11"><a href="#cb19-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Get URLs based on storage type</span></span>
983<span id="cb19-12"><a href="#cb19-12" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span> storage_type <span class="op">==</span> <span class="st">"external"</span>:</span>
984<span id="cb19-13"><a href="#cb19-13" aria-hidden="true" tabindex="-1"></a> urls <span class="op">=</span> loader.get_urls(uri)</span>
985<span id="cb19-14"><a href="#cb19-14" aria-hidden="true" tabindex="-1"></a><span class="cf">else</span>:</span>
986<span id="cb19-15"><a href="#cb19-15" aria-hidden="true" tabindex="-1"></a> urls <span class="op">=</span> loader.get_blob_urls(uri)</span>
987<span id="cb19-16"><a href="#cb19-16" aria-hidden="true" tabindex="-1"></a></span>
988<span id="cb19-17"><a href="#cb19-17" aria-hidden="true" tabindex="-1"></a><span class="co"># Get metadata</span></span>
989<span id="cb19-18"><a href="#cb19-18" aria-hidden="true" tabindex="-1"></a>metadata <span class="op">=</span> loader.get_metadata(uri)</span>
990<span id="cb19-19"><a href="#cb19-19" aria-hidden="true" tabindex="-1"></a></span>
991<span id="cb19-20"><a href="#cb19-20" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a Dataset object directly</span></span>
992<span id="cb19-21"><a href="#cb19-21" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> loader.to_dataset(uri, MySampleType)</span>
993<span id="cb19-22"><a href="#cb19-22" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span>
994<span id="cb19-23"><a href="#cb19-23" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
995</div>
996</section>
997<section id="lensloader" class="level3">
998<h3 class="anchored" data-anchor-id="lensloader">LensLoader</h3>
999<div id="fa26f4fb" class="cell">
1000<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> LensLoader</span>
1001<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a></span>
1002<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> LensLoader(client)</span>
1003<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a></span>
1004<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Get a specific lens record</span></span>
1005<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a>lens <span class="op">=</span> loader.get(<span class="st">"at://did:plc:abc/ac.foundation.dataset.lens/xyz"</span>)</span>
1006<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(lens[<span class="st">"name"</span>])</span>
1007<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(lens[<span class="st">"sourceSchema"</span>], <span class="st">"->"</span>, lens[<span class="st">"targetSchema"</span>])</span>
1008<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a></span>
1009<span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a><span class="co"># List all lenses from a repository</span></span>
1010<span id="cb20-11"><a href="#cb20-11" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> lens <span class="kw">in</span> loader.list_all():</span>
1011<span id="cb20-12"><a href="#cb20-12" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(lens[<span class="st">"name"</span>])</span>
1012<span id="cb20-13"><a href="#cb20-13" aria-hidden="true" tabindex="-1"></a></span>
1013<span id="cb20-14"><a href="#cb20-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Find lenses by schema</span></span>
1014<span id="cb20-15"><a href="#cb20-15" aria-hidden="true" tabindex="-1"></a>lenses <span class="op">=</span> loader.find_by_schemas(</span>
1015<span id="cb20-16"><a href="#cb20-16" aria-hidden="true" tabindex="-1"></a> source_schema_uri<span class="op">=</span><span class="st">"at://did:plc:abc/ac.foundation.dataset.sampleSchema/source"</span>,</span>
1016<span id="cb20-17"><a href="#cb20-17" aria-hidden="true" tabindex="-1"></a> target_schema_uri<span class="op">=</span><span class="st">"at://did:plc:abc/ac.foundation.dataset.sampleSchema/target"</span>,</span>
1017<span id="cb20-18"><a href="#cb20-18" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
1018</div>
1019</section>
1020</section>
1021<section id="at-uris" class="level2">
1022<h2 class="anchored" data-anchor-id="at-uris">AT URIs</h2>
1023<p>ATProto records are identified by AT URIs:</p>
1024<div id="892f4f44" class="cell">
1025<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtUri</span>
1026<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a></span>
1027<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Parse an AT URI</span></span>
1028<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a>uri <span class="op">=</span> AtUri.parse(<span class="st">"at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz"</span>)</span>
1029<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a></span>
1030<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(uri.authority) <span class="co"># 'did:plc:abc123'</span></span>
1031<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(uri.collection) <span class="co"># 'ac.foundation.dataset.sampleSchema'</span></span>
1032<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(uri.rkey) <span class="co"># 'xyz'</span></span>
1033<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a></span>
1034<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Format back to string</span></span>
1035<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="bu">str</span>(uri)) <span class="co"># 'at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
1036</div>
1037</section>
1038<section id="supported-field-types" class="level2">
1039<h2 class="anchored" data-anchor-id="supported-field-types">Supported Field Types</h2>
1040<p>Schemas support these field types:</p>
1041<table class="caption-top table">
1042<thead>
1043<tr class="header">
1044<th>Python Type</th>
1045<th>ATProto Type</th>
1046</tr>
1047</thead>
1048<tbody>
1049<tr class="odd">
1050<td><code>str</code></td>
1051<td><code>primitive/str</code></td>
1052</tr>
1053<tr class="even">
1054<td><code>int</code></td>
1055<td><code>primitive/int</code></td>
1056</tr>
1057<tr class="odd">
1058<td><code>float</code></td>
1059<td><code>primitive/float</code></td>
1060</tr>
1061<tr class="even">
1062<td><code>bool</code></td>
1063<td><code>primitive/bool</code></td>
1064</tr>
1065<tr class="odd">
1066<td><code>bytes</code></td>
1067<td><code>primitive/bytes</code></td>
1068</tr>
1069<tr class="even">
1070<td><code>NDArray</code></td>
1071<td><code>ndarray</code> (default dtype: float32)</td>
1072</tr>
1073<tr class="odd">
1074<td><code>NDArray[np.float64]</code></td>
1075<td><code>ndarray</code> (dtype: float64)</td>
1076</tr>
1077<tr class="even">
1078<td><code>list[str]</code></td>
1079<td><code>array</code> with items</td>
1080</tr>
1081<tr class="odd">
1082<td><code>T | None</code></td>
1083<td>Optional field</td>
1084</tr>
1085</tbody>
1086</table>
1087</section>
1088<section id="complete-example" class="level2">
1089<h2 class="anchored" data-anchor-id="complete-example">Complete Example</h2>
1090<p>This example shows the full workflow using <code>PDSBlobStore</code> for decentralized storage:</p>
1091<div id="8ea58305" class="cell">
1092<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
1093<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span>
1094<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span>
1095<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, AtmosphereIndex, PDSBlobStore</span>
1096<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span>
1097<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a></span>
1098<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a><span class="co"># 1. Define and create samples</span></span>
1099<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span>
1100<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> FeatureSample:</span>
1101<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a> features: NDArray</span>
1102<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">int</span></span>
1103<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a> source: <span class="bu">str</span></span>
1104<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a></span>
1105<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a>samples <span class="op">=</span> [</span>
1106<span id="cb22-15"><a href="#cb22-15" aria-hidden="true" tabindex="-1"></a> FeatureSample(</span>
1107<span id="cb22-16"><a href="#cb22-16" aria-hidden="true" tabindex="-1"></a> features<span class="op">=</span>np.random.randn(<span class="dv">128</span>).astype(np.float32),</span>
1108<span id="cb22-17"><a href="#cb22-17" aria-hidden="true" tabindex="-1"></a> label<span class="op">=</span>i <span class="op">%</span> <span class="dv">10</span>,</span>
1109<span id="cb22-18"><a href="#cb22-18" aria-hidden="true" tabindex="-1"></a> source<span class="op">=</span><span class="st">"synthetic"</span>,</span>
1110<span id="cb22-19"><a href="#cb22-19" aria-hidden="true" tabindex="-1"></a> )</span>
1111<span id="cb22-20"><a href="#cb22-20" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">1000</span>)</span>
1112<span id="cb22-21"><a href="#cb22-21" aria-hidden="true" tabindex="-1"></a>]</span>
1113<span id="cb22-22"><a href="#cb22-22" aria-hidden="true" tabindex="-1"></a></span>
1114<span id="cb22-23"><a href="#cb22-23" aria-hidden="true" tabindex="-1"></a><span class="co"># 2. Write to tar</span></span>
1115<span id="cb22-24"><a href="#cb22-24" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.TarWriter(<span class="st">"features.tar"</span>) <span class="im">as</span> sink:</span>
1116<span id="cb22-25"><a href="#cb22-25" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i, s <span class="kw">in</span> <span class="bu">enumerate</span>(samples):</span>
1117<span id="cb22-26"><a href="#cb22-26" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>s.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span>
1118<span id="cb22-27"><a href="#cb22-27" aria-hidden="true" tabindex="-1"></a></span>
1119<span id="cb22-28"><a href="#cb22-28" aria-hidden="true" tabindex="-1"></a><span class="co"># 3. Authenticate and set up blob storage</span></span>
1120<span id="cb22-29"><a href="#cb22-29" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span>
1121<span id="cb22-30"><a href="#cb22-30" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"myhandle.bsky.social"</span>, <span class="st">"app-password"</span>)</span>
1122<span id="cb22-31"><a href="#cb22-31" aria-hidden="true" tabindex="-1"></a></span>
1123<span id="cb22-32"><a href="#cb22-32" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span>
1124<span id="cb22-33"><a href="#cb22-33" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client, data_store<span class="op">=</span>store)</span>
1125<span id="cb22-34"><a href="#cb22-34" aria-hidden="true" tabindex="-1"></a></span>
1126<span id="cb22-35"><a href="#cb22-35" aria-hidden="true" tabindex="-1"></a><span class="co"># 4. Publish schema</span></span>
1127<span id="cb22-36"><a href="#cb22-36" aria-hidden="true" tabindex="-1"></a>schema_uri <span class="op">=</span> index.publish_schema(</span>
1128<span id="cb22-37"><a href="#cb22-37" aria-hidden="true" tabindex="-1"></a> FeatureSample,</span>
1129<span id="cb22-38"><a href="#cb22-38" aria-hidden="true" tabindex="-1"></a> version<span class="op">=</span><span class="st">"1.0.0"</span>,</span>
1130<span id="cb22-39"><a href="#cb22-39" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Feature vectors with labels"</span>,</span>
1131<span id="cb22-40"><a href="#cb22-40" aria-hidden="true" tabindex="-1"></a>)</span>
1132<span id="cb22-41"><a href="#cb22-41" aria-hidden="true" tabindex="-1"></a></span>
1133<span id="cb22-42"><a href="#cb22-42" aria-hidden="true" tabindex="-1"></a><span class="co"># 5. Publish dataset (shards uploaded as blobs)</span></span>
1134<span id="cb22-43"><a href="#cb22-43" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[FeatureSample](<span class="st">"features.tar"</span>)</span>
1135<span id="cb22-44"><a href="#cb22-44" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span>
1136<span id="cb22-45"><a href="#cb22-45" aria-hidden="true" tabindex="-1"></a> dataset,</span>
1137<span id="cb22-46"><a href="#cb22-46" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"synthetic-features-v1"</span>,</span>
1138<span id="cb22-47"><a href="#cb22-47" aria-hidden="true" tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri,</span>
1139<span id="cb22-48"><a href="#cb22-48" aria-hidden="true" tabindex="-1"></a> tags<span class="op">=</span>[<span class="st">"features"</span>, <span class="st">"synthetic"</span>],</span>
1140<span id="cb22-49"><a href="#cb22-49" aria-hidden="true" tabindex="-1"></a>)</span>
1141<span id="cb22-50"><a href="#cb22-50" aria-hidden="true" tabindex="-1"></a></span>
1142<span id="cb22-51"><a href="#cb22-51" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Published: </span><span class="sc">{</span>entry<span class="sc">.</span>uri<span class="sc">}</span><span class="ss">"</span>)</span>
1143<span id="cb22-52"><a href="#cb22-52" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Blob URLs: </span><span class="sc">{</span>entry<span class="sc">.</span>data_urls<span class="sc">}</span><span class="ss">"</span>)</span>
1144<span id="cb22-53"><a href="#cb22-53" aria-hidden="true" tabindex="-1"></a></span>
1145<span id="cb22-54"><a href="#cb22-54" aria-hidden="true" tabindex="-1"></a><span class="co"># 6. Later: discover and load from blobs</span></span>
1146<span id="cb22-55"><a href="#cb22-55" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> dataset_entry <span class="kw">in</span> index.list_datasets():</span>
1147<span id="cb22-56"><a href="#cb22-56" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"Found: </span><span class="sc">{</span>dataset_entry<span class="sc">.</span>name<span class="sc">}</span><span class="ss">"</span>)</span>
1148<span id="cb22-57"><a href="#cb22-57" aria-hidden="true" tabindex="-1"></a></span>
1149<span id="cb22-58"><a href="#cb22-58" aria-hidden="true" tabindex="-1"></a> <span class="co"># Reconstruct type from schema</span></span>
1150<span id="cb22-59"><a href="#cb22-59" aria-hidden="true" tabindex="-1"></a> SampleType <span class="op">=</span> index.decode_schema(dataset_entry.schema_ref)</span>
1151<span id="cb22-60"><a href="#cb22-60" aria-hidden="true" tabindex="-1"></a></span>
1152<span id="cb22-61"><a href="#cb22-61" aria-hidden="true" tabindex="-1"></a> <span class="co"># Create source from blob URLs</span></span>
1153<span id="cb22-62"><a href="#cb22-62" aria-hidden="true" tabindex="-1"></a> source <span class="op">=</span> store.create_source(dataset_entry.data_urls)</span>
1154<span id="cb22-63"><a href="#cb22-63" aria-hidden="true" tabindex="-1"></a></span>
1155<span id="cb22-64"><a href="#cb22-64" aria-hidden="true" tabindex="-1"></a> <span class="co"># Load dataset from blobs</span></span>
1156<span id="cb22-65"><a href="#cb22-65" aria-hidden="true" tabindex="-1"></a> ds <span class="op">=</span> atdata.Dataset[SampleType](source)</span>
1157<span id="cb22-66"><a href="#cb22-66" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> batch <span class="kw">in</span> ds.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span>
1158<span id="cb22-67"><a href="#cb22-67" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(batch.features.shape)</span>
1159<span id="cb22-68"><a href="#cb22-68" aria-hidden="true" tabindex="-1"></a> <span class="cf">break</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
1160</div>
1161<p>For external URL storage (without <code>PDSBlobStore</code>):</p>
1162<div id="1f55ae9a" class="cell">
1163<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Use AtmosphereIndex without data_store</span></span>
1164<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> AtmosphereIndex(client)</span>
1165<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a></span>
1166<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Dataset URLs will be stored as-is (external references)</span></span>
1167<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span>
1168<span id="cb23-6"><a href="#cb23-6" aria-hidden="true" tabindex="-1"></a> dataset,</span>
1169<span id="cb23-7"><a href="#cb23-7" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"external-features"</span>,</span>
1170<span id="cb23-8"><a href="#cb23-8" aria-hidden="true" tabindex="-1"></a> schema_ref<span class="op">=</span>schema_uri,</span>
1171<span id="cb23-9"><a href="#cb23-9" aria-hidden="true" tabindex="-1"></a>)</span>
1172<span id="cb23-10"><a href="#cb23-10" aria-hidden="true" tabindex="-1"></a></span>
1173<span id="cb23-11"><a href="#cb23-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Load using standard URL source</span></span>
1174<span id="cb23-12"><a href="#cb23-12" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.Dataset[FeatureSample](entry.data_urls[<span class="dv">0</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
1175</div>
1176</section>
1177<section id="related" class="level2">
1178<h2 class="anchored" data-anchor-id="related">Related</h2>
1179<ul>
1180<li><a href="../reference/local-storage.html">Local Storage</a> - Redis + S3 backend</li>
1181<li><a href="../reference/promotion.html">Promotion</a> - Promoting local datasets to ATProto</li>
1182<li><a href="../reference/protocols.html">Protocols</a> - AbstractIndex interface</li>
1183<li><a href="../reference/packable-samples.html">Packable Samples</a> - Defining sample types</li>
1184</ul>
1185
1186
1187</section>
1188
1189</main> <!-- /main -->
1190<script id="quarto-html-after-body" type="application/javascript">
1191 window.document.addEventListener("DOMContentLoaded", function (event) {
1192 // Ensure there is a toggle, if there isn't float one in the top right
1193 if (window.document.querySelector('.quarto-color-scheme-toggle') === null) {
1194 const a = window.document.createElement('a');
1195 a.classList.add('top-right');
1196 a.classList.add('quarto-color-scheme-toggle');
1197 a.href = "";
1198 a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; };
1199 const i = window.document.createElement("i");
1200 i.classList.add('bi');
1201 a.appendChild(i);
1202 window.document.body.appendChild(a);
1203 }
1204 setColorSchemeToggle(hasAlternateSentinel())
1205 const icon = "";
1206 const anchorJS = new window.AnchorJS();
1207 anchorJS.options = {
1208 placement: 'right',
1209 icon: icon
1210 };
1211 anchorJS.add('.anchored');
1212 const isCodeAnnotation = (el) => { // Returns true when any class on `el` starts with 'code-annotation-', false otherwise.
1213 for (const clz of el.classList) {
1214 if (clz.startsWith('code-annotation-')) {
1215 return true; // first matching class is enough
1216 }
1217 }
1218 return false; // no class carried the prefix
1219 }
1220 const onCopySuccess = function(e) { // Clipboard 'success' handler: shows "Copied!" feedback on the clicked button and reverts it after 1000 ms.
1221 // the button that triggered the copy (e.trigger)
1222 const button = e.trigger;
1223 // drop focus from the button
1224 button.blur();
1225 // flash the "checked" state via a CSS class
1226 button.classList.add('code-copy-button-checked');
1227 var currentTitle = button.getAttribute("title"); // remembered so the timeout below can restore it
1228 button.setAttribute("title", "Copied!");
1229 let tooltip; // stays undefined when window.bootstrap is absent
1230 if (window.bootstrap) { // tooltip feedback only when Bootstrap's JS is available
1231 button.setAttribute("data-bs-toggle", "tooltip");
1232 button.setAttribute("data-bs-placement", "left");
1233 button.setAttribute("data-bs-title", "Copied!");
1234 tooltip = new bootstrap.Tooltip(button,
1235 { trigger: "manual", // shown and hidden explicitly by this handler
1236 customClass: "code-copy-button-tooltip",
1237 offset: [0, -8]});
1238 tooltip.show();
1239 }
1240 setTimeout(function() { // undo every piece of feedback set above
1241 if (tooltip) {
1242 tooltip.hide();
1243 button.removeAttribute("data-bs-title");
1244 button.removeAttribute("data-bs-toggle");
1245 button.removeAttribute("data-bs-placement");
1246 }
1247 button.setAttribute("title", currentTitle);
1248 button.classList.remove('code-copy-button-checked');
1249 }, 1000);
1250 // clear the selection made for the copy
1251 e.clearSelection();
1252 }
1253 const getTextToCopy = function(trigger) {
1254 const codeEl = trigger.previousElementSibling.cloneNode(true);
1255 for (const childEl of codeEl.children) {
1256 if (isCodeAnnotation(childEl)) {
1257 childEl.remove();
1258 }
1259 }
1260 return codeEl.innerText;
1261 }
// Copy buttons outside the embedded source-code modal share one ClipboardJS
// instance.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);
// When the embedded source-code modal is present, the copy buttons inside it
// get a second instance that uses the modal as its container.
const embeddedSourceModalEl = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModalEl) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: embeddedSourceModalEl
  });
  clipboardModal.on('success', onCopySuccess);
}
// http(s)://localhost with an optional port, followed by a slash.
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
// Written as a regex literal on purpose: in a string pattern the `\.` escape
// collapses to a bare `.`, which matches any character instead of a dot.
var filterRegex = /https:\/\/github\.com\/your-org\/atdata/;
// Returns true when `href` should be treated as internal: it contains the
// project repository URL, points at localhost, or is a mailto: link.
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Go through the links that are not part of the navigation chrome and give
// external ones back their original target.
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  // Internal links, and links that carry no recorded original href, stay as
  // they are.
  if (isInternal(link.href) || link.dataset.originalHref === undefined) {
    continue;
  }
  // External link whose href may have been rewritten by quarto-nav.js:
  // undo that rewrite.
  link.href = link.dataset.originalHref;
}
// Attaches a tippy popup to `el` with the settings shared by all hover
// previews on the page.
//
// `contentFn`, `onTriggerFn` and `onUntriggerFn` are optional; each one is
// passed to tippy (as `content`, `onTrigger`, `onUntrigger`) only when it is
// truthy.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // The popup is appended to the parent of the element tippy hands in.
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalSettings = [
    ['content', contentFn],
    ['onTrigger', onTriggerFn],
    ['onUntrigger', onUntriggerFn],
  ];
  for (const [option, value] of optionalSettings) {
    if (value) {
      config[option] = value;
    }
  }
  window.tippy(el, config);
}
// Footnote references: hovering one shows the HTML of the note it points to.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, () => {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // When the value parses as a full URL keep only its fragment; otherwise
    // it is used unchanged.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    // Empty popup content when the note cannot be found.
    return note ? note.innerHTML : "";
  });
}
// Cross-reference links on the page.
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Builds the HTML string shown in a cross-reference popup.
//
// `id` is the target's id, or null when the reference has none; `note` is
// the target element and is modified in place. Returns HTML: a shortened
// excerpt for sections (null id or an id starting with `sec-`), otherwise
// the element's markup.
const processXRef = (id, note) => {
  // Strip column container classes from the node and all of its descendants
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  };
  stripColumnClz(note);

  // Typeset math in `target` when the Quarto helper is available.
  const typeset = (target) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(target);
    }
  };

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typeset(note);
    // Callouts keep their own wrapper element; everything else contributes
    // only its contents.
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Sections with at most two children are shown in full.
  if (!(note.children && note.children.length > 2)) {
    typeset(note);
    return note.innerHTML;
  }

  // Special case sections, only their first couple elements: the first child
  // plus the next child that is not an empty paragraph.
  const container = document.createElement("div");
  container.appendChild(note.children[0].cloneNode(true));
  for (let idx = 1; idx < note.children.length; idx++) {
    const child = note.children[idx];
    const isEmptyParagraph = child.tagName === "P" && child.innerText === "";
    if (!isEmptyParagraph) {
      container.appendChild(child.cloneNode(true));
      break;
    }
  }
  typeset(container);
  return container.innerHTML;
}
// Cross-reference links: the popup content is resolved when the hover is
// triggered, either from this document or from a fetched page.
for (const xref of xrefs) {
  // Turn the popup back on and open it.
  const reveal = (instance) => {
    instance.enable();
    instance.show();
  };
  // Parse fetched page source into a detached HTML document.
  const parsePage = (html) => new DOMParser().parseFromString(html, "text/html");

  tippyHover(xref, undefined, function(instance) {
    // Stay disabled until the content has been resolved.
    instance.disable();
    const url = xref.getAttribute('href');
    let hash;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      // An href that does not parse as a URL leaves `hash` undefined.
      try { hash = new URL(url).hash; } catch {}
    }

    if (!hash) {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetch(url)
        .then(res => res.text())
        .then(html => {
          const note = parsePage(html).querySelector('main.content');
          if (note === null) {
            return;
          }
          // This should only happen for chapter cross references
          // (since there is no id in the URL)
          // remove the first header
          if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
            note.children[0].remove();
          }
          instance.setContent(processXRef(null, note));
        })
        .finally(() => reveal(instance));
      return;
    }

    const id = hash.replace(/^#\/?/, "");
    const localNote = window.document.getElementById(id);
    if (localNote === null) {
      // Not in this document: see if we can fetch the page it lives on
      fetch(url.split('#')[0])
        .then(res => res.text())
        .then(html => {
          const remoteNote = parsePage(html).getElementById(id);
          if (remoteNote !== null) {
            instance.setContent(processXRef(id, remoteNote));
          }
        })
        .finally(() => reveal(instance));
      return;
    }

    // Target is in this document: process a copy so the page is untouched.
    try {
      instance.setContent(processXRef(id, localNote.cloneNode(true)));
    } finally {
      reveal(instance);
    }
  }, function(instance) {
  });
}
// The annotation element whose code lines are currently highlighted, if any.
let selectedAnnoteEl;
// Builds the CSS selector for the <span> tagged with the given code cell and
// annotation values.
const selectorForAnnotation = ( cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
// Highlights the code lines that the annotation element `annoteEl` refers to
// and records it as the current selection.
//
// The target cell and annotation come from the element's `data-target-cell`
// and `data-target-annotation` attributes; the matching annotation span lists
// its lines in `data-code-lines`. Two absolutely positioned overlays, one
// over the lines and one in the cell's annotation gutter, are created on
// first use and repositioned afterwards.
const selectCodeLines = (annoteEl) => {
  const doc = window.document;
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = doc.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  // Line element ids have the form "<cell>-<line>".
  const lineIds = annoteSpan
    .getAttribute("data-code-lines")
    .split(",")
    .map((line) => targetCell + "-" + line);
  if (lineIds.length === 0) {
    return;
  }

  // Measure from the first listed line, extended to the bottom of the last
  // listed line when there is more than one.
  const firstLine = doc.getElementById(lineIds[0]);
  const top = firstLine.offsetTop;
  let height = firstLine.offsetHeight;
  const parent = firstLine.parentElement.parentElement;
  if (lineIds.length > 1) {
    const lastLine = doc.getElementById(lineIds[lineIds.length - 1]);
    const bottom = lastLine.offsetTop + lastLine.offsetHeight;
    height = bottom - top;
  }

  if (top !== null && height !== null && parent !== null) {
    // Fetch the overlay with this id, creating it (and attaching it to the
    // host that `findHost` returns) if it does not exist yet.
    const ensureOverlay = (overlayId, findHost) => {
      let overlay = doc.getElementById(overlayId);
      if (overlay === null) {
        overlay = doc.createElement("div");
        overlay.setAttribute("id", overlayId);
        overlay.style.position = 'absolute';
        findHost().appendChild(overlay);
      }
      return overlay;
    };

    // The overlays extend 2px above and below the measured lines.
    const lineOverlay = ensureOverlay("code-annotation-line-highlight", () => parent);
    lineOverlay.style.top = top - 2 + "px";
    lineOverlay.style.height = height + 4 + "px";
    lineOverlay.style.left = 0;

    const gutterOverlay = ensureOverlay("code-annotation-line-highlight-gutter", () => {
      const codeCell = doc.getElementById(targetCell);
      return codeCell.querySelector('.code-annotation-gutter');
    });
    gutterOverlay.style.top = top - 2 + "px";
    gutterOverlay.style.height = height + 4 + "px";
  }
  selectedAnnoteEl = annoteEl;
};
// Removes both highlight overlays (line and gutter) if they exist and clears
// the record of the selected annotation.
const unselectCodeLines = () => {
  const overlayIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  for (const overlayId of overlayIds) {
    window.document.getElementById(overlayId)?.remove();
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle: on window resize (rate-limited through
// `throttle`) recompute the highlight for the selected annotation, if any.
window.addEventListener("resize", throttle(() => {
  // NOTE(review): `elRect` is not declared in this part of the script;
  // confirm it is declared elsewhere, otherwise this assignment creates an
  // implicit global.
  elRect = undefined;
  if (!selectedAnnoteEl) {
    return;
  }
  selectCodeLines(selectedAnnoteEl);
}, 10));
// Wraps `fn` so that bursts of calls are rate-limited.
//
// The first call runs `fn` immediately. Every later call (re)starts a timer
// of `ms` milliseconds, cancelling the previous one; when a timer finally
// fires, `fn` runs with the arguments of the most recent call and the wrapper
// resets, so the next call runs immediately again.
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (!engaged) {
      // first call gets through
      fn.apply(this, args);
      engaged = true;
      return;
    }
    // all the others get throttled: drop the previously scheduled run
    if (pending) clearTimeout(pending);
    pending = setTimeout(() => {
      fn.apply(this, args);
      pending = engaged = false;
    }, ms);
  };
}
// Attach click handler to the DT: clicking an annotation entry highlights its
// code lines, clicking the selected entry again clears the highlight.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', () => {
    // Always act on the <dt> the listener is bound to. The event's `target`
    // is the innermost element that was clicked, which may be a descendant of
    // the <dt> that carries none of the `data-target-*` attributes that
    // selectCodeLines reads.
    const clickedEl = annoteDlNode;
    if (clickedEl !== selectedAnnoteEl) {
      // Switch the selection over to the clicked entry.
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}
// Walks up from `el` looking for the nearest ancestor whose `data-cites`
// attribute is non-empty.
//
// Returns `{ el, cites }`, where `el` is the child of that ancestor on the
// path from the starting element and `cites` is the attribute split on
// spaces, or undefined when no ancestor has one.
const findCites = (el) => {
  let node = el;
  while (node.parentElement) {
    const cites = node.parentElement.dataset.cites;
    if (cites) {
      return { el: node, cites: cites.split(' ') };
    }
    node = node.parentElement;
  }
  return undefined;
};
// Citation links: hovering one shows the bibliography entries it cites.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, () => {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      // One entry per cited key; it stays empty when the page has no
      // element with id `ref-<key>`.
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent', 'csl-entry');
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  });
}
1600 });
1601 </script>
1602</div> <!-- /content -->
<footer class="footer">
  <div class="nav-footer">
    <div class="nav-footer-left">
      <p>Built with <a href="https://quarto.org/">Quarto</a></p>
    </div>
    <div class="nav-footer-center">
      <!-- The icons are decorative (the link text already names the action), so they are hidden from assistive technology. -->
      <div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/your-org/atdata/edit/main/reference/atmosphere.qmd" class="toc-action"><i class="bi bi-github" aria-hidden="true"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty" aria-hidden="true"></i>Report an issue</a></li></ul></div>
    </div>
    <div class="nav-footer-right">
      <p>MIT License</p>
    </div>
  </div>
</footer>
1616
1617
1618
1619
1620</body></html>