A loose federation of distributed, typed datasets
1<!DOCTYPE html>
2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
3
4<meta charset="utf-8">
5<meta name="generator" content="quarto-1.7.34">
6
7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
8
9
10<title>atdata</title>
11<style>
12code{white-space: pre-wrap;}
13span.smallcaps{font-variant: small-caps;}
14div.columns{display: flex; gap: min(4vw, 1.5em);}
15div.column{flex: auto; overflow-x: auto;}
16div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
17ul.task-list{list-style: none;}
18ul.task-list li input[type="checkbox"] {
19 width: 0.8em;
20 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
21 vertical-align: middle;
22}
23/* CSS for syntax highlighting */
24html { -webkit-text-size-adjust: 100%; }
25pre > code.sourceCode { white-space: pre; position: relative; }
26pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
27pre > code.sourceCode > span:empty { height: 1.2em; }
28.sourceCode { overflow: visible; }
29code.sourceCode > span { color: inherit; text-decoration: inherit; }
30div.sourceCode { margin: 1em 0; }
31pre.sourceCode { margin: 0; }
32@media screen {
33div.sourceCode { overflow: auto; }
34}
35@media print {
36pre > code.sourceCode { white-space: pre-wrap; }
37pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
38}
39pre.numberSource code
40 { counter-reset: source-line 0; }
41pre.numberSource code > span
42 { position: relative; left: -4em; counter-increment: source-line; }
43pre.numberSource code > span > a:first-child::before
44 { content: counter(source-line);
45 position: relative; left: -1em; text-align: right; vertical-align: baseline;
46 border: none; display: inline-block;
47 -webkit-touch-callout: none; -webkit-user-select: none;
48 -khtml-user-select: none; -moz-user-select: none;
49 -ms-user-select: none; user-select: none;
50 padding: 0 4px; width: 4em;
51 }
52pre.numberSource { margin-left: 3em; padding-left: 4px; }
53div.sourceCode
54 { }
55@media screen {
56pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
57}
58</style>
59
60
61<script src="site_libs/quarto-nav/quarto-nav.js"></script>
62<script src="site_libs/quarto-nav/headroom.min.js"></script>
63<script src="site_libs/clipboard/clipboard.min.js"></script>
64<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
65<script src="site_libs/quarto-search/fuse.min.js"></script>
66<script src="site_libs/quarto-search/quarto-search.js"></script>
67<meta name="quarto:offset" content="./">
68<script src="site_libs/quarto-html/quarto.js" type="module"></script>
69<script src="site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
70<script src="site_libs/quarto-html/popper.min.js"></script>
71<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
72<script src="site_libs/quarto-html/anchor.min.js"></script>
73<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
74<link href="site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles">
75<link href="site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles">
76<link href="site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles">
77<script src="site_libs/bootstrap/bootstrap.min.js"></script>
78<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
79<link href="site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" data-mode="light">
80<link href="site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark">
81<link href="site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light">
82<script id="quarto-search-options" type="application/json">{
83 "location": "navbar",
84 "copy-button": false,
85 "collapse-after": 3,
86 "panel-placement": "end",
87 "type": "overlay",
88 "limit": 50,
89 "keyboard-shortcut": [
90 "f",
91 "/",
92 "s"
93 ],
94 "show-item-context": false,
95 "language": {
96 "search-no-results-text": "No results",
97 "search-matching-documents-text": "matching documents",
98 "search-copy-link-title": "Copy link to search",
99 "search-hide-matches-text": "Hide additional matches",
100 "search-more-match-text": "more match in this document",
101 "search-more-matches-text": "more matches in this document",
102 "search-clear-button-title": "Clear",
103 "search-text-placeholder": "",
104 "search-detached-cancel-button-title": "Cancel",
105 "search-submit-button-title": "Submit",
106 "search-label": "Search"
107 }
108}</script>
109
110
111<link rel="stylesheet" href="assets/styles.css">
112</head>
113
114<body class="nav-sidebar docked nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript">
// Mirror a Bootstrap stylesheet's `data-mode` attribute onto <body>:
// "dark" leaves the body with `quarto-dark`, any other value leaves it with
// `quarto-light`. The opposite class is always removed.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const bodyClasses = window.document.querySelector("body").classList;
  bodyClasses.toggle("quarto-dark", isDark);
  bodyClasses.toggle("quarto-light", !isDark);
}
// Sync <body> with the #quarto-bootstrap stylesheet link whose rel is not
// 'disabled-stylesheet'. Does nothing when no such link is found.
const toggleBodyColorPrimary = () => {
  const activeBootstrapSheet = window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])");
  if (activeBootstrapSheet === null) {
    return;
  }
  toggleBodyColorMode(activeBootstrapSheet);
}
// Add (alternate truthy) or remove (alternate falsy) the `alternate` class on
// every `.quarto-color-scheme-toggle` element.
const setColorSchemeToggle = (alternate) => {
  // Coerce explicitly: an undefined "force" argument would flip the class
  // instead of removing it.
  const showAlternate = Boolean(alternate);
  window.document
    .querySelectorAll('.quarto-color-scheme-toggle')
    .forEach((toggleEl) => {
      toggleEl.classList.toggle("alternate", showAlternate);
    });
};
// Apply the primary (alternate falsy) or alternate (alternate truthy) color
// scheme: switch stylesheets, sync the <body> class and update the toggles.
const toggleColorMode = (alternate) => {
  const marginLinkSelector = '#quarto-margin-sidebar .nav-link';
  const primarySheets = window.document.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)');
  const alternateSheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate');

  // Margin-sidebar links carry `notransition` while the stylesheets change.
  manageTransitions(marginLinkSelector, false);
  if (!alternate) {
    disableStylesheet(alternateSheets);
    enableStylesheet(primarySheets);
    toggleBodyColorPrimary();
  } else {
    // The alternate sheets are layered on top of the primary ones, so the
    // primary sheets are deliberately left enabled.
    enableStylesheet(alternateSheets);
    alternateSheets.forEach((sheetEl) => {
      if (sheetEl.id === "quarto-bootstrap") {
        toggleBodyColorMode(sheetEl);
      }
    });
  }
  manageTransitions(marginLinkSelector, true);

  // Bring the toggle controls in line with the new scheme.
  setColorSchemeToggle(alternate);

  // Safari does not properly recolor the scrollbar when toggling (#1455);
  // nudging the scroll position by one pixel and back works around it.
  const userAgent = navigator.userAgent;
  const isSafari = userAgent.indexOf('Safari') > 0 && userAgent.indexOf('Chrome') == -1;
  if (isSafari) {
    manageTransitions("body", false);
    window.scrollTo(0, 1);
    setTimeout(() => {
      window.scrollTo(0, 0);
      manageTransitions("body", true);
    }, 40);
  }
}
// Switch off each link in `stylesheets` by setting its rel to the
// non-standard value 'disabled-stylesheet'.
const disableStylesheet = (stylesheets) => {
  for (const sheetEl of stylesheets) {
    sheetEl.rel = 'disabled-stylesheet';
  }
}
// Switch on each link in `stylesheets` by setting its rel to 'stylesheet'.
const enableStylesheet = (stylesheets) => {
  for (const sheetEl of stylesheets) {
    // Leave already-enabled links untouched: Chrome will still FOUC
    // without this check.
    if (sheetEl.rel === 'stylesheet') {
      continue;
    }
    sheetEl.rel = 'stylesheet';
  }
}
// For every element matching `selector`, remove the `notransition` class when
// `allowTransitions` is truthy and add it otherwise.
const manageTransitions = (selector, allowTransitions) => {
  const suppress = !allowTransitions;
  for (const targetEl of window.document.querySelectorAll(selector)) {
    targetEl.classList.toggle('notransition', suppress);
  }
}
// True when the page was loaded through the file: protocol.
const isFileUrl = () => window.location.protocol === 'file:';
// True only when the stored color-scheme sentinel is exactly "alternate";
// a null sentinel counts as false.
const hasAlternateSentinel = () => getColorSchemeSentinel() === "alternate";
// Remember the chosen scheme as "alternate" or "default". On file: URLs the
// value goes into the in-memory `localAlternateSentinel`; otherwise it is
// written to localStorage under "quarto-color-scheme".
const setStyleSentinel = (alternate) => {
  const sentinel = alternate ? "alternate" : "default";
  if (isFileUrl()) {
    localAlternateSentinel = sentinel;
    return;
  }
  window.localStorage.setItem("quarto-color-scheme", sentinel);
}
// Read the remembered scheme. On file: URLs only the in-memory
// `localAlternateSentinel` is consulted; otherwise localStorage is read and
// the in-memory value is the fallback when nothing is stored.
const getColorSchemeSentinel = () => {
  if (isFileUrl()) {
    return localAlternateSentinel;
  }
  return window.localStorage.getItem("quarto-color-scheme") ?? localAlternateSentinel;
}
// If a giscus comment iframe is present, post it the theme matching the
// current color scheme. Theme names come from the optional
// #giscus-base-theme / #giscus-alt-theme elements, defaulting to
// 'light' / 'dark'.
// NOTE(review): the `darkModeDefault` parameter is never read here; the
// choice is driven by the outer `authorPrefersDark` constant instead.
const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => {
  const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light';
  const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark';

  // When the author prefers dark, the base/alternate pairing is inverted.
  const useAlternateTheme = authorPrefersDark ? !isAlternate : Boolean(isAlternate);
  const newTheme = useAlternateTheme ? alternateTheme : baseTheme;

  const giscusFrame = window.document.querySelector('iframe.giscus-frame');
  if (giscusFrame === null) {
    return;
  }
  // Message shape from: https://github.com/giscus/giscus/issues/336
  giscusFrame.contentWindow.postMessage(
    { giscus: { setConfig: { theme: newTheme } } },
    'https://giscus.app'
  );
};
// Page-level defaults: `authorPrefersDark` is false here, so the in-memory
// sentinel declared below starts out as 'default'.
const authorPrefersDark = false;
const darkModeDefault = authorPrefersDark;

// Switch off the two "-extra" stylesheet links.
for (const extraSheetSelector of [
  'link#quarto-text-highlighting-styles.quarto-color-scheme-extra',
  'link#quarto-bootstrap.quarto-color-scheme-extra',
]) {
  document.querySelector(extraSheetSelector).rel = 'disabled-stylesheet';
}

let localAlternateSentinel = darkModeDefault ? 'alternate' : 'default';

// Dark / light mode switch, exposed on window for the navbar toggle.
window.quartoToggleColorScheme = () => {
  // Flip whatever scheme is currently remembered.
  const toAlternate = !hasAlternateSentinel();
  toggleColorMode(toAlternate);
  setStyleSentinel(toAlternate);
  toggleGiscusIfUsed(toAlternate, darkModeDefault);
  window.dispatchEvent(new Event('resize'));
};

// Apply the remembered scheme right away.
toggleColorMode(hasAlternateSentinel());
276 </script>
277
278<div id="quarto-search-results"></div>
279 <header id="quarto-header" class="headroom fixed-top">
280 <nav class="navbar navbar-expand-lg " data-bs-theme="dark">
281 <div class="navbar-container container-fluid">
282 <div class="navbar-brand-container mx-auto">
283 <a class="navbar-brand" href="./index.html">
284 <span class="navbar-title">atdata</span>
285 </a>
286 </div>
287 <div id="quarto-search" class="" title="Search"></div>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
289 <span class="navbar-toggler-icon"></span>
290</button>
291 <div class="collapse navbar-collapse" id="navbarCollapse">
292 <ul class="navbar-nav navbar-nav-scroll me-auto">
293 <li class="nav-item">
294 <a class="nav-link active" href="./index.html" aria-current="page">
295<span class="menu-text">Guide</span></a>
296 </li>
297 <li class="nav-item dropdown ">
298 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="link" data-bs-toggle="dropdown" aria-expanded="false">
299 <span class="menu-text">Tutorials</span>
300 </a>
301 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials">
302 <li>
303 <a class="dropdown-item" href="./tutorials/quickstart.html">
304 <span class="dropdown-text">Quick Start</span></a>
305 </li>
306 <li>
307 <a class="dropdown-item" href="./tutorials/local-workflow.html">
308 <span class="dropdown-text">Local Workflow</span></a>
309 </li>
310 <li>
311 <a class="dropdown-item" href="./tutorials/atmosphere.html">
312 <span class="dropdown-text">Atmosphere Publishing</span></a>
313 </li>
314 <li>
315 <a class="dropdown-item" href="./tutorials/promotion.html">
316 <span class="dropdown-text">Promotion Workflow</span></a>
317 </li>
318 </ul>
319 </li>
320 <li class="nav-item dropdown ">
321 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="link" data-bs-toggle="dropdown" aria-expanded="false">
322 <span class="menu-text">Reference</span>
323 </a>
324 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference">
325 <li>
326 <a class="dropdown-item" href="./reference/architecture.html">
327 <span class="dropdown-text">Architecture Overview</span></a>
328 </li>
329 <li>
330 <a class="dropdown-item" href="./reference/packable-samples.html">
331 <span class="dropdown-text">Packable Samples</span></a>
332 </li>
333 <li>
334 <a class="dropdown-item" href="./reference/datasets.html">
335 <span class="dropdown-text">Datasets</span></a>
336 </li>
337 <li>
338 <a class="dropdown-item" href="./reference/lenses.html">
339 <span class="dropdown-text">Lenses</span></a>
340 </li>
341 <li>
342 <a class="dropdown-item" href="./reference/local-storage.html">
343 <span class="dropdown-text">Local Storage</span></a>
344 </li>
345 <li>
346 <a class="dropdown-item" href="./reference/atmosphere.html">
347 <span class="dropdown-text">Atmosphere</span></a>
348 </li>
349 <li>
350 <a class="dropdown-item" href="./reference/promotion.html">
351 <span class="dropdown-text">Promotion</span></a>
352 </li>
353 <li>
354 <a class="dropdown-item" href="./reference/load-dataset.html">
355 <span class="dropdown-text">load_dataset API</span></a>
356 </li>
357 <li>
358 <a class="dropdown-item" href="./reference/protocols.html">
359 <span class="dropdown-text">Protocols</span></a>
360 </li>
361 <li>
362 <a class="dropdown-item" href="./reference/uri-spec.html">
363 <span class="dropdown-text">URI Specification</span></a>
364 </li>
365 <li>
366 <a class="dropdown-item" href="./reference/troubleshooting.html">
<span class="dropdown-text">Troubleshooting &amp; FAQ</span></a>
368 </li>
369 <li>
370 <a class="dropdown-item" href="./reference/deployment.html">
371 <span class="dropdown-text">Deployment Guide</span></a>
372 </li>
373 </ul>
374 </li>
375 <li class="nav-item">
376 <a class="nav-link" href="./api/index.html">
377<span class="menu-text">API</span></a>
378 </li>
379</ul>
380 <ul class="navbar-nav navbar-nav-scroll ms-auto">
381 <li class="nav-item compact">
<a class="nav-link" href="https://github.com/your-org/atdata" aria-label="GitHub"> <i class="bi bi-github" role="img">
</i>
<span class="menu-text"></span></a>
385 </li>
386</ul>
387 </div> <!-- /navcollapse -->
388 <div class="quarto-navbar-tools">
389 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a>
390</div>
391 </div> <!-- /container-fluid -->
392 </nav>
393 <nav class="quarto-secondary-nav">
394 <div class="container-fluid d-flex">
395 <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
396 <i class="bi bi-layout-text-sidebar-reverse"></i>
397 </button>
398 <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./index.html">atdata</a></li></ol></nav>
399 <a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
400 </a>
401 </div>
402 </nav>
403</header>
404<!-- content -->
405<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
406<!-- sidebar -->
407 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
408 <div class="sidebar-menu-container">
409 <ul class="list-unstyled mt-1">
410 <li class="sidebar-item">
411 <div class="sidebar-item-container">
412 <a href="./index.html" class="sidebar-item-text sidebar-link active">
413 <span class="menu-text">atdata</span></a>
414 </div>
415</li>
416 <li class="sidebar-item sidebar-item-section">
417 <div class="sidebar-item-container">
418 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
419 <span class="menu-text">Getting Started</span></a>
420 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
421 <i class="bi bi-chevron-right ms-2"></i>
422 </a>
423 </div>
424 <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
425 <li class="sidebar-item">
426 <div class="sidebar-item-container">
427 <a href="./tutorials/quickstart.html" class="sidebar-item-text sidebar-link">
428 <span class="menu-text">Quick Start</span></a>
429 </div>
430</li>
431 <li class="sidebar-item">
432 <div class="sidebar-item-container">
433 <a href="./tutorials/local-workflow.html" class="sidebar-item-text sidebar-link">
434 <span class="menu-text">Local Workflow</span></a>
435 </div>
436</li>
437 <li class="sidebar-item">
438 <div class="sidebar-item-container">
439 <a href="./tutorials/atmosphere.html" class="sidebar-item-text sidebar-link">
440 <span class="menu-text">Atmosphere Publishing</span></a>
441 </div>
442</li>
443 <li class="sidebar-item">
444 <div class="sidebar-item-container">
445 <a href="./tutorials/promotion.html" class="sidebar-item-text sidebar-link">
446 <span class="menu-text">Promotion Workflow</span></a>
447 </div>
448</li>
449 </ul>
450 </li>
451 <li class="sidebar-item sidebar-item-section">
452 <div class="sidebar-item-container">
453 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true">
454 <span class="menu-text">Reference</span></a>
455 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section">
456 <i class="bi bi-chevron-right ms-2"></i>
457 </a>
458 </div>
459 <ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
460 <li class="sidebar-item">
461 <div class="sidebar-item-container">
462 <a href="./reference/architecture.html" class="sidebar-item-text sidebar-link">
463 <span class="menu-text">Architecture Overview</span></a>
464 </div>
465</li>
466 <li class="sidebar-item">
467 <div class="sidebar-item-container">
468 <a href="./reference/packable-samples.html" class="sidebar-item-text sidebar-link">
469 <span class="menu-text">Packable Samples</span></a>
470 </div>
471</li>
472 <li class="sidebar-item">
473 <div class="sidebar-item-container">
474 <a href="./reference/datasets.html" class="sidebar-item-text sidebar-link">
475 <span class="menu-text">Datasets</span></a>
476 </div>
477</li>
478 <li class="sidebar-item">
479 <div class="sidebar-item-container">
480 <a href="./reference/lenses.html" class="sidebar-item-text sidebar-link">
481 <span class="menu-text">Lenses</span></a>
482 </div>
483</li>
484 <li class="sidebar-item">
485 <div class="sidebar-item-container">
486 <a href="./reference/local-storage.html" class="sidebar-item-text sidebar-link">
487 <span class="menu-text">Local Storage</span></a>
488 </div>
489</li>
490 <li class="sidebar-item">
491 <div class="sidebar-item-container">
492 <a href="./reference/atmosphere.html" class="sidebar-item-text sidebar-link">
493 <span class="menu-text">Atmosphere (ATProto Integration)</span></a>
494 </div>
495</li>
496 <li class="sidebar-item">
497 <div class="sidebar-item-container">
498 <a href="./reference/promotion.html" class="sidebar-item-text sidebar-link">
499 <span class="menu-text">Promotion Workflow</span></a>
500 </div>
501</li>
502 <li class="sidebar-item">
503 <div class="sidebar-item-container">
504 <a href="./reference/load-dataset.html" class="sidebar-item-text sidebar-link">
505 <span class="menu-text">load_dataset API</span></a>
506 </div>
507</li>
508 <li class="sidebar-item">
509 <div class="sidebar-item-container">
510 <a href="./reference/protocols.html" class="sidebar-item-text sidebar-link">
511 <span class="menu-text">Protocols</span></a>
512 </div>
513</li>
514 <li class="sidebar-item">
515 <div class="sidebar-item-container">
516 <a href="./reference/uri-spec.html" class="sidebar-item-text sidebar-link">
517 <span class="menu-text">URI Specification</span></a>
518 </div>
519</li>
520 <li class="sidebar-item">
521 <div class="sidebar-item-container">
522 <a href="./reference/troubleshooting.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Troubleshooting &amp; FAQ</span></a>
524 </div>
525</li>
526 <li class="sidebar-item">
527 <div class="sidebar-item-container">
528 <a href="./reference/deployment.html" class="sidebar-item-text sidebar-link">
529 <span class="menu-text">Deployment Guide</span></a>
530 </div>
531</li>
532 </ul>
533 </li>
534 </ul>
535 </div>
536</nav>
537<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
538<!-- margin-sidebar -->
539 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
540 <nav id="TOC" role="doc-toc" class="toc-active">
541 <h2 id="toc-title">On this page</h2>
542
543 <ul>
544 <li><a href="#atdata" id="toc-atdata" class="nav-link active" data-scroll-target="#atdata">atdata</a></li>
545 <li><a href="#the-challenge" id="toc-the-challenge" class="nav-link" data-scroll-target="#the-challenge">The Challenge</a></li>
546 <li><a href="#what-is-atdata" id="toc-what-is-atdata" class="nav-link" data-scroll-target="#what-is-atdata">What is atdata?</a></li>
547 <li><a href="#the-architecture" id="toc-the-architecture" class="nav-link" data-scroll-target="#the-architecture">The Architecture</a></li>
548 <li><a href="#installation" id="toc-installation" class="nav-link" data-scroll-target="#installation">Installation</a></li>
549 <li><a href="#quick-example" id="toc-quick-example" class="nav-link" data-scroll-target="#quick-example">Quick Example</a>
550 <ul class="collapse">
551 <li><a href="#define-a-sample-type" id="toc-define-a-sample-type" class="nav-link" data-scroll-target="#define-a-sample-type">1. Define a Sample Type</a></li>
552 <li><a href="#create-and-write-samples" id="toc-create-and-write-samples" class="nav-link" data-scroll-target="#create-and-write-samples">2. Create and Write Samples</a></li>
553 <li><a href="#load-and-iterate-with-type-safety" id="toc-load-and-iterate-with-type-safety" class="nav-link" data-scroll-target="#load-and-iterate-with-type-safety">3. Load and Iterate with Type Safety</a></li>
554 </ul></li>
555 <li><a href="#scaling-up" id="toc-scaling-up" class="nav-link" data-scroll-target="#scaling-up">Scaling Up</a>
556 <ul class="collapse">
557 <li><a href="#team-storage-with-redis-s3" id="toc-team-storage-with-redis-s3" class="nav-link" data-scroll-target="#team-storage-with-redis-s3">Team Storage with Redis + S3</a></li>
558 <li><a href="#federation-with-atproto" id="toc-federation-with-atproto" class="nav-link" data-scroll-target="#federation-with-atproto">Federation with ATProto</a></li>
559 </ul></li>
560 <li><a href="#huggingface-style-loading" id="toc-huggingface-style-loading" class="nav-link" data-scroll-target="#huggingface-style-loading">HuggingFace-Style Loading</a></li>
561 <li><a href="#why-atdata" id="toc-why-atdata" class="nav-link" data-scroll-target="#why-atdata">Why atdata?</a></li>
562 <li><a href="#next-steps" id="toc-next-steps" class="nav-link" data-scroll-target="#next-steps">Next Steps</a></li>
563 </ul>
564<div class="toc-actions"><ul><li><a href="https://github.com/your-org/atdata/edit/main/index.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav>
565 </div>
566<!-- main -->
567<main class="content" id="quarto-document-content">
568
569
570<header id="title-block-header" class="quarto-title-block default">
571<div class="quarto-title">
572<h1 class="title">atdata</h1>
573<p class="subtitle lead">A loose federation of distributed, typed datasets built on WebDataset</p>
574</div>
575
576
577
578<div class="quarto-title-meta">
579
580
581
582
583 </div>
584
585
586
587</header>
588
589
590<section id="atdata" class="level1 hero">
591<h1>atdata</h1>
592<p>A loose federation of distributed, typed datasets built on WebDataset.</p>
593<p><a href="./tutorials/quickstart.html" class="btn btn-primary btn-lg">Get Started</a> <a href="https://github.com/your-org/atdata" class="btn btn-outline-secondary btn-lg">View on GitHub</a></p>
594</section>
595<section id="the-challenge" class="level2">
596<h2 class="anchored" data-anchor-id="the-challenge">The Challenge</h2>
597<p>Machine learning datasets are everywhere—training data, validation sets, embeddings, features, model outputs. Yet working with them often means:</p>
598<ul>
599<li><strong>Runtime surprises</strong>: Discovering a field is missing or has the wrong type during training</li>
600<li><strong>Copy-paste schemas</strong>: Redefining the same sample structure across notebooks and scripts</li>
601<li><strong>Storage silos</strong>: Data stuck in one location, invisible to collaborators</li>
602<li><strong>Discovery friction</strong>: No standard way to find datasets across teams or organizations</li>
603</ul>
604<p>atdata solves these problems with a simple idea: <strong>typed, serializable samples</strong> that flow seamlessly from local development to team storage to federated sharing.</p>
605</section>
606<section id="what-is-atdata" class="level2">
607<h2 class="anchored" data-anchor-id="what-is-atdata">What is atdata?</h2>
608<p>atdata is a Python library that combines:</p>
609<div class="feature-cards">
610<section id="typed-samples" class="level3 feature-card">
611<h3 class="anchored" data-anchor-id="typed-samples">Typed Samples</h3>
612<p>Define dataclass-based sample types with automatic msgpack serialization. Catch schema errors at definition time, not training time.</p>
613</section>
614<section id="efficient-storage" class="level3 feature-card">
615<h3 class="anchored" data-anchor-id="efficient-storage">Efficient Storage</h3>
616<p>Built on WebDataset’s proven tar-based format. Stream large datasets without downloading everything first.</p>
617</section>
618<section id="lens-transformations" class="level3 feature-card">
619<h3 class="anchored" data-anchor-id="lens-transformations">Lens Transformations</h3>
620<p>View datasets through different schemas without duplicating data. Perfect for feature extraction, schema migration, and multi-task learning.</p>
621</section>
622<section id="batch-aggregation" class="level3 feature-card">
623<h3 class="anchored" data-anchor-id="batch-aggregation">Batch Aggregation</h3>
624<p>Automatic numpy stacking for NDArray fields. No more manual collation code—just iterate and train.</p>
625</section>
626<section id="team-storage" class="level3 feature-card">
627<h3 class="anchored" data-anchor-id="team-storage">Team Storage</h3>
628<p>Redis + S3 backend for shared dataset indexes. Publish schemas, track versions, and enable team discovery.</p>
629</section>
630<section id="atproto-federation" class="level3 feature-card">
631<h3 class="anchored" data-anchor-id="atproto-federation">ATProto Federation</h3>
632<p>Publish datasets to the decentralized AT Protocol network. Enable cross-organization discovery without centralized infrastructure.</p>
633</section>
634</div>
635</section>
636<section id="the-architecture" class="level2">
637<h2 class="anchored" data-anchor-id="the-architecture">The Architecture</h2>
638<p>atdata provides a three-layer progression for your datasets:</p>
639<pre><code>┌─────────────────────────────────────────────────────────────┐
640│ Federation: ATProto Atmosphere │
641│ Decentralized discovery, cross-org sharing │
642└─────────────────────────────────────────────────────────────┘
643 ↑ promote
644┌─────────────────────────────────────────────────────────────┐
645│ Team Storage: Redis + S3 │
646│ Shared index, versioned schemas, S3 data │
647└─────────────────────────────────────────────────────────────┘
648 ↑ insert
649┌─────────────────────────────────────────────────────────────┐
650│ Local Development │
651│ Typed samples, WebDataset files, fast iteration │
652└─────────────────────────────────────────────────────────────┘</code></pre>
653<p>Start local, scale to your team, and optionally share with the world—all with the same sample types and consistent APIs.</p>
654</section>
655<section id="installation" class="level2">
656<h2 class="anchored" data-anchor-id="installation">Installation</h2>
657<div class="install-box">
658<div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install atdata</span>
659<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span>
660<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="co"># With ATProto support</span></span>
661<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install atdata<span class="pp">[</span><span class="ss">atmosphere</span><span class="pp">]</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
662</div>
663</section>
664<section id="quick-example" class="level2">
665<h2 class="anchored" data-anchor-id="quick-example">Quick Example</h2>
666<section id="define-a-sample-type" class="level3">
667<h3 class="anchored" data-anchor-id="define-a-sample-type">1. Define a Sample Type</h3>
668<p>The <code>@packable</code> decorator creates a serializable dataclass:</p>
669<div id="bde2db89" class="cell">
670<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
671<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span>
672<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span>
673<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a></span>
674<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span>
675<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ImageSample:</span>
676<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> image: NDArray <span class="co"># Automatically handled as bytes</span></span>
677<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span>
678<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> confidence: <span class="bu">float</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
679</div>
680</section>
681<section id="create-and-write-samples" class="level3">
682<h3 class="anchored" data-anchor-id="create-and-write-samples">2. Create and Write Samples</h3>
683<p>Use WebDataset’s standard TarWriter:</p>
684<div id="4fadf976" class="cell">
685<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span>
686<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span>
687<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>samples <span class="op">=</span> [</span>
688<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> ImageSample(</span>
689<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> image<span class="op">=</span>np.random.rand(<span class="dv">224</span>, <span class="dv">224</span>, <span class="dv">3</span>).astype(np.float32),</span>
690<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> label<span class="op">=</span><span class="st">"cat"</span>,</span>
691<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a> confidence<span class="op">=</span><span class="fl">0.95</span>,</span>
692<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a> )</span>
693<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> _ <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">100</span>)</span>
694<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a>]</span>
695<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a></span>
696<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.TarWriter(<span class="st">"data-000000.tar"</span>) <span class="im">as</span> sink:</span>
697<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i, sample <span class="kw">in</span> <span class="bu">enumerate</span>(samples):</span>
698<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>sample.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"sample_</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
699</div>
700</section>
701<section id="load-and-iterate-with-type-safety" class="level3">
702<h3 class="anchored" data-anchor-id="load-and-iterate-with-type-safety">3. Load and Iterate with Type Safety</h3>
703<p>The generic <code>Dataset[T]</code> provides typed access:</p>
704<div id="908f965e" class="cell">
705<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-000000.tar"</span>)</span>
706<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span>
707<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.shuffled(batch_size<span class="op">=</span><span class="dv">32</span>):</span>
708<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> images <span class="op">=</span> batch.image <span class="co"># numpy array (32, 224, 224, 3)</span></span>
709<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> labels <span class="op">=</span> batch.label <span class="co"># list of 32 strings</span></span>
710<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a> confs <span class="op">=</span> batch.confidence <span class="co"># list of 32 floats</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
711</div>
712</section>
713</section>
714<section id="scaling-up" class="level2">
715<h2 class="anchored" data-anchor-id="scaling-up">Scaling Up</h2>
716<section id="team-storage-with-redis-s3" class="level3">
717<h3 class="anchored" data-anchor-id="team-storage-with-redis-s3">Team Storage with Redis + S3</h3>
718<p>When you’re ready to share with your team:</p>
719<div id="ac419917" class="cell">
720<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex, S3DataStore</span>
721<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a></span>
722<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Connect to team infrastructure</span></span>
723<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> S3DataStore(</span>
724<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> credentials<span class="op">=</span>{<span class="st">"AWS_ENDPOINT"</span>: <span class="st">"http://localhost:9000"</span>, ...},</span>
725<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a> bucket<span class="op">=</span><span class="st">"team-datasets"</span>,</span>
726<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>)</span>
727<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> LocalIndex(data_store<span class="op">=</span>store)</span>
728<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a></span>
729<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish schema for consistency</span></span>
730<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a>index.publish_schema(ImageSample, version<span class="op">=</span><span class="st">"1.0.0"</span>)</span>
731<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a></span>
732<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Insert dataset (writes to S3, indexes in Redis)</span></span>
733<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data.tar"</span>)</span>
734<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(dataset, name<span class="op">=</span><span class="st">"training-images-v1"</span>)</span>
735<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a></span>
736<span id="cb6-17"><a href="#cb6-17" aria-hidden="true" tabindex="-1"></a><span class="co"># Team members can now discover and load</span></span>
737<span id="cb6-18"><a href="#cb6-18" aria-hidden="true" tabindex="-1"></a><span class="co"># ds = atdata.load_dataset("@local/training-images-v1", index=index)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
738</div>
739</section>
740<section id="federation-with-atproto" class="level3">
741<h3 class="anchored" data-anchor-id="federation-with-atproto">Federation with ATProto</h3>
742<p>For public or cross-organization sharing:</p>
743<div id="de1b2de2" class="cell">
744<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, AtmosphereIndex, PDSBlobStore</span>
745<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.promote <span class="im">import</span> promote_to_atmosphere</span>
746<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a></span>
747<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Authenticate with your ATProto identity</span></span>
748<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span>
749<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"handle.bsky.social"</span>, <span class="st">"app-password"</span>)</span>
750<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a></span>
751<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Option 1: Promote existing local dataset</span></span>
752<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.get_dataset(<span class="st">"training-images-v1"</span>)</span>
753<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>at_uri <span class="op">=</span> promote_to_atmosphere(entry, index, client)</span>
754<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a></span>
755<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Option 2: Publish directly with blob storage</span></span>
756<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> PDSBlobStore(client)</span>
757<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a>atm_index <span class="op">=</span> AtmosphereIndex(client, data_store<span class="op">=</span>store)</span>
758<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a>atm_index.insert_dataset(dataset, name<span class="op">=</span><span class="st">"public-images"</span>, schema_ref<span class="op">=</span>schema_uri)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
759</div>
760</section>
761</section>
762<section id="huggingface-style-loading" class="level2">
763<h2 class="anchored" data-anchor-id="huggingface-style-loading">HuggingFace-Style Loading</h2>
764<p>For convenient access to datasets:</p>
765<div id="b7319285" class="cell">
766<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> load_dataset</span>
767<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span>
768<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Load from local files</span></span>
769<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"path/to/data-{000000..000009}.tar"</span>)</span>
770<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a></span>
771<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Load with split detection</span></span>
772<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>ds_dict <span class="op">=</span> load_dataset(<span class="st">"path/to/data/"</span>)</span>
773<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a>train_ds <span class="op">=</span> ds_dict[<span class="st">"train"</span>]</span>
774<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a>test_ds <span class="op">=</span> ds_dict[<span class="st">"test"</span>]</span>
775<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a></span>
776<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Load from index</span></span>
777<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"@local/my-dataset"</span>, index<span class="op">=</span>index)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
778</div>
779</section>
780<section id="why-atdata" class="level2">
781<h2 class="anchored" data-anchor-id="why-atdata">Why atdata?</h2>
782<table class="caption-top table">
783<colgroup>
784<col style="width: 37%">
785<col style="width: 62%">
786</colgroup>
787<thead>
788<tr class="header">
789<th>Need</th>
790<th>Solution</th>
791</tr>
792</thead>
793<tbody>
794<tr class="odd">
795<td>Type-safe samples</td>
796<td><code>@packable</code> decorator, <code>PackableSample</code> base class</td>
797</tr>
798<tr class="even">
799<td>Efficient large-scale storage</td>
800<td>WebDataset tar format, streaming iteration</td>
801</tr>
802<tr class="odd">
803<td>Schema flexibility</td>
804<td>Lens transformations, <code>DictSample</code> for exploration</td>
805</tr>
806<tr class="even">
807<td>Team collaboration</td>
808<td>Redis index, S3 data store, schema registry</td>
809</tr>
810<tr class="odd">
811<td>Public sharing</td>
812<td>ATProto federation, content-addressable CIDs</td>
813</tr>
814<tr class="even">
815<td>Multiple backends</td>
816<td>Protocol abstractions (<code>AbstractIndex</code>, <code>DataSource</code>)</td>
817</tr>
818</tbody>
819</table>
820</section>
821<section id="next-steps" class="level2">
822<h2 class="anchored" data-anchor-id="next-steps">Next Steps</h2>
823<div class="callout callout-style-default callout-tip callout-titled">
824<div class="callout-header d-flex align-content-center">
825<div class="callout-icon-container">
826<i class="callout-icon"></i>
827</div>
828<div class="callout-title-container flex-fill">
829Getting Started
830</div>
831</div>
832<div class="callout-body-container callout-body">
833<p><strong>New to atdata?</strong> Start with the <a href="./tutorials/quickstart.html">Quick Start Tutorial</a> to learn the basics of typed samples and datasets.</p>
834</div>
835</div>
836<ul>
837<li><strong><a href="./reference/architecture.html">Architecture Overview</a></strong> - Understand the design and how components fit together</li>
838<li><strong><a href="./tutorials/local-workflow.html">Local Workflow</a></strong> - Set up team storage with Redis + S3</li>
839<li><strong><a href="./tutorials/atmosphere.html">Atmosphere Publishing</a></strong> - Share datasets on the ATProto network</li>
840<li><strong><a href="./reference/packable-samples.html">Packable Samples</a></strong> - Deep dive into sample type definitions</li>
841<li><strong><a href="./reference/datasets.html">Datasets</a></strong> - Master iteration, batching, and transformations</li>
842</ul>
843
844
845</section>
846
847</main> <!-- /main -->
848<script id="quarto-html-after-body" type="application/javascript">
849 window.document.addEventListener("DOMContentLoaded", function (event) {
850 // Ensure there is a toggle, if there isn't float one in the top right
851 if (window.document.querySelector('.quarto-color-scheme-toggle') === null) {
852 const a = window.document.createElement('a');
853 a.classList.add('top-right');
854 a.classList.add('quarto-color-scheme-toggle');
855 a.href = "";
856 a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; };
857 const i = window.document.createElement("i");
858 i.classList.add('bi');
859 a.appendChild(i);
860 window.document.body.appendChild(a);
861 }
862 setColorSchemeToggle(hasAlternateSentinel())
863 const icon = "";
// Attach AnchorJS links, placed on the right, to every element marked `.anchored`.
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon };
anchorJS.add('.anchored');
// Returns true when any class on `el` starts with "code-annotation-".
const isCodeAnnotation = (el) => {
  return Array.from(el.classList).some((clz) => clz.startsWith('code-annotation-'));
}
// ClipboardJS "success" handler: flashes a "Copied!" state on the copy button
// (plus a Bootstrap tooltip when Bootstrap is loaded) and restores it after 1s.
const onCopySuccess = function(e) {
  const copyButton = e.trigger;
  const copiedLabel = "Copied!";
  const tooltipAttrs = [
    ["data-bs-toggle", "tooltip"],
    ["data-bs-placement", "left"],
    ["data-bs-title", copiedLabel]
  ];

  // don't keep focus
  copyButton.blur();
  // flash "checked"
  copyButton.classList.add('code-copy-button-checked');
  const previousTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", copiedLabel);

  let tooltip;
  if (window.bootstrap) {
    tooltipAttrs.forEach(([name, value]) => copyButton.setAttribute(name, value));
    tooltip = new bootstrap.Tooltip(copyButton, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8]
    });
    tooltip.show();
  }

  // Undo everything above once the flash has been visible for a second.
  const restoreButton = () => {
    if (tooltip) {
      tooltip.hide();
      for (const name of ["data-bs-title", "data-bs-toggle", "data-bs-placement"]) {
        copyButton.removeAttribute(name);
      }
    }
    copyButton.setAttribute("title", previousTitle);
    copyButton.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);

  // clear code selection
  e.clearSelection();
}
// Returns the text a copy button should place on the clipboard: the text of the
// element immediately preceding `trigger`, with code-annotation children removed.
// Works on a deep clone, so the rendered page is never modified.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // `children` is a live collection: removing a node while iterating it directly
  // skips the following sibling, so iterate over a static snapshot instead.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
// Wire ClipboardJS to the copy buttons that live outside the embedded-source modal.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);
// Buttons inside the modal get their own instance, with the modal as container.
const sourceCodeModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (sourceCodeModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: sourceCodeModal
  });
  clipboardModal.on('success', onCopySuccess);
}
// Patterns for hrefs that count as internal. Regex literals are used so the
// escapes survive: inside a string literal "\." collapses to ".", which made the
// repository pattern match any character in place of the dot.
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
// NOTE(review): "your-org" looks like an unconfigured placeholder for the
// repository URL — confirm against the site configuration.
var filterRegex = /https:\/\/github\.com\/your-org\/atdata/;
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (isInternal(link.href)) {
    continue;
  }
  // undo the damage that might have been done by quarto-nav.js in the case of
  // links that we want to consider external
  if (link.dataset.originalHref !== undefined) {
    link.href = link.dataset.originalHref;
  }
}
// Registers a tippy popup on `el` using the shared Quarto popup settings.
// `contentFn`, `onTriggerFn` and `onUntriggerFn` are optional; each is only
// placed in the tippy config when a truthy value is supplied.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const options = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    appendTo: function(target) {
      return target.parentElement;
    },
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalHooks = [
    ['content', contentFn],
    ['onTrigger', onTriggerFn],
    ['onUntrigger', onUntriggerFn],
  ];
  for (const [key, hook] of optionalHooks) {
    if (hook) {
      options[key] = hook;
    }
  }
  window.tippy(el, options);
}
// Footnote references: on hover, show the HTML of the note the link points at.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
noterefs.forEach((ref) => {
  tippyHover(ref, function() {
    // use id or data attribute instead here
    const rawHref = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    let fragment = rawHref;
    // Absolute URLs are reduced to their hash; anything unparsable is used as-is.
    try { fragment = new URL(rawHref).hash; } catch {}
    const note = window.document.getElementById(fragment.replace(/^#\/?/, ""));
    return note ? note.innerHTML : "";
  });
});
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Produces the popup HTML for a cross-reference target.
//   id   - target id, or null when a whole page was fetched
//   note - the target element (mutated: column classes and anchor links are removed)
// Sections (null id or an id starting with "sec-") are trimmed to their first child
// plus the first following child that is not an empty <p>; callouts return their
// outerHTML; everything else returns its innerHTML.
const processXRef = (id, note) => {
  const typeset = (target) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(target);
    }
  };
  // Strip column container classes
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  };
  stripColumnClz(note);

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typeset(note);
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const hasEnoughChildren = note.children && note.children.length > 2;
  if (!hasEnoughChildren) {
    typeset(note);
    return note.innerHTML;
  }
  const container = document.createElement("div");
  container.appendChild(note.children[0].cloneNode(true));
  const firstContent = Array.from(note.children)
    .slice(1)
    .find((child) => !(child.tagName === "P" && child.innerText === ""));
  if (firstContent) {
    container.appendChild(firstContent.cloneNode(true));
  }
  typeset(container);
  return container.innerHTML;
}
// Cross references: popup content is resolved lazily when the hover triggers.
// The popup is disabled while content is looked up (in this page, or by fetching
// the linked page) and is re-enabled and shown once the lookup settles.
for (const xref of xrefs) {
  tippyHover(xref, undefined, function(instance) {
    const reveal = () => {
      instance.enable();
      instance.show();
    };
    const parseDocument = (markup) => new DOMParser().parseFromString(markup, "text/html");

    instance.disable();
    const url = xref.getAttribute('href');
    let hash;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      try { hash = new URL(url).hash; } catch {}
    }

    if (!hash) {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetch(url)
        .then((res) => res.text())
        .then((markup) => {
          const chapter = parseDocument(markup).querySelector('main.content');
          if (chapter === null) {
            return;
          }
          // This should only happen for chapter cross references
          // (since there is no id in the URL)
          // remove the first header
          if (chapter.children.length > 0 && chapter.children[0].tagName === "HEADER") {
            chapter.children[0].remove();
          }
          instance.setContent(processXRef(null, chapter));
        })
        .finally(reveal);
      return;
    }

    const id = hash.replace(/^#\/?/, "");
    const localNote = window.document.getElementById(id);
    if (localNote !== null) {
      // Target is on this page: work on a clone so the page itself is untouched.
      try {
        instance.setContent(processXRef(id, localNote.cloneNode(true)));
      } finally {
        reveal();
      }
      return;
    }

    // See if we can fetch this
    fetch(url.split('#')[0])
      .then((res) => res.text())
      .then((markup) => {
        const remoteNote = parseDocument(markup).getElementById(id);
        if (remoteNote !== null) {
          instance.setContent(processXRef(id, remoteNote));
        }
      })
      .finally(reveal);
  }, function(instance) {
  });
}
// The annotation element whose code lines are currently highlighted, if any.
let selectedAnnoteEl;
// Builds the CSS selector for the span tagged with the given code cell and annotation.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
// Highlights the code lines that belong to an annotation entry.
//   annoteEl - element carrying data-target-cell / data-target-annotation.
// Looks up the matching annotated span, reads its comma-separated
// data-code-lines, and positions (creating on demand) two absolutely positioned
// overlays: one over the code lines and one inside the cell's annotation gutter.
// Records annoteEl as the current selection once the lines have been resolved.
// Returns without doing anything when the span, its line list, or the line
// elements cannot be found, instead of throwing inside the click/resize handler.
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan ? annoteSpan.getAttribute("data-code-lines") : null;
  if (!codeLines) {
    return;
  }
  const lineIds = codeLines.split(",").map((line) => targetCell + "-" + line);

  // The highlight spans from the top of the first line to the bottom of the
  // last one (for a single line both are the same element).
  const firstEl = window.document.getElementById(lineIds[0]);
  const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
  if (firstEl === null || lastEl === null) {
    return;
  }
  const top = firstEl.offsetTop;
  const height = lastEl.offsetTop + lastEl.offsetHeight - top;
  const parent = firstEl.parentElement ? firstEl.parentElement.parentElement : null;

  if (parent !== null) {
    // cook up a div (if necessary) and position it
    let div = window.document.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = window.document.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;

    let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      const codeCell = window.document.getElementById(targetCell);
      const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
      // Only create the gutter marker when there is a gutter to hold it.
      if (gutter) {
        gutterDiv = window.document.createElement("div");
        gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
        gutterDiv.style.position = 'absolute';
        gutter.appendChild(gutterDiv);
      }
    }
    if (gutterDiv !== null) {
      gutterDiv.style.top = top - 2 + "px";
      gutterDiv.style.height = height + 4 + "px";
    }
  }
  selectedAnnoteEl = annoteEl;
};
// Removes both highlight overlays (when present) and clears the current selection.
const unselectCodeLines = () => {
  for (const elId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    window.document.getElementById(elId)?.remove();
  }
  selectedAnnoteEl = undefined;
};
// On window resize (throttled, 10 ms), re-run the highlight placement for the
// currently selected annotation so the overlays follow the reflowed code.
// NOTE(review): `elRect` is not declared anywhere in the visible script, so this
// assignment targets an outer or implicit global binding — confirm it is still needed.
window.addEventListener("resize", throttle(function () {
  elRect = undefined;
  if (selectedAnnoteEl) {
    selectCodeLines(selectedAnnoteEl);
  }
}, 10));
// Wraps `fn` so that the first call runs immediately and every later call is
// deferred: each one cancels the pending timer and schedules `fn` (with the
// latest arguments) to run after `ms` milliseconds. Once a deferred call has
// run, the wrapper is reset and the next call runs immediately again.
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (engaged) {
      // all the others get throttled
      if (pending) clearTimeout(pending); // cancel #2
      pending = setTimeout(() => {
        fn.apply(this, args);
        pending = engaged = false;
      }, ms);
      return;
    }
    // first call gets through
    fn.apply(this, args);
    engaged = true;
  };
}
// Attach click handler to the DT: clicking an annotation entry highlights its
// code lines; clicking the already selected entry clears the highlight.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
annoteDls.forEach((annoteDlNode) => {
  annoteDlNode.addEventListener('click', (event) => {
    const clickedEl = event.target;
    if (clickedEl === selectedAnnoteEl) {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
      return;
    }
    unselectCodeLines();
    // Only one entry is marked active at a time.
    const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
    if (activeEl) {
      activeEl.classList.remove('code-annotation-active');
    }
    selectCodeLines(clickedEl);
    clickedEl.classList.add('code-annotation-active');
  });
});
// Walks up from `el` until it reaches a node whose parent carries a non-empty
// `data-cites` value. Returns `{ el, cites }`, where `el` is that node (the child
// of the element holding the attribute) and `cites` is the attribute split on
// spaces, or undefined when no ancestor has one.
const findCites = (el) => {
  let node = el;
  while (node.parentElement) {
    const cites = node.parentElement.dataset.cites;
    if (cites) {
      return {
        el: node,
        cites: cites.split(' ')
      };
    }
    node = node.parentElement;
  }
  return undefined;
};
1237 var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
1238 for (var i=0; i<bibliorefs.length; i++) {
1239 const ref = bibliorefs[i];
1240 const citeInfo = findCites(ref);
1241 if (citeInfo) {
1242 tippyHover(citeInfo.el, function() {
1243 var popup = window.document.createElement('div');
1244 citeInfo.cites.forEach(function(cite) {
1245 var citeDiv = window.document.createElement('div');
1246 citeDiv.classList.add('hanging-indent');
1247 citeDiv.classList.add('csl-entry');
1248 var biblioDiv = window.document.getElementById('ref-' + cite);
1249 if (biblioDiv) {
1250 citeDiv.innerHTML = biblioDiv.innerHTML;
1251 }
1252 popup.appendChild(citeDiv);
1253 });
1254 return popup.innerHTML;
1255 });
1256 }
1257 }
1258 });
1259 </script>
1260</div> <!-- /content -->
1261<footer class="footer">
1262 <div class="nav-footer">
1263 <div class="nav-footer-left">
1264<p>Built with <a href="https://quarto.org/">Quarto</a></p>
1265</div>
1266 <div class="nav-footer-center">
1267
1268 <div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/your-org/atdata/edit/main/index.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></div>
1269 <div class="nav-footer-right">
1270<p>MIT License</p>
1271</div>
1272 </div>
1273</footer>
1274
1275
1276
1277
1278</body></html>