A loose federation of distributed, typed datasets
1<!DOCTYPE html>
2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
3
4<meta charset="utf-8">
5<meta name="generator" content="quarto-1.7.34">
6
7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
8
9
10<title>abstractdatastore – atdata</title>
11<style>
12code{white-space: pre-wrap;}
13span.smallcaps{font-variant: small-caps;}
14div.columns{display: flex; gap: min(4vw, 1.5em);}
15div.column{flex: auto; overflow-x: auto;}
16div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
17ul.task-list{list-style: none;}
18ul.task-list li input[type="checkbox"] {
19 width: 0.8em;
20 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
21 vertical-align: middle;
22}
23/* CSS for syntax highlighting */
24html { -webkit-text-size-adjust: 100%; }
25pre > code.sourceCode { white-space: pre; position: relative; }
26pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
27pre > code.sourceCode > span:empty { height: 1.2em; }
28.sourceCode { overflow: visible; }
29code.sourceCode > span { color: inherit; text-decoration: inherit; }
30div.sourceCode { margin: 1em 0; }
31pre.sourceCode { margin: 0; }
32@media screen {
33div.sourceCode { overflow: auto; }
34}
35@media print {
36pre > code.sourceCode { white-space: pre-wrap; }
37pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
38}
39pre.numberSource code
40 { counter-reset: source-line 0; }
41pre.numberSource code > span
42 { position: relative; left: -4em; counter-increment: source-line; }
43pre.numberSource code > span > a:first-child::before
44 { content: counter(source-line);
45 position: relative; left: -1em; text-align: right; vertical-align: baseline;
46 border: none; display: inline-block;
47 -webkit-touch-callout: none; -webkit-user-select: none;
48 -khtml-user-select: none; -moz-user-select: none;
49 -ms-user-select: none; user-select: none;
50 padding: 0 4px; width: 4em;
51 }
52pre.numberSource { margin-left: 3em; padding-left: 4px; }
53div.sourceCode
54 { }
55@media screen {
56pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
57}
58</style>
59
60
61<script src="../site_libs/quarto-nav/quarto-nav.js"></script>
62<script src="../site_libs/quarto-nav/headroom.min.js"></script>
63<script src="../site_libs/clipboard/clipboard.min.js"></script>
64<script src="../site_libs/quarto-search/autocomplete.umd.js"></script>
65<script src="../site_libs/quarto-search/fuse.min.js"></script>
66<script src="../site_libs/quarto-search/quarto-search.js"></script>
67<meta name="quarto:offset" content="../">
68<script src="../site_libs/quarto-html/quarto.js" type="module"></script>
69<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
70<script src="../site_libs/quarto-html/popper.min.js"></script>
71<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
72<script src="../site_libs/quarto-html/anchor.min.js"></script>
73<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
74<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles">
75<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles">
76<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles">
77<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
78<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
79<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" data-mode="light">
80<link href="../site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark">
81<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light">
82<script id="quarto-search-options" type="application/json">{
83 "location": "navbar",
84 "copy-button": false,
85 "collapse-after": 3,
86 "panel-placement": "end",
87 "type": "overlay",
88 "limit": 50,
89 "keyboard-shortcut": [
90 "f",
91 "/",
92 "s"
93 ],
94 "show-item-context": false,
95 "language": {
96 "search-no-results-text": "No results",
97 "search-matching-documents-text": "matching documents",
98 "search-copy-link-title": "Copy link to search",
99 "search-hide-matches-text": "Hide additional matches",
100 "search-more-match-text": "more match in this document",
101 "search-more-matches-text": "more matches in this document",
102 "search-clear-button-title": "Clear",
103 "search-text-placeholder": "",
104 "search-detached-cancel-button-title": "Cancel",
105 "search-submit-button-title": "Submit",
106 "search-label": "Search"
107 }
108}</script>
109
110
111<link rel="stylesheet" href="../assets/styles.css">
112</head>
113
114<body class="nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript">
// Mirror a Bootstrap stylesheet's `data-mode` attribute onto <body>:
// "dark" yields the quarto-dark class, anything else yields quarto-light.
// The opposite class is removed so only one of the two is ever present.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const bodyClasses = window.document.querySelector("body").classList;
  const [wanted, unwanted] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  bodyClasses.add(wanted);
  bodyClasses.remove(unwanted);
}
// Re-sync the <body> color class with the Bootstrap stylesheet that is
// currently enabled. Does nothing when no enabled link#quarto-bootstrap exists.
const toggleBodyColorPrimary = () => {
  const activeSheet = window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])");
  if (!activeSheet) {
    return;
  }
  toggleBodyColorMode(activeSheet);
}
// Mark every .quarto-color-scheme-toggle control as showing the alternate
// scheme (truthy `alternate`) or the default scheme (falsy `alternate`).
const setColorSchemeToggle = (alternate) => {
  const method = alternate ? "add" : "remove";
  for (const toggleEl of window.document.querySelectorAll('.quarto-color-scheme-toggle')) {
    if (toggleEl) {
      toggleEl.classList[method]("alternate");
    }
  }
};
// Switch the page between the primary and the alternate color scheme.
//   alternate: truthy to enable the alternate stylesheets, falsy to disable
//              them and fall back to the primary ones.
// Transitions on the margin-sidebar nav links are suppressed while the
// stylesheets are swapped, then restored before the toggles are updated.
const toggleColorMode = (alternate) => {
  const marginNavLinks = '#quarto-margin-sidebar .nav-link';
  const primaryStylesheets = window.document.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)');
  const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate');

  manageTransitions(marginNavLinks, false);
  if (!alternate) {
    disableStylesheet(alternateStylesheets);
    enableStylesheet(primaryStylesheets);
    toggleBodyColorPrimary();
  } else {
    // note: dark is layered on light, we don't disable primary!
    enableStylesheet(alternateStylesheets);
    for (const sheetNode of alternateStylesheets) {
      if (sheetNode.id === "quarto-bootstrap") {
        toggleBodyColorMode(sheetNode);
      }
    }
  }
  manageTransitions(marginNavLinks, true);

  // Switch the toggles
  setColorSchemeToggle(alternate);

  // Hack to workaround the fact that safari doesn't
  // properly recolor the scrollbar when toggling (#1455)
  const ua = navigator.userAgent;
  const looksLikeSafari = ua.indexOf('Safari') > 0 && ua.indexOf('Chrome') == -1;
  if (looksLikeSafari) {
    manageTransitions("body", false);
    window.scrollTo(0, 1);
    setTimeout(() => {
      window.scrollTo(0, 0);
      manageTransitions("body", true);
    }, 40);
  }
}
// Turn off each given <link> by setting its rel to 'disabled-stylesheet'.
const disableStylesheet = (stylesheets) => {
  for (const sheet of stylesheets) {
    sheet.rel = 'disabled-stylesheet';
  }
}
// Turn on each given <link> by setting its rel to 'stylesheet'.
// Links that are already enabled are left untouched: per the original note,
// Chrome will still FOUC without this check.
const enableStylesheet = (stylesheets) => {
  for (const sheet of stylesheets) {
    if (sheet.rel === 'stylesheet') {
      continue;
    }
    sheet.rel = 'stylesheet';
  }
}
// Allow or suppress CSS transitions on all elements matching `selector` by
// removing (allow) or adding (suppress) the 'notransition' class.
const manageTransitions = (selector, allowTransitions) => {
  const method = allowTransitions ? 'remove' : 'add';
  window.document.querySelectorAll(selector).forEach((el) => {
    el.classList[method]('notransition');
  });
}
// True when the page was opened from the local filesystem (file: protocol).
const isFileUrl = () => window.location.protocol === 'file:';
// True only when the stored color-scheme sentinel is exactly "alternate";
// a null sentinel or any other value yields false.
const hasAlternateSentinel = () => getColorSchemeSentinel() === "alternate";
// Remember the chosen color scheme as "alternate" or "default".
// On file: URLs the value is kept in the in-page localAlternateSentinel
// variable; otherwise it is written to localStorage under "quarto-color-scheme".
const setStyleSentinel = (alternate) => {
  const value = alternate ? "alternate" : "default";
  if (isFileUrl()) {
    localAlternateSentinel = value;
    return;
  }
  window.localStorage.setItem("quarto-color-scheme", value);
}
// Read the remembered color scheme.
// On file: URLs this is always the in-page localAlternateSentinel; otherwise
// the localStorage entry wins, and localAlternateSentinel is used when no
// entry is stored.
const getColorSchemeSentinel = () => {
  if (isFileUrl()) {
    return localAlternateSentinel;
  }
  const stored = window.localStorage.getItem("quarto-color-scheme");
  return stored ?? localAlternateSentinel;
}
// Tell an embedded giscus comment frame (if one is loaded) which theme to use.
//   isAlternate:     whether the page is switching to the alternate scheme.
//   darkModeDefault: whether the page's default scheme is the dark one; when
//                    omitted (null/undefined) the outer authorPrefersDark
//                    constant is used, which is what the body consulted before.
// Theme names come from the #giscus-base-theme / #giscus-alt-theme elements,
// defaulting to 'light' and 'dark' when those are absent.
const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => {
  const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light';
  const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark';

  // Honor the parameter instead of silently ignoring it.
  const prefersDark = darkModeDefault ?? authorPrefersDark;
  // When dark is the default, the "alternate" scheme maps to the base theme.
  const useAlternateTheme = prefersDark ? !isAlternate : isAlternate;
  const newTheme = useAlternateTheme ? alternateTheme : baseTheme;

  // Look the frame up once; nothing to do when giscus is not on the page.
  const iframe = document.querySelector('iframe.giscus-frame');
  if (iframe === null) {
    return;
  }
  // From: https://github.com/giscus/giscus/issues/336
  iframe.contentWindow.postMessage(
    { giscus: { setConfig: { theme: newTheme } } },
    'https://giscus.app'
  );
};
// Page-level color scheme bootstrap.
const authorPrefersDark = false;
const darkModeDefault = authorPrefersDark;

// The "-extra" copies of the highlighting and Bootstrap sheets start disabled.
document.querySelector('link#quarto-text-highlighting-styles.quarto-color-scheme-extra').rel = 'disabled-stylesheet';
document.querySelector('link#quarto-bootstrap.quarto-color-scheme-extra').rel = 'disabled-stylesheet';

// In-page fallback for the stored scheme (see setStyleSentinel / getColorSchemeSentinel).
let localAlternateSentinel = darkModeDefault ? 'alternate' : 'default';

// Dark / light mode switch
window.quartoToggleColorScheme = () => {
  // Flip relative to the currently remembered scheme
  const toAlternate = !hasAlternateSentinel();
  toggleColorMode(toAlternate);
  setStyleSentinel(toAlternate);
  toggleGiscusIfUsed(toAlternate, darkModeDefault);
  window.dispatchEvent(new Event('resize'));
};

// Apply the remembered scheme on load (alternate if need be, primary otherwise)
toggleColorMode(hasAlternateSentinel());
276 </script>
277
278<div id="quarto-search-results"></div>
279 <header id="quarto-header" class="headroom fixed-top">
280 <nav class="navbar navbar-expand-lg " data-bs-theme="dark">
281 <div class="navbar-container container-fluid">
282 <div class="navbar-brand-container mx-auto">
283 <a class="navbar-brand" href="../index.html">
284 <span class="navbar-title">atdata</span>
285 </a>
286 </div>
287 <div id="quarto-search" class="" title="Search"></div>
          <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
289 <span class="navbar-toggler-icon"></span>
290</button>
291 <div class="collapse navbar-collapse" id="navbarCollapse">
292 <ul class="navbar-nav navbar-nav-scroll me-auto">
293 <li class="nav-item">
294 <a class="nav-link" href="../index.html">
295<span class="menu-text">Guide</span></a>
296 </li>
297 <li class="nav-item dropdown ">
298 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="link" data-bs-toggle="dropdown" aria-expanded="false">
299 <span class="menu-text">Tutorials</span>
300 </a>
301 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials">
302 <li>
303 <a class="dropdown-item" href="../tutorials/quickstart.html">
304 <span class="dropdown-text">Quick Start</span></a>
305 </li>
306 <li>
307 <a class="dropdown-item" href="../tutorials/local-workflow.html">
308 <span class="dropdown-text">Local Workflow</span></a>
309 </li>
310 <li>
311 <a class="dropdown-item" href="../tutorials/atmosphere.html">
312 <span class="dropdown-text">Atmosphere Publishing</span></a>
313 </li>
314 <li>
315 <a class="dropdown-item" href="../tutorials/promotion.html">
316 <span class="dropdown-text">Promotion Workflow</span></a>
317 </li>
318 </ul>
319 </li>
320 <li class="nav-item dropdown ">
321 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="link" data-bs-toggle="dropdown" aria-expanded="false">
322 <span class="menu-text">Reference</span>
323 </a>
324 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference">
325 <li>
326 <a class="dropdown-item" href="../reference/architecture.html">
327 <span class="dropdown-text">Architecture Overview</span></a>
328 </li>
329 <li>
330 <a class="dropdown-item" href="../reference/packable-samples.html">
331 <span class="dropdown-text">Packable Samples</span></a>
332 </li>
333 <li>
334 <a class="dropdown-item" href="../reference/datasets.html">
335 <span class="dropdown-text">Datasets</span></a>
336 </li>
337 <li>
338 <a class="dropdown-item" href="../reference/lenses.html">
339 <span class="dropdown-text">Lenses</span></a>
340 </li>
341 <li>
342 <a class="dropdown-item" href="../reference/local-storage.html">
343 <span class="dropdown-text">Local Storage</span></a>
344 </li>
345 <li>
346 <a class="dropdown-item" href="../reference/atmosphere.html">
347 <span class="dropdown-text">Atmosphere</span></a>
348 </li>
349 <li>
350 <a class="dropdown-item" href="../reference/promotion.html">
351 <span class="dropdown-text">Promotion</span></a>
352 </li>
353 <li>
354 <a class="dropdown-item" href="../reference/load-dataset.html">
355 <span class="dropdown-text">load_dataset API</span></a>
356 </li>
357 <li>
358 <a class="dropdown-item" href="../reference/protocols.html">
359 <span class="dropdown-text">Protocols</span></a>
360 </li>
361 <li>
362 <a class="dropdown-item" href="../reference/uri-spec.html">
363 <span class="dropdown-text">URI Specification</span></a>
364 </li>
365 <li>
366 <a class="dropdown-item" href="../reference/troubleshooting.html">
 <span class="dropdown-text">Troubleshooting &amp; FAQ</span></a>
368 </li>
369 <li>
370 <a class="dropdown-item" href="../reference/deployment.html">
371 <span class="dropdown-text">Deployment Guide</span></a>
372 </li>
373 </ul>
374 </li>
375 <li class="nav-item">
376 <a class="nav-link" href="../api/index.html">
377<span class="menu-text">API</span></a>
378 </li>
379</ul>
380 <ul class="navbar-nav navbar-nav-scroll ms-auto">
381 <li class="nav-item compact">
    <a class="nav-link" href="https://github.com/your-org/atdata" aria-label="GitHub repository"> <i class="bi bi-github" role="img">
383</i>
384<span class="menu-text"></span></a>
385 </li>
386</ul>
387 </div> <!-- /navcollapse -->
388 <div class="quarto-navbar-tools">
389 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a>
390</div>
391 </div> <!-- /container-fluid -->
392 </nav>
393</header>
394<!-- content -->
395<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
396<!-- sidebar -->
397<!-- margin-sidebar -->
398 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
399 <nav id="TOC" role="doc-toc" class="toc-active">
400 <h2 id="toc-title">On this page</h2>
401
402 <ul>
403 <li><a href="#atdata.AbstractDataStore" id="toc-atdata.AbstractDataStore" class="nav-link active" data-scroll-target="#atdata.AbstractDataStore">AbstractDataStore</a>
404 <ul class="collapse">
405 <li><a href="#examples" id="toc-examples" class="nav-link" data-scroll-target="#examples">Examples</a></li>
406 <li><a href="#methods" id="toc-methods" class="nav-link" data-scroll-target="#methods">Methods</a>
407 <ul class="collapse">
408 <li><a href="#atdata.AbstractDataStore.read_url" id="toc-atdata.AbstractDataStore.read_url" class="nav-link" data-scroll-target="#atdata.AbstractDataStore.read_url">read_url</a></li>
409 <li><a href="#atdata.AbstractDataStore.supports_streaming" id="toc-atdata.AbstractDataStore.supports_streaming" class="nav-link" data-scroll-target="#atdata.AbstractDataStore.supports_streaming">supports_streaming</a></li>
410 <li><a href="#atdata.AbstractDataStore.write_shards" id="toc-atdata.AbstractDataStore.write_shards" class="nav-link" data-scroll-target="#atdata.AbstractDataStore.write_shards">write_shards</a></li>
411 </ul></li>
412 </ul></li>
413 </ul>
414<div class="toc-actions"><ul><li><a href="https://github.com/your-org/atdata/edit/main/api/AbstractDataStore.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav>
415 </div>
416<!-- main -->
417<main class="content" id="quarto-document-content"><header id="title-block-header" class="quarto-title-block"></header>
418
419
420
421
422
423<section id="atdata.AbstractDataStore" class="level1">
424<h1>AbstractDataStore</h1>
425<div class="sourceCode" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>AbstractDataStore()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
426<p>Protocol for data storage operations.</p>
<p>This protocol abstracts over different storage backends for dataset data:</p>
<ul>
<li>S3DataStore: S3-compatible object storage</li>
<li>PDSBlobStore: ATProto PDS blob storage (future)</li>
</ul>
428<p>The separation of index (metadata) from data store (actual files) allows flexible deployment: local index with S3 storage, atmosphere index with S3 storage, or atmosphere index with PDS blobs.</p>
429<section id="examples" class="level2 doc-section doc-section-examples">
430<h2 class="doc-section doc-section-examples anchored" data-anchor-id="examples">Examples</h2>
431<div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> store <span class="op">=</span> S3DataStore(credentials, bucket<span class="op">=</span><span class="st">"my-bucket"</span>)</span>
432<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> urls <span class="op">=</span> store.write_shards(dataset, prefix<span class="op">=</span><span class="st">"training/v1"</span>)</span>
433<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> <span class="bu">print</span>(urls)</span>
434<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>[<span class="st">'s3://my-bucket/training/v1/shard-000000.tar'</span>, ...]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
435</section>
436<section id="methods" class="level2">
437<h2 class="anchored" data-anchor-id="methods">Methods</h2>
438<table class="caption-top table">
439<thead>
440<tr class="header">
441<th>Name</th>
442<th>Description</th>
443</tr>
444</thead>
445<tbody>
446<tr class="odd">
447<td><a href="#atdata.AbstractDataStore.read_url">read_url</a></td>
448<td>Resolve a storage URL for reading.</td>
449</tr>
450<tr class="even">
451<td><a href="#atdata.AbstractDataStore.supports_streaming">supports_streaming</a></td>
452<td>Whether this store supports streaming reads.</td>
453</tr>
454<tr class="odd">
455<td><a href="#atdata.AbstractDataStore.write_shards">write_shards</a></td>
456<td>Write dataset shards to storage.</td>
457</tr>
458</tbody>
459</table>
460<section id="atdata.AbstractDataStore.read_url" class="level3">
461<h3 class="anchored" data-anchor-id="atdata.AbstractDataStore.read_url">read_url</h3>
462<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>AbstractDataStore.read_url(url)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
463<p>Resolve a storage URL for reading.</p>
464<p>Some storage backends may need to transform URLs (e.g., signing S3 URLs or resolving blob references). This method returns a URL that can be used directly with WebDataset.</p>
465<section id="parameters" class="level4 doc-section doc-section-parameters">
466<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h4>
467<table class="caption-top table">
468<thead>
469<tr class="header">
470<th>Name</th>
471<th>Type</th>
472<th>Description</th>
473<th>Default</th>
474</tr>
475</thead>
476<tbody>
477<tr class="odd">
478<td>url</td>
<td><a href="https://docs.python.org/3/library/stdtypes.html#str">str</a></td>
480<td>Storage URL to resolve.</td>
481<td><em>required</em></td>
482</tr>
483</tbody>
484</table>
485</section>
486<section id="returns" class="level4 doc-section doc-section-returns">
487<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns">Returns</h4>
488<table class="caption-top table">
489<thead>
490<tr class="header">
491<th>Name</th>
492<th>Type</th>
493<th>Description</th>
494</tr>
495</thead>
496<tbody>
497<tr class="odd">
498<td></td>
<td><a href="https://docs.python.org/3/library/stdtypes.html#str">str</a></td>
500<td>WebDataset-compatible URL for reading.</td>
501</tr>
502</tbody>
503</table>
504</section>
505</section>
506<section id="atdata.AbstractDataStore.supports_streaming" class="level3">
507<h3 class="anchored" data-anchor-id="atdata.AbstractDataStore.supports_streaming">supports_streaming</h3>
508<div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>AbstractDataStore.supports_streaming()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
509<p>Whether this store supports streaming reads.</p>
510<section id="returns-1" class="level4 doc-section doc-section-returns">
511<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-1">Returns</h4>
512<table class="caption-top table">
513<thead>
514<tr class="header">
515<th>Name</th>
516<th>Type</th>
517<th>Description</th>
518</tr>
519</thead>
520<tbody>
<tr class="odd">
<td></td>
<td><a href="https://docs.python.org/3/library/functions.html#bool">bool</a></td>
<td>True if the store supports efficient streaming (like S3), False if data must be fully downloaded first.</td>
</tr>
531</tbody>
532</table>
533</section>
534</section>
535<section id="atdata.AbstractDataStore.write_shards" class="level3">
536<h3 class="anchored" data-anchor-id="atdata.AbstractDataStore.write_shards">write_shards</h3>
537<div class="sourceCode" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>AbstractDataStore.write_shards(ds, <span class="op">*</span>, prefix, <span class="op">**</span>kwargs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
538<p>Write dataset shards to storage.</p>
539<section id="parameters-1" class="level4 doc-section doc-section-parameters">
540<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-1">Parameters</h4>
541<table class="caption-top table">
542<thead>
543<tr class="header">
544<th>Name</th>
545<th>Type</th>
546<th>Description</th>
547<th>Default</th>
548</tr>
549</thead>
550<tbody>
551<tr class="odd">
552<td>ds</td>
<td><code>atdata.dataset.Dataset</code></td>
554<td>The Dataset to write.</td>
555<td><em>required</em></td>
556</tr>
557<tr class="even">
558<td>prefix</td>
<td><a href="https://docs.python.org/3/library/stdtypes.html#str">str</a></td>
560<td>Path prefix for the shards (e.g., ‘datasets/mnist/v1’).</td>
561<td><em>required</em></td>
562</tr>
563<tr class="odd">
564<td>**kwargs</td>
565<td></td>
566<td>Backend-specific options (e.g., maxcount for shard size).</td>
567<td><code>{}</code></td>
568</tr>
569</tbody>
570</table>
571</section>
572<section id="returns-2" class="level4 doc-section doc-section-returns">
573<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-2">Returns</h4>
574<table class="caption-top table">
575<thead>
576<tr class="header">
577<th>Name</th>
578<th>Type</th>
579<th>Description</th>
580</tr>
581</thead>
582<tbody>
<tr class="odd">
<td></td>
<td><a href="https://docs.python.org/3/library/stdtypes.html#list">list</a>[<a href="https://docs.python.org/3/library/stdtypes.html#str">str</a>]</td>
<td>List of URLs for the written shards, suitable for use with WebDataset or atdata.Dataset().</td>
</tr>
593</tbody>
594</table>
595
596
597</section>
598</section>
599</section>
600</section>
601
602</main> <!-- /main -->
603<script id="quarto-html-after-body" type="application/javascript">
604 window.document.addEventListener("DOMContentLoaded", function (event) {
605 // Ensure there is a toggle, if there isn't float one in the top right
606 if (window.document.querySelector('.quarto-color-scheme-toggle') === null) {
607 const a = window.document.createElement('a');
608 a.classList.add('top-right');
609 a.classList.add('quarto-color-scheme-toggle');
610 a.href = "";
611 a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; };
612 const i = window.document.createElement("i");
613 i.classList.add('bi');
614 a.appendChild(i);
615 window.document.body.appendChild(a);
616 }
617 setColorSchemeToggle(hasAlternateSentinel())
618 const icon = "";
619 const anchorJS = new window.AnchorJS();
620 anchorJS.options = {
621 placement: 'right',
622 icon: icon
623 };
624 anchorJS.add('.anchored');
// True when any of the element's classes starts with 'code-annotation-'.
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((clz) => clz.startsWith('code-annotation-'));
// ClipboardJS 'success' handler: flash a "Copied!" state on the copy button
// for one second, then restore its previous title and appearance.
// A Bootstrap tooltip is shown as well when window.bootstrap is available.
const onCopySuccess = function(e) {
  const button = e.trigger;
  // don't keep focus
  button.blur();
  // flash "checked"
  button.classList.add('code-copy-button-checked');
  const previousTitle = button.getAttribute("title");
  button.setAttribute("title", "Copied!");

  let tooltip;
  if (window.bootstrap) {
    const tooltipAttrs = [
      ["data-bs-toggle", "tooltip"],
      ["data-bs-placement", "left"],
      ["data-bs-title", "Copied!"],
    ];
    for (const [attr, value] of tooltipAttrs) {
      button.setAttribute(attr, value);
    }
    tooltip = new bootstrap.Tooltip(button, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8]
    });
    tooltip.show();
  }

  setTimeout(function() {
    if (tooltip) {
      tooltip.hide();
      for (const attr of ["data-bs-title", "data-bs-toggle", "data-bs-placement"]) {
        button.removeAttribute(attr);
      }
    }
    button.setAttribute("title", previousTitle);
    button.classList.remove('code-copy-button-checked');
  }, 1000);

  // clear code selection
  e.clearSelection();
}
// ClipboardJS `text` callback: return the text of the code element that
// precedes the copy button, with code-annotation children stripped out.
// Works on a deep clone so the rendered page is left untouched.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // `children` is a live collection: removing while iterating it directly
  // skips the element that follows each removed one, so iterate a snapshot.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
675 const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
676 text: getTextToCopy
677 });
678 clipboard.on('success', onCopySuccess);
679 if (window.document.getElementById('quarto-embedded-source-code-modal')) {
680 const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
681 text: getTextToCopy,
682 container: window.document.getElementById('quarto-embedded-source-code-modal')
683 });
684 clipboardModal.on('success', onCopySuccess);
685 }
// Patterns for links that count as "internal" to this site.
var localhostRegex = /^(?:http|https):\/\/localhost:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
// Regex literal rather than a string pattern: inside a string literal "\."
// collapses to ".", which would let the dot match any character.
var filterRegex = /https:\/\/github\.com\/your-org\/atdata/;
// True when `href` points at the project repository, a localhost URL,
// or a mailto: address.
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
692 // Inspect non-navigation links and adorn them if external
693 var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
694 for (var i=0; i<links.length; i++) {
695 const link = links[i];
696 if (!isInternal(link.href)) {
697 // undo the damage that might have been done by quarto-nav.js in the case of
698 // links that we want to consider external
699 if (link.dataset.originalHref !== undefined) {
700 link.href = link.dataset.originalHref;
701 }
702 }
703 }
// Attach a tippy popup to `el` using the site's shared popup settings.
//   contentFn     - optional tippy `content` value
//   onTriggerFn   - optional tippy `onTrigger` hook
//   onUntriggerFn - optional tippy `onUntrigger` hook
// An option is only set on the config when its argument is truthy.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // render the popup inside the reference element's parent
    appendTo: function(el) {
      return el.parentElement;
    },
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalSettings = [
    ["content", contentFn],
    ["onTrigger", onTriggerFn],
    ["onUntrigger", onUntriggerFn],
  ];
  for (const [key, value] of optionalSettings) {
    if (value) {
      config[key] = value;
    }
  }
  window.tippy(el, config);
}
729 const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
730 for (var i=0; i<noterefs.length; i++) {
731 const ref = noterefs[i];
732 tippyHover(ref, function() {
733 // use id or data attribute instead here
734 let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
735 try { href = new URL(href).hash; } catch {}
736 const id = href.replace(/^#\/?/, "");
737 const note = window.document.getElementById(id);
738 if (note) {
739 return note.innerHTML;
740 } else {
741 return "";
742 }
743 });
744 }
745 const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Build the HTML shown in a cross-reference popup.
//   id   - target id (null allowed); null or a 'sec-' prefix selects section handling
//   note - the target element; it is modified in place (classes stripped,
//          anchor link removed), so callers pass a clone or a parsed copy
// Returns an HTML string.
const processXRef = (id, note) => {
  // Run math typesetting on `el` when Quarto exposes the hook
  const typeset = (el) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(el);
    }
  };
  // Strip column container classes
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  };
  stripColumnClz(note);

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typeset(note);
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const container = document.createElement("div");
  const kids = note.children;
  if (!(kids && kids.length > 2)) {
    typeset(note);
    return note.innerHTML;
  }
  container.appendChild(kids[0].cloneNode(true));
  // Add the first following child that is not an empty paragraph, then stop
  for (let i = 1; i < kids.length; i++) {
    const child = kids[i];
    const isEmptyParagraph = child.tagName === "P" && child.innerText === "";
    if (!isEmptyParagraph) {
      container.appendChild(child.cloneNode(true));
      break;
    }
  }
  typeset(container);
  return container.innerHTML;
}
/**
 * Fetch `fetchUrl`, parse the response as HTML and show the preview built from
 * it in `instance`.
 *
 * The preview is best-effort: a failed request or parse is reported with
 * console.warn instead of surfacing as an unhandled promise rejection, and the
 * tooltip is re-enabled and shown in every case.
 *
 * @param {object} instance - tippy instance that was disabled by the caller.
 * @param {string} fetchUrl - URL of the document to load.
 * @param {function(Document): ?string} extractPreview - Returns the preview
 *   HTML for the parsed document, or null when nothing suitable was found.
 */
const showFetchedXRef = (instance, fetchUrl, extractPreview) => {
  fetch(fetchUrl)
    .then((res) => res.text())
    .then((html) => {
      const htmlDoc = new DOMParser().parseFromString(html, "text/html");
      const preview = extractPreview(htmlDoc);
      if (preview !== null) {
        instance.setContent(preview);
      }
    })
    .catch((err) => {
      console.warn("Unable to load cross-reference preview", fetchUrl, err);
    })
    .finally(() => {
      instance.enable();
      instance.show();
    });
};

// Cross-reference links: resolve the preview lazily when the tooltip is
// triggered, either from this document or by fetching the target page.
for (const xref of xrefs) {
  tippyHover(xref, undefined, function(instance) {
    // Keep the tooltip disabled until its content has been resolved.
    instance.disable();
    const url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      // Anything URL() rejects leaves `hash` undefined.
      try { hash = new URL(url).hash; } catch {}
    }

    if (!hash) {
      // No fragment to target: preview the main content of the linked page.
      // This should only happen for chapter cross references.
      showFetchedXRef(instance, url, (htmlDoc) => {
        const note = htmlDoc.querySelector('main.content');
        if (note === null) {
          return null;
        }
        // Remove the leading header so the preview starts with the body.
        if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
          note.children[0].remove();
        }
        return processXRef(null, note);
      });
      return;
    }

    const id = hash.replace(/^#\/?/, "");
    const localNote = window.document.getElementById(id);
    if (localNote !== null) {
      // Target lives in this document; work on a clone because processXRef
      // modifies the node it is given.
      try {
        instance.setContent(processXRef(id, localNote.cloneNode(true)));
      } finally {
        instance.enable();
        instance.show();
      }
      return;
    }

    // Target is not in this document: load the page the link points at.
    showFetchedXRef(instance, url.split('#')[0], (htmlDoc) => {
      const note = htmlDoc.getElementById(id);
      return note === null ? null : processXRef(id, note);
    });
  }, function(instance) {
    // Nothing to do when the tooltip is untriggered.
  });
}
// The annotation element whose code lines are currently highlighted, if any.
let selectedAnnoteEl;
/**
 * Build the CSS selector for the span that carries a given annotation.
 *
 * @param {string} cell - Value matched against `data-code-cell`.
 * @param {string} annotation - Value matched against `data-code-annotation`.
 * @returns {string} A `span[...][...]` attribute selector.
 */
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
/**
 * Highlight the code lines that an annotation entry refers to.
 *
 * Reads `data-target-cell` / `data-target-annotation` from `annoteEl`, finds
 * the matching annotation span and its `data-code-lines` list, then positions
 * (creating them on first use) the "code-annotation-line-highlight" div and
 * the "code-annotation-line-highlight-gutter" div over that line range.
 *
 * If the annotation span, its line list, the line elements or their container
 * cannot be found, the function returns without touching the DOM or the
 * current selection. A missing gutter only skips the gutter highlight.
 *
 * @param {Element} annoteEl - Annotation element carrying the target attributes.
 */
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan === null ? null : annoteSpan.getAttribute("data-code-lines");
  if (codeLines === null) {
    return;
  }

  // Line element ids are "<cell>-<line>".
  const lineIds = codeLines.split(",").map((line) => targetCell + "-" + line);
  const firstEl = window.document.getElementById(lineIds[0]);
  const lastEl = lineIds.length > 1
    ? window.document.getElementById(lineIds[lineIds.length - 1])
    : firstEl;
  if (firstEl === null || lastEl === null) {
    return;
  }
  const parent = firstEl.parentElement === null ? null : firstEl.parentElement.parentElement;
  if (parent === null) {
    return;
  }

  // Span from the top of the first line to the bottom of the last one.
  const top = firstEl.offsetTop;
  const height = lastEl.offsetTop + lastEl.offsetHeight - top;

  // Create the highlight div if necessary, then position it.
  let div = window.document.getElementById("code-annotation-line-highlight");
  if (div === null) {
    div = window.document.createElement("div");
    div.setAttribute("id", "code-annotation-line-highlight");
    div.style.position = 'absolute';
    parent.appendChild(div);
  }
  div.style.top = top - 2 + "px";
  div.style.height = height + 4 + "px";
  div.style.left = 0;

  // Same for the gutter marker, which lives inside the cell's gutter element.
  let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
  if (gutterDiv === null) {
    const codeCell = window.document.getElementById(targetCell);
    const gutter = codeCell === null ? null : codeCell.querySelector('.code-annotation-gutter');
    if (gutter !== null) {
      gutterDiv = window.document.createElement("div");
      gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
      gutterDiv.style.position = 'absolute';
      gutter.appendChild(gutterDiv);
    }
  }
  if (gutterDiv !== null) {
    gutterDiv.style.top = top - 2 + "px";
    gutterDiv.style.height = height + 4 + "px";
  }

  selectedAnnoteEl = annoteEl;
};
/**
 * Remove both annotation highlight elements (if present) and clear the
 * record of the currently selected annotation.
 */
const unselectCodeLines = () => {
  for (const elId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    const highlight = window.document.getElementById(elId);
    if (highlight) {
      highlight.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
// On window resize, recompute the highlight for the selected annotation
// (if there is one) so it keeps tracking its code lines.
const repositionAnnotationHighlight = () => {
  // NOTE(review): `elRect` is not declared anywhere in the visible part of
  // this script; confirm it is declared earlier in the file, otherwise this
  // assignment creates an implicit global.
  elRect = undefined;
  if (selectedAnnoteEl) {
    selectCodeLines(selectedAnnoteEl);
  }
};
window.addEventListener("resize", throttle(repositionAnnotationHighlight, 10));
/**
 * Wrap `fn` so that bursts of calls are collapsed.
 *
 * The first call runs `fn` immediately. Every later call cancels the pending
 * timer (if any) and schedules `fn` to run `ms` milliseconds later with that
 * call's arguments; once a scheduled run completes, the wrapper resets and the
 * next call runs immediately again.
 *
 * @param {Function} fn - Function to rate-limit.
 * @param {number} ms - Delay applied to the deferred calls.
 * @returns {Function} The wrapped function.
 */
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (engaged) {
      // Replace whatever deferred call was waiting with this one.
      if (pending) clearTimeout(pending);
      pending = setTimeout(() => {
        fn.apply(this, args);
        engaged = false;
        pending = false;
      }, ms);
      return;
    }
    // First call of a burst goes straight through.
    fn.apply(this, args);
    engaged = true;
  };
}
// Attach a click handler to every annotation <dt>: clicking one highlights its
// code lines, clicking the already-selected one clears the highlight.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', (event) => {
    // Use the <dt> the listener is bound to. `event.target` may be a
    // descendant of the <dt>, which does not carry the data-target-*
    // attributes and never compares equal to the stored selection.
    const clickedEl = event.currentTarget;
    if (clickedEl !== selectedAnnoteEl) {
      // Switch the selection: clear the old highlight and active marker first.
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Clicking the selected annotation again unselects it.
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}
/**
 * Walk up from `el` to find the nearest element whose parent carries a
 * non-empty `data-cites` attribute.
 *
 * @param {Element} el - Element to start from.
 * @returns {{el: Element, cites: string[]}|undefined} The element found and
 *   its parent's cite keys (split on single spaces), or undefined when no
 *   ancestor has `data-cites`.
 */
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const holder = current.parentElement;
    const cites = holder.dataset.cites;
    if (cites) {
      return { el: current, cites: cites.split(' ') };
    }
    current = holder;
  }
  return undefined;
};
// Citation links: hovering the element returned by findCites shows the
// bibliography entries for every key listed in the enclosing data-cites.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
bibliorefs.forEach((ref) => {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    return;
  }
  tippyHover(citeInfo.el, () => {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent', 'csl-entry');
      // Entries are looked up by id "ref-<key>"; an unknown key yields an
      // empty entry div.
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  });
});
1013 });
1014 </script>
1015</div> <!-- /content -->
1016<footer class="footer">
1017 <div class="nav-footer">
1018 <div class="nav-footer-left">
1019<p>Built with <a href="https://quarto.org/">Quarto</a></p>
1020</div>
1021 <div class="nav-footer-center">
1022
1023 <div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/your-org/atdata/edit/main/api/AbstractDataStore.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></div>
1024 <div class="nav-footer-right">
1025<p>MIT License</p>
1026</div>
1027 </div>
1028</footer>
1029
1030
1031
1032
1033</body></html>