A loose federation of distributed, typed datasets
1<!DOCTYPE html>
2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
3
4<meta charset="utf-8">
5<meta name="generator" content="quarto-1.7.34">
6
7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
8
9
10<title>dataset – atdata</title>
11<style>
12code{white-space: pre-wrap;}
13span.smallcaps{font-variant: small-caps;}
14div.columns{display: flex; gap: min(4vw, 1.5em);}
15div.column{flex: auto; overflow-x: auto;}
16div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
17ul.task-list{list-style: none;}
18ul.task-list li input[type="checkbox"] {
19 width: 0.8em;
20 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
21 vertical-align: middle;
22}
23/* CSS for syntax highlighting */
24html { -webkit-text-size-adjust: 100%; }
25pre > code.sourceCode { white-space: pre; position: relative; }
26pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
27pre > code.sourceCode > span:empty { height: 1.2em; }
28.sourceCode { overflow: visible; }
29code.sourceCode > span { color: inherit; text-decoration: inherit; }
30div.sourceCode { margin: 1em 0; }
31pre.sourceCode { margin: 0; }
32@media screen {
33div.sourceCode { overflow: auto; }
34}
35@media print {
36pre > code.sourceCode { white-space: pre-wrap; }
37pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
38}
39pre.numberSource code
40 { counter-reset: source-line 0; }
41pre.numberSource code > span
42 { position: relative; left: -4em; counter-increment: source-line; }
43pre.numberSource code > span > a:first-child::before
44 { content: counter(source-line);
45 position: relative; left: -1em; text-align: right; vertical-align: baseline;
46 border: none; display: inline-block;
47 -webkit-touch-callout: none; -webkit-user-select: none;
48 -khtml-user-select: none; -moz-user-select: none;
49 -ms-user-select: none; user-select: none;
50 padding: 0 4px; width: 4em;
51 }
52pre.numberSource { margin-left: 3em; padding-left: 4px; }
53div.sourceCode
54 { }
55@media screen {
56pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
57}
58</style>
59
60
61<script src="../site_libs/quarto-nav/quarto-nav.js"></script>
62<script src="../site_libs/quarto-nav/headroom.min.js"></script>
63<script src="../site_libs/clipboard/clipboard.min.js"></script>
64<script src="../site_libs/quarto-search/autocomplete.umd.js"></script>
65<script src="../site_libs/quarto-search/fuse.min.js"></script>
66<script src="../site_libs/quarto-search/quarto-search.js"></script>
67<meta name="quarto:offset" content="../">
68<script src="../site_libs/quarto-html/quarto.js" type="module"></script>
69<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
70<script src="../site_libs/quarto-html/popper.min.js"></script>
71<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
72<script src="../site_libs/quarto-html/anchor.min.js"></script>
73<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
74<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles">
75<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles">
76<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles">
77<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
78<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
79<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" data-mode="light">
80<link href="../site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark">
81<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light">
82<script id="quarto-search-options" type="application/json">{
83 "location": "navbar",
84 "copy-button": false,
85 "collapse-after": 3,
86 "panel-placement": "end",
87 "type": "overlay",
88 "limit": 50,
89 "keyboard-shortcut": [
90 "f",
91 "/",
92 "s"
93 ],
94 "show-item-context": false,
95 "language": {
96 "search-no-results-text": "No results",
97 "search-matching-documents-text": "matching documents",
98 "search-copy-link-title": "Copy link to search",
99 "search-hide-matches-text": "Hide additional matches",
100 "search-more-match-text": "more match in this document",
101 "search-more-matches-text": "more matches in this document",
102 "search-clear-button-title": "Clear",
103 "search-text-placeholder": "",
104 "search-detached-cancel-button-title": "Cancel",
105 "search-submit-button-title": "Submit",
106 "search-label": "Search"
107 }
108}</script>
109
110
111<link rel="stylesheet" href="../assets/styles.css">
112</head>
113
114<body class="nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript">
// Mirror a Bootstrap stylesheet's `data-mode` attribute onto <body>:
// "dark" selects the `quarto-dark` class, any other value `quarto-light`.
// The opposite class is removed so only one of the two is ever present.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const bodyClasses = window.document.querySelector("body").classList;
  bodyClasses.add(isDark ? "quarto-dark" : "quarto-light");
  bodyClasses.remove(isDark ? "quarto-light" : "quarto-dark");
};
// Re-sync the <body> mode class from the Bootstrap <link> that is currently
// enabled (its rel is not "disabled-stylesheet"). Does nothing when no such
// link is found.
const toggleBodyColorPrimary = () => {
  const activeSheet = window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])");
  if (!activeSheet) {
    return;
  }
  toggleBodyColorMode(activeSheet);
};
// Mark every color-scheme toggle control as "alternate" (or clear the mark)
// so the toggle's appearance matches the active scheme.
const setColorSchemeToggle = (alternate) => {
  const classListMethod = alternate ? "add" : "remove";
  const toggleEls = window.document.querySelectorAll('.quarto-color-scheme-toggle');
  for (const toggleEl of toggleEls) {
    if (toggleEl) {
      toggleEl.classList[classListMethod]("alternate");
    }
  }
};
// Switch the page between the primary and the alternate color scheme.
//   alternate: truthy to enable the alternate stylesheets, falsy to go back
//              to the primary ones.
// Also updates the <body> mode class and the toggle controls, and applies a
// Safari-only scrollbar repaint workaround.
const toggleColorMode = (alternate) => {
  const doc = window.document;
  const sidebarLinkSelector = '#quarto-margin-sidebar .nav-link';
  const primaryStylesheets = doc.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)');
  const alternateStylesheets = doc.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate');

  // Transitions on the margin sidebar links are switched off while the
  // stylesheets are swapped and restored right after.
  manageTransitions(sidebarLinkSelector, false);
  if (!alternate) {
    disableStylesheet(alternateStylesheets);
    enableStylesheet(primaryStylesheets);
    toggleBodyColorPrimary();
  } else {
    // note: dark is layered on light, we don't disable primary!
    enableStylesheet(alternateStylesheets);
    Array.from(alternateStylesheets)
      .filter((sheet) => sheet.id === "quarto-bootstrap")
      .forEach((sheet) => toggleBodyColorMode(sheet));
  }
  manageTransitions(sidebarLinkSelector, true);

  // Switch the toggles
  setColorSchemeToggle(alternate);

  // Hack to work around the fact that Safari doesn't properly recolor the
  // scrollbar when toggling (#1455): nudge the scroll position and restore it.
  const userAgent = navigator.userAgent;
  const isSafariNotChrome = userAgent.indexOf('Safari') > 0 && userAgent.indexOf('Chrome') === -1;
  if (isSafariNotChrome) {
    manageTransitions("body", false);
    window.scrollTo(0, 1);
    setTimeout(() => {
      window.scrollTo(0, 0);
      manageTransitions("body", true);
    }, 40);
  }
};
// Turn off each given <link> by giving it the non-standard rel value
// 'disabled-stylesheet' (the value other selectors in this script test for).
const disableStylesheet = (stylesheets) => {
  for (const sheet of stylesheets) {
    sheet.rel = 'disabled-stylesheet';
  }
};
// Turn on each given <link> by setting rel back to 'stylesheet'.
const enableStylesheet = (stylesheets) => {
  for (const sheet of stylesheets) {
    // Skip sheets that are already enabled: for Chrome, which will still
    // FOUC without this check.
    if (sheet.rel === 'stylesheet') {
      continue;
    }
    sheet.rel = 'stylesheet';
  }
};
// Add or remove the 'notransition' class on every element matching
// `selector`: allowTransitions truthy removes the class, falsy adds it.
const manageTransitions = (selector, allowTransitions) => {
  const classListMethod = allowTransitions ? 'remove' : 'add';
  for (const target of window.document.querySelectorAll(selector)) {
    target.classList[classListMethod]('notransition');
  }
};
// True when the page was opened through the file: protocol.
const isFileUrl = () => window.location.protocol === 'file:';
// True only when the remembered color-scheme value is exactly "alternate";
// a missing (null) value or any other string yields false.
const hasAlternateSentinel = () => getColorSchemeSentinel() === "alternate";
// Remember the chosen scheme as "alternate" or "default". On file: URLs the
// value is kept in the script-level `localAlternateSentinel` variable;
// otherwise it is written to localStorage under "quarto-color-scheme".
const setStyleSentinel = (alternate) => {
  const value = alternate ? "alternate" : "default";
  if (isFileUrl()) {
    localAlternateSentinel = value;
    return;
  }
  window.localStorage.setItem("quarto-color-scheme", value);
};
// Read back the remembered scheme. On file: URLs this is the script-level
// `localAlternateSentinel`; otherwise it is the "quarto-color-scheme" entry
// in localStorage, falling back to `localAlternateSentinel` when that entry
// is null or undefined.
const getColorSchemeSentinel = () => {
  if (isFileUrl()) {
    return localAlternateSentinel;
  }
  const stored = window.localStorage.getItem("quarto-color-scheme");
  return stored ?? localAlternateSentinel;
};
// If a giscus comment iframe is present, tell it which theme to use.
//   isAlternate:     whether the page is now in the alternate color scheme.
//   darkModeDefault: accepted but not read here; the script-level
//                    `authorPrefersDark` constant is consulted instead.
// Theme names come from the #giscus-base-theme / #giscus-alt-theme elements'
// `value`, defaulting to 'light' and 'dark' when those are missing.
const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => {
  const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light';
  const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark';

  // When the author prefers dark, the meaning of "alternate" is flipped.
  const useAlternateTheme = authorPrefersDark ? !isAlternate : isAlternate;
  const newTheme = useAlternateTheme ? alternateTheme : baseTheme;

  const giscusFrameSelector = 'iframe.giscus-frame';
  if (window.document.querySelector(giscusFrameSelector) === null) {
    return;
  }

  // From: https://github.com/giscus/giscus/issues/336
  // The frame is looked up a second time immediately before posting.
  const iframe = document.querySelector(giscusFrameSelector);
  if (!iframe) {
    return;
  }
  const message = { setConfig: { theme: newTheme } };
  iframe.contentWindow.postMessage({ giscus: message }, 'https://giscus.app');
};
// Script-level state and start-up for the color-scheme switch.
const authorPrefersDark = false;
const darkModeDefault = authorPrefersDark;

// The "-extra" copies of the highlighting and Bootstrap sheets start disabled.
const extraSheetSelectors = [
  'link#quarto-text-highlighting-styles.quarto-color-scheme-extra',
  'link#quarto-bootstrap.quarto-color-scheme-extra',
];
for (const extraSheetSelector of extraSheetSelectors) {
  document.querySelector(extraSheetSelector).rel = 'disabled-stylesheet';
}

// In-memory remembered scheme; used on file: URLs and as the fallback when
// localStorage has no entry.
let localAlternateSentinel = darkModeDefault ? 'alternate' : 'default';

// Dark / light mode switch, exposed on window for the navbar toggle's onclick.
window.quartoToggleColorScheme = () => {
  // Flip relative to the currently remembered value.
  const toAlternate = !hasAlternateSentinel();
  toggleColorMode(toAlternate);
  setStyleSentinel(toAlternate);
  toggleGiscusIfUsed(toAlternate, darkModeDefault);
  window.dispatchEvent(new Event('resize'));
};

// Apply the remembered scheme on load (alternate if need be).
toggleColorMode(hasAlternateSentinel());
276 </script>
277
278<div id="quarto-search-results"></div>
279 <header id="quarto-header" class="headroom fixed-top">
280 <nav class="navbar navbar-expand-lg " data-bs-theme="dark">
281 <div class="navbar-container container-fluid">
282 <div class="navbar-brand-container mx-auto">
283 <a class="navbar-brand" href="../index.html">
284 <span class="navbar-title">atdata</span>
285 </a>
286 </div>
287 <div id="quarto-search" class="" title="Search"></div>
 <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
289 <span class="navbar-toggler-icon"></span>
290</button>
291 <div class="collapse navbar-collapse" id="navbarCollapse">
292 <ul class="navbar-nav navbar-nav-scroll me-auto">
293 <li class="nav-item">
294 <a class="nav-link" href="../index.html">
295<span class="menu-text">Guide</span></a>
296 </li>
297 <li class="nav-item dropdown ">
298 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="link" data-bs-toggle="dropdown" aria-expanded="false">
299 <span class="menu-text">Tutorials</span>
300 </a>
301 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials">
302 <li>
303 <a class="dropdown-item" href="../tutorials/quickstart.html">
304 <span class="dropdown-text">Quick Start</span></a>
305 </li>
306 <li>
307 <a class="dropdown-item" href="../tutorials/local-workflow.html">
308 <span class="dropdown-text">Local Workflow</span></a>
309 </li>
310 <li>
311 <a class="dropdown-item" href="../tutorials/atmosphere.html">
312 <span class="dropdown-text">Atmosphere Publishing</span></a>
313 </li>
314 <li>
315 <a class="dropdown-item" href="../tutorials/promotion.html">
316 <span class="dropdown-text">Promotion Workflow</span></a>
317 </li>
318 </ul>
319 </li>
320 <li class="nav-item dropdown ">
321 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="link" data-bs-toggle="dropdown" aria-expanded="false">
322 <span class="menu-text">Reference</span>
323 </a>
324 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference">
325 <li>
326 <a class="dropdown-item" href="../reference/architecture.html">
327 <span class="dropdown-text">Architecture Overview</span></a>
328 </li>
329 <li>
330 <a class="dropdown-item" href="../reference/packable-samples.html">
331 <span class="dropdown-text">Packable Samples</span></a>
332 </li>
333 <li>
334 <a class="dropdown-item" href="../reference/datasets.html">
335 <span class="dropdown-text">Datasets</span></a>
336 </li>
337 <li>
338 <a class="dropdown-item" href="../reference/lenses.html">
339 <span class="dropdown-text">Lenses</span></a>
340 </li>
341 <li>
342 <a class="dropdown-item" href="../reference/local-storage.html">
343 <span class="dropdown-text">Local Storage</span></a>
344 </li>
345 <li>
346 <a class="dropdown-item" href="../reference/atmosphere.html">
347 <span class="dropdown-text">Atmosphere</span></a>
348 </li>
349 <li>
350 <a class="dropdown-item" href="../reference/promotion.html">
351 <span class="dropdown-text">Promotion</span></a>
352 </li>
353 <li>
354 <a class="dropdown-item" href="../reference/load-dataset.html">
355 <span class="dropdown-text">load_dataset API</span></a>
356 </li>
357 <li>
358 <a class="dropdown-item" href="../reference/protocols.html">
359 <span class="dropdown-text">Protocols</span></a>
360 </li>
361 <li>
362 <a class="dropdown-item" href="../reference/uri-spec.html">
363 <span class="dropdown-text">URI Specification</span></a>
364 </li>
365 <li>
366 <a class="dropdown-item" href="../reference/troubleshooting.html">
367 <span class="dropdown-text">Troubleshooting & FAQ</span></a>
368 </li>
369 <li>
370 <a class="dropdown-item" href="../reference/deployment.html">
371 <span class="dropdown-text">Deployment Guide</span></a>
372 </li>
373 </ul>
374 </li>
375 <li class="nav-item">
376 <a class="nav-link" href="../api/index.html">
377<span class="menu-text">API</span></a>
378 </li>
379</ul>
380 <ul class="navbar-nav navbar-nav-scroll ms-auto">
381 <li class="nav-item compact">
 <a class="nav-link" href="https://github.com/your-org/atdata"> <i class="bi bi-github" role="img" aria-label="GitHub">
383</i>
384<span class="menu-text"></span></a>
385 </li>
386</ul>
387 </div> <!-- /navcollapse -->
388 <div class="quarto-navbar-tools">
389 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a>
390</div>
391 </div> <!-- /container-fluid -->
392 </nav>
393</header>
394<!-- content -->
395<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
396<!-- sidebar -->
397<!-- margin-sidebar -->
398 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
399 <nav id="TOC" role="doc-toc" class="toc-active">
400 <h2 id="toc-title">On this page</h2>
401
402 <ul>
403 <li><a href="#atdata.Dataset" id="toc-atdata.Dataset" class="nav-link active" data-scroll-target="#atdata.Dataset">Dataset</a>
404 <ul class="collapse">
405 <li><a href="#parameters" id="toc-parameters" class="nav-link" data-scroll-target="#parameters">Parameters</a></li>
406 <li><a href="#attributes" id="toc-attributes" class="nav-link" data-scroll-target="#attributes">Attributes</a></li>
407 <li><a href="#examples" id="toc-examples" class="nav-link" data-scroll-target="#examples">Examples</a></li>
408 <li><a href="#note" id="toc-note" class="nav-link" data-scroll-target="#note">Note</a></li>
409 <li><a href="#methods" id="toc-methods" class="nav-link" data-scroll-target="#methods">Methods</a>
410 <ul class="collapse">
411 <li><a href="#atdata.Dataset.as_type" id="toc-atdata.Dataset.as_type" class="nav-link" data-scroll-target="#atdata.Dataset.as_type">as_type</a></li>
412 <li><a href="#atdata.Dataset.list_shards" id="toc-atdata.Dataset.list_shards" class="nav-link" data-scroll-target="#atdata.Dataset.list_shards">list_shards</a></li>
413 <li><a href="#atdata.Dataset.ordered" id="toc-atdata.Dataset.ordered" class="nav-link" data-scroll-target="#atdata.Dataset.ordered">ordered</a></li>
414 <li><a href="#atdata.Dataset.shuffled" id="toc-atdata.Dataset.shuffled" class="nav-link" data-scroll-target="#atdata.Dataset.shuffled">shuffled</a></li>
415 <li><a href="#atdata.Dataset.to_parquet" id="toc-atdata.Dataset.to_parquet" class="nav-link" data-scroll-target="#atdata.Dataset.to_parquet">to_parquet</a></li>
416 <li><a href="#atdata.Dataset.wrap" id="toc-atdata.Dataset.wrap" class="nav-link" data-scroll-target="#atdata.Dataset.wrap">wrap</a></li>
417 <li><a href="#atdata.Dataset.wrap_batch" id="toc-atdata.Dataset.wrap_batch" class="nav-link" data-scroll-target="#atdata.Dataset.wrap_batch">wrap_batch</a></li>
418 </ul></li>
419 </ul></li>
420 </ul>
421<div class="toc-actions"><ul><li><a href="https://github.com/your-org/atdata/edit/main/api/Dataset.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav>
422 </div>
423<!-- main -->
424<main class="content" id="quarto-document-content"><header id="title-block-header" class="quarto-title-block"></header>
425
426
427
428
429
430<section id="atdata.Dataset" class="level1">
431<h1>Dataset</h1>
432<div class="sourceCode" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>Dataset(source<span class="op">=</span><span class="va">None</span>, metadata_url<span class="op">=</span><span class="va">None</span>, <span class="op">*</span>, url<span class="op">=</span><span class="va">None</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
433<p>A typed dataset built on WebDataset with lens transformations.</p>
434<p>This class wraps WebDataset tar archives and provides type-safe iteration over samples of a specific <code>PackableSample</code> type. Samples are stored as msgpack-serialized data within WebDataset shards.</p>
<p>The dataset supports:</p>
<ul>
<li>Ordered and shuffled iteration</li>
<li>Automatic batching with <code>SampleBatch</code></li>
<li>Type transformations via the lens system (<code>as_type()</code>)</li>
<li>Export to parquet format</li>
</ul>
436<section id="parameters" class="level2 doc-section doc-section-parameters">
437<h2 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h2>
438<table class="caption-top table">
439<colgroup>
440<col style="width: 8%">
441<col style="width: 8%">
442<col style="width: 72%">
443<col style="width: 12%">
444</colgroup>
445<thead>
446<tr class="header">
447<th>Name</th>
448<th>Type</th>
449<th>Description</th>
450<th>Default</th>
451</tr>
452</thead>
453<tbody>
454<tr class="odd">
455<td>ST</td>
456<td></td>
457<td>The sample type for this dataset, must derive from <code>PackableSample</code>.</td>
458<td><em>required</em></td>
459</tr>
460</tbody>
461</table>
462</section>
463<section id="attributes" class="level2 doc-section doc-section-attributes">
464<h2 class="doc-section doc-section-attributes anchored" data-anchor-id="attributes">Attributes</h2>
465<table class="caption-top table">
466<thead>
467<tr class="header">
468<th>Name</th>
469<th>Type</th>
470<th>Description</th>
471</tr>
472</thead>
473<tbody>
474<tr class="odd">
475<td>url</td>
476<td></td>
477<td>WebDataset brace-notation URL for the tar file(s).</td>
478</tr>
479</tbody>
480</table>
481</section>
482<section id="examples" class="level2 doc-section doc-section-examples">
483<h2 class="doc-section doc-section-examples anchored" data-anchor-id="examples">Examples</h2>
484<div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> ds <span class="op">=</span> Dataset[MyData](<span class="st">"path/to/data-{000000..000009}.tar"</span>)</span>
485<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> <span class="cf">for</span> sample <span class="kw">in</span> ds.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span>
486<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>... <span class="co"># sample is SampleBatch[MyData] with batch_size samples</span></span>
487<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>... embeddings <span class="op">=</span> sample.embeddings <span class="co"># shape: (32, ...)</span></span>
488<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a>...</span>
489<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> <span class="co"># Transform to a different view</span></span>
490<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> ds_view <span class="op">=</span> ds.as_type(MyDataView)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
491</section>
492<section id="note" class="level2 doc-section doc-section-note">
493<h2 class="doc-section doc-section-note anchored" data-anchor-id="note">Note</h2>
494<p>This class uses Python’s <code>__orig_class__</code> mechanism to extract the type parameter at runtime. Instances must be created using the subscripted syntax <code>Dataset[MyType](url)</code> rather than calling the constructor directly with an unsubscripted class.</p>
495</section>
496<section id="methods" class="level2">
497<h2 class="anchored" data-anchor-id="methods">Methods</h2>
498<table class="caption-top table">
499<thead>
500<tr class="header">
501<th>Name</th>
502<th>Description</th>
503</tr>
504</thead>
505<tbody>
506<tr class="odd">
507<td><a href="#atdata.Dataset.as_type">as_type</a></td>
508<td>View this dataset through a different sample type using a registered lens.</td>
509</tr>
510<tr class="even">
511<td><a href="#atdata.Dataset.list_shards">list_shards</a></td>
512<td>Get list of individual dataset shards.</td>
513</tr>
514<tr class="odd">
515<td><a href="#atdata.Dataset.ordered">ordered</a></td>
<td>Iterate over the dataset in order.</td>
517</tr>
518<tr class="even">
519<td><a href="#atdata.Dataset.shuffled">shuffled</a></td>
520<td>Iterate over the dataset in random order.</td>
521</tr>
522<tr class="odd">
523<td><a href="#atdata.Dataset.to_parquet">to_parquet</a></td>
524<td>Export dataset contents to parquet format.</td>
525</tr>
526<tr class="even">
527<td><a href="#atdata.Dataset.wrap">wrap</a></td>
528<td>Wrap a raw msgpack sample into the appropriate dataset-specific type.</td>
529</tr>
530<tr class="odd">
531<td><a href="#atdata.Dataset.wrap_batch">wrap_batch</a></td>
532<td>Wrap a batch of raw msgpack samples into a typed SampleBatch.</td>
533</tr>
534</tbody>
535</table>
536<section id="atdata.Dataset.as_type" class="level3">
537<h3 class="anchored" data-anchor-id="atdata.Dataset.as_type">as_type</h3>
538<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>Dataset.as_type(other)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
539<p>View this dataset through a different sample type using a registered lens.</p>
540<section id="parameters-1" class="level4 doc-section doc-section-parameters">
541<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-1">Parameters</h4>
542<table class="caption-top table">
543<thead>
544<tr class="header">
545<th>Name</th>
546<th>Type</th>
547<th>Description</th>
548<th>Default</th>
549</tr>
550</thead>
551<tbody>
552<tr class="odd">
553<td>other</td>
554<td><a href="`typing.Type`">Type</a>[<a href="`atdata.dataset.RT`">RT</a>]</td>
555<td>The target sample type to transform into. Must be a type derived from <code>PackableSample</code>.</td>
556<td><em>required</em></td>
557</tr>
558</tbody>
559</table>
560</section>
561<section id="returns" class="level4 doc-section doc-section-returns">
562<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns">Returns</h4>
563<table class="caption-top table">
564<thead>
565<tr class="header">
566<th>Name</th>
567<th>Type</th>
568<th>Description</th>
569</tr>
570</thead>
571<tbody>
<tr class="odd">
<td></td>
<td><a href="`atdata.dataset.Dataset`">Dataset</a>[<a href="`atdata.dataset.RT`">RT</a>]</td>
<td>A new <code>Dataset</code> instance that yields samples of type <code>other</code> by applying the appropriate lens transformation from the global <code>LensNetwork</code> registry.</td>
</tr>
587</tbody>
588</table>
589</section>
590<section id="raises" class="level4 doc-section doc-section-raises">
591<h4 class="doc-section doc-section-raises anchored" data-anchor-id="raises">Raises</h4>
592<table class="caption-top table">
593<thead>
594<tr class="header">
595<th>Name</th>
596<th>Type</th>
597<th>Description</th>
598</tr>
599</thead>
600<tbody>
601<tr class="odd">
602<td></td>
603<td><a href="`ValueError`">ValueError</a></td>
604<td>If no registered lens exists between the current sample type and the target type.</td>
605</tr>
606</tbody>
607</table>
608</section>
609</section>
610<section id="atdata.Dataset.list_shards" class="level3">
611<h3 class="anchored" data-anchor-id="atdata.Dataset.list_shards">list_shards</h3>
612<div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>Dataset.list_shards()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
613<p>Get list of individual dataset shards.</p>
614<section id="returns-1" class="level4 doc-section doc-section-returns">
615<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-1">Returns</h4>
616<table class="caption-top table">
617<thead>
618<tr class="header">
619<th>Name</th>
620<th>Type</th>
621<th>Description</th>
622</tr>
623</thead>
624<tbody>
<tr class="odd">
<td></td>
<td><a href="`list`">list</a>[<a href="`str`">str</a>]</td>
<td>A full (non-lazy) list of the individual <code>tar</code> files within the source WebDataset.</td>
</tr>
635</tbody>
636</table>
637</section>
638</section>
639<section id="atdata.Dataset.ordered" class="level3">
640<h3 class="anchored" data-anchor-id="atdata.Dataset.ordered">ordered</h3>
641<div class="sourceCode" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>Dataset.ordered(batch_size<span class="op">=</span><span class="va">None</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Iterate over the dataset in order.</p>
643<section id="parameters-2" class="level4 doc-section doc-section-parameters">
644<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-2">Parameters</h4>
645<table class="caption-top table">
646<colgroup>
647<col style="width: 7%">
648<col style="width: 4%">
649<col style="width: 81%">
650<col style="width: 6%">
651</colgroup>
652<thead>
653<tr class="header">
654<th>Name</th>
655<th>Type</th>
656<th>Description</th>
657<th>Default</th>
658</tr>
659</thead>
660<tbody>
<tr class="odd">
<td>batch_size</td>
<td><code>int</code>, optional</td>
<td>The size of iterated batches. Default: None (unbatched). If <code>None</code>, iterates over one sample at a time with no batch dimension.</td>
<td><code>None</code></td>
</tr>
667</tbody>
668</table>
669</section>
670<section id="returns-2" class="level4 doc-section doc-section-returns">
671<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-2">Returns</h4>
672<table class="caption-top table">
673<thead>
674<tr class="header">
675<th>Name</th>
676<th>Type</th>
677<th>Description</th>
678</tr>
679</thead>
680<tbody>
<tr class="odd">
<td></td>
<td><a href="`typing.Iterable`">Iterable</a>[<a href="`atdata.dataset.ST`">ST</a>]</td>
<td><code>webdataset.DataPipeline</code>: a data pipeline that iterates over the dataset in its original sample order.</td>
</tr>
691</tbody>
692</table>
693</section>
694</section>
695<section id="atdata.Dataset.shuffled" class="level3">
696<h3 class="anchored" data-anchor-id="atdata.Dataset.shuffled">shuffled</h3>
697<div class="sourceCode" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>Dataset.shuffled(buffer_shards<span class="op">=</span><span class="dv">100</span>, buffer_samples<span class="op">=</span><span class="dv">10000</span>, batch_size<span class="op">=</span><span class="va">None</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
698<p>Iterate over the dataset in random order.</p>
699<section id="parameters-3" class="level4 doc-section doc-section-parameters">
700<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-3">Parameters</h4>
701<table class="caption-top table">
702<thead>
703<tr class="header">
704<th>Name</th>
705<th>Type</th>
706<th>Description</th>
707<th>Default</th>
708</tr>
709</thead>
710<tbody>
711<tr class="odd">
712<td>buffer_shards</td>
713<td><a href="`int`">int</a></td>
714<td>Number of shards to buffer for shuffling at the shard level. Larger values increase randomness but use more memory. Default: 100.</td>
715<td><code>100</code></td>
716</tr>
717<tr class="even">
718<td>buffer_samples</td>
719<td><a href="`int`">int</a></td>
720<td>Number of samples to buffer for shuffling within shards. Larger values increase randomness but use more memory. Default: 10,000.</td>
721<td><code>10000</code></td>
722</tr>
723<tr class="odd">
724<td>batch_size</td>
725<td><a href="`int`">int</a> | None</td>
726<td>The size of iterated batches. Default: None (unbatched). If <code>None</code>, iterates over one sample at a time with no batch dimension.</td>
727<td><code>None</code></td>
728</tr>
729</tbody>
730</table>
731</section>
732<section id="returns-3" class="level4 doc-section doc-section-returns">
733<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-3">Returns</h4>
734<table class="caption-top table">
735<thead>
736<tr class="header">
737<th>Name</th>
738<th>Type</th>
739<th>Description</th>
740</tr>
741</thead>
742<tbody>
743<tr class="odd">
744<td></td>
745<td><a href="`typing.Iterable`">Iterable</a>[<a href="`atdata.dataset.ST`">ST</a>]</td>
746<td>A WebDataset data pipeline that iterates over the dataset in randomized order. If <code>batch_size</code> is not <code>None</code>, yields <code>SampleBatch[ST]</code> instances; otherwise yields individual <code>ST</code> samples.</td>
747</tr>
763</tbody>
764</table>
765</section>
766</section>
767<section id="atdata.Dataset.to_parquet" class="level3">
768<h3 class="anchored" data-anchor-id="atdata.Dataset.to_parquet">to_parquet</h3>
769<div class="sourceCode" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>Dataset.to_parquet(path, sample_map<span class="op">=</span><span class="va">None</span>, maxcount<span class="op">=</span><span class="va">None</span>, <span class="op">**</span>kwargs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
770<p>Export dataset contents to parquet format.</p>
771<p>Converts all samples to a pandas DataFrame and saves to parquet file(s). Useful for interoperability with data analysis tools.</p>
772<section id="parameters-4" class="level4 doc-section doc-section-parameters">
773<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-4">Parameters</h4>
774<table class="caption-top table">
775<thead>
776<tr class="header">
777<th>Name</th>
778<th>Type</th>
779<th>Description</th>
780<th>Default</th>
781</tr>
782</thead>
783<tbody>
784<tr class="odd">
785<td>path</td>
786<td><a href="`atdata.dataset.Pathlike`">Pathlike</a></td>
787<td>Output path for the parquet file. If <code>maxcount</code> is specified, files are named <code>{stem}-{segment:06d}.parquet</code>.</td>
788<td><em>required</em></td>
789</tr>
790<tr class="even">
791<td>sample_map</td>
792<td><a href="`typing.Optional`">Optional</a>[<a href="`atdata.dataset.SampleExportMap`">SampleExportMap</a>]</td>
793<td>Optional function to convert samples to dictionaries. Defaults to <code>dataclasses.asdict</code>.</td>
794<td><code>None</code></td>
795</tr>
796<tr class="odd">
797<td>maxcount</td>
798<td><a href="`typing.Optional`">Optional</a>[<a href="`int`">int</a>]</td>
799<td>If specified, split output into multiple files with at most this many samples each. Recommended for large datasets.</td>
800<td><code>None</code></td>
801</tr>
802<tr class="even">
803<td>**kwargs</td>
804<td></td>
805<td>Additional arguments passed to <code>pandas.DataFrame.to_parquet()</code>. Common options include <code>compression</code>, <code>index</code>, <code>engine</code>.</td>
806<td><code>{}</code></td>
807</tr>
808</tbody>
809</table>
810</section>
811<section id="warning" class="level4 doc-section doc-section-warning">
812<h4 class="doc-section doc-section-warning anchored" data-anchor-id="warning">Warning</h4>
813<p><strong>Memory Usage</strong>: When <code>maxcount=None</code> (default), this method loads the <strong>entire dataset into memory</strong> as a pandas DataFrame before writing. For large datasets, this can cause memory exhaustion.</p>
814<p>For datasets larger than available RAM, always specify <code>maxcount</code>:</p>
815<pre><code># Safe for large datasets - processes in chunks
816ds.to_parquet("output.parquet", maxcount=10000)</code></pre>
817<p>This creates multiple parquet files: <code>output-000000.parquet</code>, <code>output-000001.parquet</code>, etc.</p>
818</section>
819<section id="examples-1" class="level4 doc-section doc-section-examples">
820<h4 class="doc-section doc-section-examples anchored" data-anchor-id="examples-1">Examples</h4>
821<div class="sourceCode" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> ds <span class="op">=</span> Dataset[MySample](<span class="st">"data.tar"</span>)</span>
822<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> <span class="co"># Small dataset - load all at once</span></span>
823<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> ds.to_parquet(<span class="st">"output.parquet"</span>)</span>
824<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span></span>
825<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> <span class="co"># Large dataset - process in chunks</span></span>
826<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> ds.to_parquet(<span class="st">"output.parquet"</span>, maxcount<span class="op">=</span><span class="dv">50000</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
827</section>
828</section>
829<section id="atdata.Dataset.wrap" class="level3">
830<h3 class="anchored" data-anchor-id="atdata.Dataset.wrap">wrap</h3>
831<div class="sourceCode" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>Dataset.wrap(sample)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
832<p>Wrap a raw msgpack sample into the appropriate dataset-specific type.</p>
833<section id="parameters-5" class="level4 doc-section doc-section-parameters">
834<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-5">Parameters</h4>
835<table class="caption-top table">
836<thead>
837<tr class="header">
838<th>Name</th>
839<th>Type</th>
840<th>Description</th>
841<th>Default</th>
842</tr>
843</thead>
844<tbody>
845<tr class="odd">
846<td>sample</td>
847<td><a href="`atdata.dataset.WDSRawSample`">WDSRawSample</a></td>
848<td>A dictionary containing at minimum a <code>'msgpack'</code> key with serialized sample bytes.</td>
849<td><em>required</em></td>
850</tr>
851</tbody>
852</table>
853</section>
854<section id="returns-4" class="level4 doc-section doc-section-returns">
855<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-4">Returns</h4>
856<table class="caption-top table">
857<thead>
858<tr class="header">
859<th>Name</th>
860<th>Type</th>
861<th>Description</th>
862</tr>
863</thead>
864<tbody>
865<tr class="odd">
866<td></td>
867<td><a href="`atdata.dataset.ST`">ST</a></td>
868<td>A deserialized sample of type <code>ST</code>, optionally transformed through a lens if <code>as_type()</code> was called.</td>
869</tr>
875</tbody>
876</table>
877</section>
878</section>
879<section id="atdata.Dataset.wrap_batch" class="level3">
880<h3 class="anchored" data-anchor-id="atdata.Dataset.wrap_batch">wrap_batch</h3>
881<div class="sourceCode" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>Dataset.wrap_batch(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
882<p>Wrap a batch of raw msgpack samples into a typed SampleBatch.</p>
883<section id="parameters-6" class="level4 doc-section doc-section-parameters">
884<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-6">Parameters</h4>
885<table class="caption-top table">
886<thead>
887<tr class="header">
888<th>Name</th>
889<th>Type</th>
890<th>Description</th>
891<th>Default</th>
892</tr>
893</thead>
894<tbody>
895<tr class="odd">
896<td>batch</td>
897<td><a href="`atdata.dataset.WDSRawBatch`">WDSRawBatch</a></td>
898<td>A dictionary containing a <code>'msgpack'</code> key with a list of serialized sample bytes.</td>
899<td><em>required</em></td>
900</tr>
901</tbody>
902</table>
903</section>
904<section id="returns-5" class="level4 doc-section doc-section-returns">
905<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-5">Returns</h4>
906<table class="caption-top table">
907<thead>
908<tr class="header">
909<th>Name</th>
910<th>Type</th>
911<th>Description</th>
912</tr>
913</thead>
914<tbody>
915<tr class="odd">
916<td></td>
917<td><a href="`atdata.dataset.SampleBatch`">SampleBatch</a>[<a href="`atdata.dataset.ST`">ST</a>]</td>
918<td>A <code>SampleBatch[ST]</code> containing deserialized samples, optionally transformed through a lens if <code>as_type()</code> was called.</td>
919</tr>
925</tbody>
926</table>
927</section>
928<section id="note-1" class="level4 doc-section doc-section-note">
929<h4 class="doc-section doc-section-note anchored" data-anchor-id="note-1">Note</h4>
930<p>This implementation deserializes samples one at a time, then aggregates them into a batch.</p>
931
932
933</section>
934</section>
935</section>
936</section>
937
938</main> <!-- /main -->
939<script id="quarto-html-after-body" type="application/javascript">
940 window.document.addEventListener("DOMContentLoaded", function (event) {
941 // Ensure there is a toggle, if there isn't float one in the top right
942 if (window.document.querySelector('.quarto-color-scheme-toggle') === null) {
943 const a = window.document.createElement('a');
944 a.classList.add('top-right');
945 a.classList.add('quarto-color-scheme-toggle');
946 a.href = "";
947 a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; };
948 const i = window.document.createElement("i");
949 i.classList.add('bi');
950 a.appendChild(i);
951 window.document.body.appendChild(a);
952 }
953 setColorSchemeToggle(hasAlternateSentinel())
954 const icon = "";
955 const anchorJS = new window.AnchorJS();
956 anchorJS.options = {
957 placement: 'right',
958 icon: icon
959 };
960 anchorJS.add('.anchored');
961 const isCodeAnnotation = (el) => {
962 for (const clz of el.classList) {
963 if (clz.startsWith('code-annotation-')) {
964 return true;
965 }
966 }
967 return false;
968 }
969 const onCopySuccess = function(e) {
970 // button target
971 const button = e.trigger;
972 // don't keep focus
973 button.blur();
974 // flash "checked"
975 button.classList.add('code-copy-button-checked');
976 var currentTitle = button.getAttribute("title");
977 button.setAttribute("title", "Copied!");
978 let tooltip;
979 if (window.bootstrap) {
980 button.setAttribute("data-bs-toggle", "tooltip");
981 button.setAttribute("data-bs-placement", "left");
982 button.setAttribute("data-bs-title", "Copied!");
983 tooltip = new bootstrap.Tooltip(button,
984 { trigger: "manual",
985 customClass: "code-copy-button-tooltip",
986 offset: [0, -8]});
987 tooltip.show();
988 }
989 setTimeout(function() {
990 if (tooltip) {
991 tooltip.hide();
992 button.removeAttribute("data-bs-title");
993 button.removeAttribute("data-bs-toggle");
994 button.removeAttribute("data-bs-placement");
995 }
996 button.setAttribute("title", currentTitle);
997 button.classList.remove('code-copy-button-checked');
998 }, 1000);
999 // clear code selection
1000 e.clearSelection();
1001 }
1002 const getTextToCopy = function(trigger) {
1003 const codeEl = trigger.previousElementSibling.cloneNode(true);
1004 for (const childEl of codeEl.children) {
1005 if (isCodeAnnotation(childEl)) {
1006 childEl.remove();
1007 }
1008 }
1009 return codeEl.innerText;
1010 }
1011 const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
1012 text: getTextToCopy
1013 });
1014 clipboard.on('success', onCopySuccess);
1015 if (window.document.getElementById('quarto-embedded-source-code-modal')) {
1016 const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
1017 text: getTextToCopy,
1018 container: window.document.getElementById('quarto-embedded-source-code-modal')
1019 });
1020 clipboardModal.on('success', onCopySuccess);
1021 }
1022 var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
1023 var mailtoRegex = new RegExp(/^mailto:/);
1024 var filterRegex = new RegExp("https:\/\/github\.com\/your-org\/atdata");
1025 var isInternal = (href) => {
1026 return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
1027 }
1028 // Inspect non-navigation links and adorn them if external
1029 var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
1030 for (var i=0; i<links.length; i++) {
1031 const link = links[i];
1032 if (!isInternal(link.href)) {
1033 // undo the damage that might have been done by quarto-nav.js in the case of
1034 // links that we want to consider external
1035 if (link.dataset.originalHref !== undefined) {
1036 link.href = link.dataset.originalHref;
1037 }
1038 }
1039 }
1040 function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
1041 const config = {
1042 allowHTML: true,
1043 maxWidth: 500,
1044 delay: 100,
1045 arrow: false,
1046 appendTo: function(el) {
1047 return el.parentElement;
1048 },
1049 interactive: true,
1050 interactiveBorder: 10,
1051 theme: 'quarto',
1052 placement: 'bottom-start',
1053 };
1054 if (contentFn) {
1055 config.content = contentFn;
1056 }
1057 if (onTriggerFn) {
1058 config.onTrigger = onTriggerFn;
1059 }
1060 if (onUntriggerFn) {
1061 config.onUntrigger = onUntriggerFn;
1062 }
1063 window.tippy(el, config);
1064 }
1065 const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
1066 for (var i=0; i<noterefs.length; i++) {
1067 const ref = noterefs[i];
1068 tippyHover(ref, function() {
1069 // use id or data attribute instead here
1070 let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
1071 try { href = new URL(href).hash; } catch {}
1072 const id = href.replace(/^#\/?/, "");
1073 const note = window.document.getElementById(id);
1074 if (note) {
1075 return note.innerHTML;
1076 } else {
1077 return "";
1078 }
1079 });
1080 }
1081 const xrefs = window.document.querySelectorAll('a.quarto-xref');
1082 const processXRef = (id, note) => {
1083 // Strip column container classes
1084 const stripColumnClz = (el) => {
1085 el.classList.remove("page-full", "page-columns");
1086 if (el.children) {
1087 for (const child of el.children) {
1088 stripColumnClz(child);
1089 }
1090 }
1091 }
1092 stripColumnClz(note)
1093 if (id === null || id.startsWith('sec-')) {
1094 // Special case sections, only their first couple elements
1095 const container = document.createElement("div");
1096 if (note.children && note.children.length > 2) {
1097 container.appendChild(note.children[0].cloneNode(true));
1098 for (let i = 1; i < note.children.length; i++) {
1099 const child = note.children[i];
1100 if (child.tagName === "P" && child.innerText === "") {
1101 continue;
1102 } else {
1103 container.appendChild(child.cloneNode(true));
1104 break;
1105 }
1106 }
1107 if (window.Quarto?.typesetMath) {
1108 window.Quarto.typesetMath(container);
1109 }
1110 return container.innerHTML
1111 } else {
1112 if (window.Quarto?.typesetMath) {
1113 window.Quarto.typesetMath(note);
1114 }
1115 return note.innerHTML;
1116 }
1117 } else {
1118 // Remove any anchor links if they are present
1119 const anchorLink = note.querySelector('a.anchorjs-link');
1120 if (anchorLink) {
1121 anchorLink.remove();
1122 }
1123 if (window.Quarto?.typesetMath) {
1124 window.Quarto.typesetMath(note);
1125 }
1126 if (note.classList.contains("callout")) {
1127 return note.outerHTML;
1128 } else {
1129 return note.innerHTML;
1130 }
1131 }
1132 }
1133 for (var i=0; i<xrefs.length; i++) {
1134 const xref = xrefs[i];
1135 tippyHover(xref, undefined, function(instance) {
1136 instance.disable();
1137 let url = xref.getAttribute('href');
1138 let hash = undefined;
1139 if (url.startsWith('#')) {
1140 hash = url;
1141 } else {
1142 try { hash = new URL(url).hash; } catch {}
1143 }
1144 if (hash) {
1145 const id = hash.replace(/^#\/?/, "");
1146 const note = window.document.getElementById(id);
1147 if (note !== null) {
1148 try {
1149 const html = processXRef(id, note.cloneNode(true));
1150 instance.setContent(html);
1151 } finally {
1152 instance.enable();
1153 instance.show();
1154 }
1155 } else {
1156 // See if we can fetch this
1157 fetch(url.split('#')[0])
1158 .then(res => res.text())
1159 .then(html => {
1160 const parser = new DOMParser();
1161 const htmlDoc = parser.parseFromString(html, "text/html");
1162 const note = htmlDoc.getElementById(id);
1163 if (note !== null) {
1164 const html = processXRef(id, note);
1165 instance.setContent(html);
1166 }
1167 }).finally(() => {
1168 instance.enable();
1169 instance.show();
1170 });
1171 }
1172 } else {
1173 // See if we can fetch a full url (with no hash to target)
1174 // This is a special case and we should probably do some content thinning / targeting
1175 fetch(url)
1176 .then(res => res.text())
1177 .then(html => {
1178 const parser = new DOMParser();
1179 const htmlDoc = parser.parseFromString(html, "text/html");
1180 const note = htmlDoc.querySelector('main.content');
1181 if (note !== null) {
1182 // This should only happen for chapter cross references
1183 // (since there is no id in the URL)
1184 // remove the first header
1185 if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
1186 note.children[0].remove();
1187 }
1188 const html = processXRef(null, note);
1189 instance.setContent(html);
1190 }
1191 }).finally(() => {
1192 instance.enable();
1193 instance.show();
1194 });
1195 }
1196 }, function(instance) {
1197 });
1198 }
1199 let selectedAnnoteEl;
1200 const selectorForAnnotation = ( cell, annotation) => {
1201 let cellAttr = 'data-code-cell="' + cell + '"';
1202 let lineAttr = 'data-code-annotation="' + annotation + '"';
1203 const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
1204 return selector;
1205 }
1206 const selectCodeLines = (annoteEl) => {
1207 const doc = window.document;
1208 const targetCell = annoteEl.getAttribute("data-target-cell");
1209 const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
1210 const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
1211 const lines = annoteSpan.getAttribute("data-code-lines").split(",");
1212 const lineIds = lines.map((line) => {
1213 return targetCell + "-" + line;
1214 })
1215 let top = null;
1216 let height = null;
1217 let parent = null;
1218 if (lineIds.length > 0) {
1219 //compute the position of the single el (top and bottom and make a div)
1220 const el = window.document.getElementById(lineIds[0]);
1221 top = el.offsetTop;
1222 height = el.offsetHeight;
1223 parent = el.parentElement.parentElement;
1224 if (lineIds.length > 1) {
1225 const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
1226 const bottom = lastEl.offsetTop + lastEl.offsetHeight;
1227 height = bottom - top;
1228 }
1229 if (top !== null && height !== null && parent !== null) {
1230 // cook up a div (if necessary) and position it
1231 let div = window.document.getElementById("code-annotation-line-highlight");
1232 if (div === null) {
1233 div = window.document.createElement("div");
1234 div.setAttribute("id", "code-annotation-line-highlight");
1235 div.style.position = 'absolute';
1236 parent.appendChild(div);
1237 }
1238 div.style.top = top - 2 + "px";
1239 div.style.height = height + 4 + "px";
1240 div.style.left = 0;
1241 let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
1242 if (gutterDiv === null) {
1243 gutterDiv = window.document.createElement("div");
1244 gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
1245 gutterDiv.style.position = 'absolute';
1246 const codeCell = window.document.getElementById(targetCell);
1247 const gutter = codeCell.querySelector('.code-annotation-gutter');
1248 gutter.appendChild(gutterDiv);
1249 }
1250 gutterDiv.style.top = top - 2 + "px";
1251 gutterDiv.style.height = height + 4 + "px";
1252 }
1253 selectedAnnoteEl = annoteEl;
1254 }
1255 };
1256 const unselectCodeLines = () => {
1257 const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
1258 elementsIds.forEach((elId) => {
1259 const div = window.document.getElementById(elId);
1260 if (div) {
1261 div.remove();
1262 }
1263 });
1264 selectedAnnoteEl = undefined;
1265 };
1266 // Handle positioning of the toggle
1267 window.addEventListener(
1268 "resize",
1269 throttle(() => {
1270 elRect = undefined;
1271 if (selectedAnnoteEl) {
1272 selectCodeLines(selectedAnnoteEl);
1273 }
1274 }, 10)
1275 );
1276 function throttle(fn, ms) {
1277 let throttle = false;
1278 let timer;
1279 return (...args) => {
1280 if(!throttle) { // first call gets through
1281 fn.apply(this, args);
1282 throttle = true;
1283 } else { // all the others get throttled
1284 if(timer) clearTimeout(timer); // cancel #2
1285 timer = setTimeout(() => {
1286 fn.apply(this, args);
1287 timer = throttle = false;
1288 }, ms);
1289 }
1290 };
1291 }
1292 // Attach click handler to the DT
1293 const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
1294 for (const annoteDlNode of annoteDls) {
1295 annoteDlNode.addEventListener('click', (event) => {
1296 const clickedEl = event.target;
1297 if (clickedEl !== selectedAnnoteEl) {
1298 unselectCodeLines();
1299 const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
1300 if (activeEl) {
1301 activeEl.classList.remove('code-annotation-active');
1302 }
1303 selectCodeLines(clickedEl);
1304 clickedEl.classList.add('code-annotation-active');
1305 } else {
1306 // Unselect the line
1307 unselectCodeLines();
1308 clickedEl.classList.remove('code-annotation-active');
1309 }
1310 });
1311 }
1312 const findCites = (el) => {
1313 const parentEl = el.parentElement;
1314 if (parentEl) {
1315 const cites = parentEl.dataset.cites;
1316 if (cites) {
1317 return {
1318 el,
1319 cites: cites.split(' ')
1320 };
1321 } else {
1322 return findCites(el.parentElement)
1323 }
1324 } else {
1325 return undefined;
1326 }
1327 };
1328 var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
1329 for (var i=0; i<bibliorefs.length; i++) {
1330 const ref = bibliorefs[i];
1331 const citeInfo = findCites(ref);
1332 if (citeInfo) {
1333 tippyHover(citeInfo.el, function() {
1334 var popup = window.document.createElement('div');
1335 citeInfo.cites.forEach(function(cite) {
1336 var citeDiv = window.document.createElement('div');
1337 citeDiv.classList.add('hanging-indent');
1338 citeDiv.classList.add('csl-entry');
1339 var biblioDiv = window.document.getElementById('ref-' + cite);
1340 if (biblioDiv) {
1341 citeDiv.innerHTML = biblioDiv.innerHTML;
1342 }
1343 popup.appendChild(citeDiv);
1344 });
1345 return popup.innerHTML;
1346 });
1347 }
1348 }
1349 });
1350 </script>
1351</div> <!-- /content -->
1352<footer class="footer">
1353 <div class="nav-footer">
1354 <div class="nav-footer-left">
1355<p>Built with <a href="https://quarto.org/">Quarto</a></p>
1356</div>
1357 <div class="nav-footer-center">
1358
1359 <div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/your-org/atdata/edit/main/api/Dataset.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></div>
1360 <div class="nav-footer-right">
1361<p>MIT License</p>
1362</div>
1363 </div>
1364</footer>
1365
1366
1367
1368
1369</body></html>