A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 1369 lines 58 kB view raw
1<!DOCTYPE html> 2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head> 3 4<meta charset="utf-8"> 5<meta name="generator" content="quarto-1.7.34"> 6 7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> 8 9 10<title>dataset – atdata</title> 11<style> 12code{white-space: pre-wrap;} 13span.smallcaps{font-variant: small-caps;} 14div.columns{display: flex; gap: min(4vw, 1.5em);} 15div.column{flex: auto; overflow-x: auto;} 16div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} 17ul.task-list{list-style: none;} 18ul.task-list li input[type="checkbox"] { 19 width: 0.8em; 20 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 21 vertical-align: middle; 22} 23/* CSS for syntax highlighting */ 24html { -webkit-text-size-adjust: 100%; } 25pre > code.sourceCode { white-space: pre; position: relative; } 26pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } 27pre > code.sourceCode > span:empty { height: 1.2em; } 28.sourceCode { overflow: visible; } 29code.sourceCode > span { color: inherit; text-decoration: inherit; } 30div.sourceCode { margin: 1em 0; } 31pre.sourceCode { margin: 0; } 32@media screen { 33div.sourceCode { overflow: auto; } 34} 35@media print { 36pre > code.sourceCode { white-space: pre-wrap; } 37pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } 38} 39pre.numberSource code 40 { counter-reset: source-line 0; } 41pre.numberSource code > span 42 { position: relative; left: -4em; counter-increment: source-line; } 43pre.numberSource code > span > a:first-child::before 44 { content: counter(source-line); 45 position: relative; left: -1em; text-align: right; vertical-align: baseline; 46 border: none; display: inline-block; 47 -webkit-touch-callout: none; -webkit-user-select: none; 48 -khtml-user-select: none; -moz-user-select: none; 49 -ms-user-select: none; user-select: none; 50 padding: 0 4px; width: 4em; 
51 } 52pre.numberSource { margin-left: 3em; padding-left: 4px; } 53div.sourceCode 54 { } 55@media screen { 56pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } 57} 58</style> 59 60 61<script src="../site_libs/quarto-nav/quarto-nav.js"></script> 62<script src="../site_libs/quarto-nav/headroom.min.js"></script> 63<script src="../site_libs/clipboard/clipboard.min.js"></script> 64<script src="../site_libs/quarto-search/autocomplete.umd.js"></script> 65<script src="../site_libs/quarto-search/fuse.min.js"></script> 66<script src="../site_libs/quarto-search/quarto-search.js"></script> 67<meta name="quarto:offset" content="../"> 68<script src="../site_libs/quarto-html/quarto.js" type="module"></script> 69<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script> 70<script src="../site_libs/quarto-html/popper.min.js"></script> 71<script src="../site_libs/quarto-html/tippy.umd.min.js"></script> 72<script src="../site_libs/quarto-html/anchor.min.js"></script> 73<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet"> 74<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles"> 75<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles"> 76<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles"> 77<script src="../site_libs/bootstrap/bootstrap.min.js"></script> 78<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet"> 79<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" 
data-mode="light"> 80<link href="../site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark"> 81<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light"> 82<script id="quarto-search-options" type="application/json">{ 83 "location": "navbar", 84 "copy-button": false, 85 "collapse-after": 3, 86 "panel-placement": "end", 87 "type": "overlay", 88 "limit": 50, 89 "keyboard-shortcut": [ 90 "f", 91 "/", 92 "s" 93 ], 94 "show-item-context": false, 95 "language": { 96 "search-no-results-text": "No results", 97 "search-matching-documents-text": "matching documents", 98 "search-copy-link-title": "Copy link to search", 99 "search-hide-matches-text": "Hide additional matches", 100 "search-more-match-text": "more match in this document", 101 "search-more-matches-text": "more matches in this document", 102 "search-clear-button-title": "Clear", 103 "search-text-placeholder": "", 104 "search-detached-cancel-button-title": "Cancel", 105 "search-submit-button-title": "Submit", 106 "search-label": "Search" 107 } 108}</script> 109 110 111<link rel="stylesheet" href="../assets/styles.css"> 112</head> 113 114<body class="nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript"> 115 const toggleBodyColorMode = (bsSheetEl) => { 116 const mode = bsSheetEl.getAttribute("data-mode"); 117 const bodyEl = window.document.querySelector("body"); 118 if (mode === "dark") { 119 bodyEl.classList.add("quarto-dark"); 120 bodyEl.classList.remove("quarto-light"); 121 } else { 122 bodyEl.classList.add("quarto-light"); 123 bodyEl.classList.remove("quarto-dark"); 124 } 125 } 126 const toggleBodyColorPrimary = () => { 127 const bsSheetEl = 
window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])"); 128 if (bsSheetEl) { 129 toggleBodyColorMode(bsSheetEl); 130 } 131 } 132 const setColorSchemeToggle = (alternate) => { 133 const toggles = window.document.querySelectorAll('.quarto-color-scheme-toggle'); 134 for (let i=0; i < toggles.length; i++) { 135 const toggle = toggles[i]; 136 if (toggle) { 137 if (alternate) { 138 toggle.classList.add("alternate"); 139 } else { 140 toggle.classList.remove("alternate"); 141 } 142 } 143 } 144 }; 145 const toggleColorMode = (alternate) => { 146 // Switch the stylesheets 147 const primaryStylesheets = window.document.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)'); 148 const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate'); 149 manageTransitions('#quarto-margin-sidebar .nav-link', false); 150 if (alternate) { 151 // note: dark is layered on light, we don't disable primary! 152 enableStylesheet(alternateStylesheets); 153 for (const sheetNode of alternateStylesheets) { 154 if (sheetNode.id === "quarto-bootstrap") { 155 toggleBodyColorMode(sheetNode); 156 } 157 } 158 } else { 159 disableStylesheet(alternateStylesheets); 160 enableStylesheet(primaryStylesheets) 161 toggleBodyColorPrimary(); 162 } 163 manageTransitions('#quarto-margin-sidebar .nav-link', true); 164 // Switch the toggles 165 setColorSchemeToggle(alternate) 166 // Hack to workaround the fact that safari doesn't 167 // properly recolor the scrollbar when toggling (#1455) 168 if (navigator.userAgent.indexOf('Safari') > 0 && navigator.userAgent.indexOf('Chrome') == -1) { 169 manageTransitions("body", false); 170 window.scrollTo(0, 1); 171 setTimeout(() => { 172 window.scrollTo(0, 0); 173 manageTransitions("body", true); 174 }, 40); 175 } 176 } 177 const disableStylesheet = (stylesheets) => { 178 for (let i=0; i < stylesheets.length; i++) { 179 const stylesheet = stylesheets[i]; 180 stylesheet.rel = 
'disabled-stylesheet'; 181 } 182 } 183 const enableStylesheet = (stylesheets) => { 184 for (let i=0; i < stylesheets.length; i++) { 185 const stylesheet = stylesheets[i]; 186 if(stylesheet.rel !== 'stylesheet') { // for Chrome, which will still FOUC without this check 187 stylesheet.rel = 'stylesheet'; 188 } 189 } 190 } 191 const manageTransitions = (selector, allowTransitions) => { 192 const els = window.document.querySelectorAll(selector); 193 for (let i=0; i < els.length; i++) { 194 const el = els[i]; 195 if (allowTransitions) { 196 el.classList.remove('notransition'); 197 } else { 198 el.classList.add('notransition'); 199 } 200 } 201 } 202 const isFileUrl = () => { 203 return window.location.protocol === 'file:'; 204 } 205 const hasAlternateSentinel = () => { 206 let styleSentinel = getColorSchemeSentinel(); 207 if (styleSentinel !== null) { 208 return styleSentinel === "alternate"; 209 } else { 210 return false; 211 } 212 } 213 const setStyleSentinel = (alternate) => { 214 const value = alternate ? "alternate" : "default"; 215 if (!isFileUrl()) { 216 window.localStorage.setItem("quarto-color-scheme", value); 217 } else { 218 localAlternateSentinel = value; 219 } 220 } 221 const getColorSchemeSentinel = () => { 222 if (!isFileUrl()) { 223 const storageValue = window.localStorage.getItem("quarto-color-scheme"); 224 return storageValue != null ? storageValue : localAlternateSentinel; 225 } else { 226 return localAlternateSentinel; 227 } 228 } 229 const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => { 230 const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light'; 231 const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark'; 232 let newTheme = ''; 233 if(authorPrefersDark) { 234 newTheme = isAlternate ? baseTheme : alternateTheme; 235 } else { 236 newTheme = isAlternate ? 
alternateTheme : baseTheme; 237 } 238 const changeGiscusTheme = () => { 239 // From: https://github.com/giscus/giscus/issues/336 240 const sendMessage = (message) => { 241 const iframe = document.querySelector('iframe.giscus-frame'); 242 if (!iframe) return; 243 iframe.contentWindow.postMessage({ giscus: message }, 'https://giscus.app'); 244 } 245 sendMessage({ 246 setConfig: { 247 theme: newTheme 248 } 249 }); 250 } 251 const isGiscussLoaded = window.document.querySelector('iframe.giscus-frame') !== null; 252 if (isGiscussLoaded) { 253 changeGiscusTheme(); 254 } 255 }; 256 const authorPrefersDark = false; 257 const darkModeDefault = authorPrefersDark; 258 document.querySelector('link#quarto-text-highlighting-styles.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 259 document.querySelector('link#quarto-bootstrap.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 260 let localAlternateSentinel = darkModeDefault ? 'alternate' : 'default'; 261 // Dark / light mode switch 262 window.quartoToggleColorScheme = () => { 263 // Read the current dark / light value 264 let toAlternate = !hasAlternateSentinel(); 265 toggleColorMode(toAlternate); 266 setStyleSentinel(toAlternate); 267 toggleGiscusIfUsed(toAlternate, darkModeDefault); 268 window.dispatchEvent(new Event('resize')); 269 }; 270 // Switch to dark mode if need be 271 if (hasAlternateSentinel()) { 272 toggleColorMode(true); 273 } else { 274 toggleColorMode(false); 275 } 276 </script> 277 278<div id="quarto-search-results"></div> 279 <header id="quarto-header" class="headroom fixed-top"> 280 <nav class="navbar navbar-expand-lg " data-bs-theme="dark"> 281 <div class="navbar-container container-fluid"> 282 <div class="navbar-brand-container mx-auto"> 283 <a class="navbar-brand" href="../index.html"> 284 <span class="navbar-title">atdata</span> 285 </a> 286 </div> 287 <div id="quarto-search" class="" title="Search"></div> 288 <button class="navbar-toggler" type="button" data-bs-toggle="collapse" 
data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" role="menu" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 289 <span class="navbar-toggler-icon"></span> 290</button> 291 <div class="collapse navbar-collapse" id="navbarCollapse"> 292 <ul class="navbar-nav navbar-nav-scroll me-auto"> 293 <li class="nav-item"> 294 <a class="nav-link" href="../index.html"> 295<span class="menu-text">Guide</span></a> 296 </li> 297 <li class="nav-item dropdown "> 298 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 299 <span class="menu-text">Tutorials</span> 300 </a> 301 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials"> 302 <li> 303 <a class="dropdown-item" href="../tutorials/quickstart.html"> 304 <span class="dropdown-text">Quick Start</span></a> 305 </li> 306 <li> 307 <a class="dropdown-item" href="../tutorials/local-workflow.html"> 308 <span class="dropdown-text">Local Workflow</span></a> 309 </li> 310 <li> 311 <a class="dropdown-item" href="../tutorials/atmosphere.html"> 312 <span class="dropdown-text">Atmosphere Publishing</span></a> 313 </li> 314 <li> 315 <a class="dropdown-item" href="../tutorials/promotion.html"> 316 <span class="dropdown-text">Promotion Workflow</span></a> 317 </li> 318 </ul> 319 </li> 320 <li class="nav-item dropdown "> 321 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 322 <span class="menu-text">Reference</span> 323 </a> 324 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference"> 325 <li> 326 <a class="dropdown-item" href="../reference/architecture.html"> 327 <span class="dropdown-text">Architecture Overview</span></a> 328 </li> 329 <li> 330 <a class="dropdown-item" href="../reference/packable-samples.html"> 331 <span class="dropdown-text">Packable Samples</span></a> 332 </li> 
333 <li> 334 <a class="dropdown-item" href="../reference/datasets.html"> 335 <span class="dropdown-text">Datasets</span></a> 336 </li> 337 <li> 338 <a class="dropdown-item" href="../reference/lenses.html"> 339 <span class="dropdown-text">Lenses</span></a> 340 </li> 341 <li> 342 <a class="dropdown-item" href="../reference/local-storage.html"> 343 <span class="dropdown-text">Local Storage</span></a> 344 </li> 345 <li> 346 <a class="dropdown-item" href="../reference/atmosphere.html"> 347 <span class="dropdown-text">Atmosphere</span></a> 348 </li> 349 <li> 350 <a class="dropdown-item" href="../reference/promotion.html"> 351 <span class="dropdown-text">Promotion</span></a> 352 </li> 353 <li> 354 <a class="dropdown-item" href="../reference/load-dataset.html"> 355 <span class="dropdown-text">load_dataset API</span></a> 356 </li> 357 <li> 358 <a class="dropdown-item" href="../reference/protocols.html"> 359 <span class="dropdown-text">Protocols</span></a> 360 </li> 361 <li> 362 <a class="dropdown-item" href="../reference/uri-spec.html"> 363 <span class="dropdown-text">URI Specification</span></a> 364 </li> 365 <li> 366 <a class="dropdown-item" href="../reference/troubleshooting.html"> 367 <span class="dropdown-text">Troubleshooting &amp; FAQ</span></a> 368 </li> 369 <li> 370 <a class="dropdown-item" href="../reference/deployment.html"> 371 <span class="dropdown-text">Deployment Guide</span></a> 372 </li> 373 </ul> 374 </li> 375 <li class="nav-item"> 376 <a class="nav-link" href="../api/index.html"> 377<span class="menu-text">API</span></a> 378 </li> 379</ul> 380 <ul class="navbar-nav navbar-nav-scroll ms-auto"> 381 <li class="nav-item compact"> 382 <a class="nav-link" href="https://github.com/your-org/atdata"> <i class="bi bi-github" role="img"> 383</i> 384<span class="menu-text"></span></a> 385 </li> 386</ul> 387 </div> <!-- /navcollapse --> 388 <div class="quarto-navbar-tools"> 389 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" 
onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a> 390</div> 391 </div> <!-- /container-fluid --> 392 </nav> 393</header> 394<!-- content --> 395<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar"> 396<!-- sidebar --> 397<!-- margin-sidebar --> 398 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar"> 399 <nav id="TOC" role="doc-toc" class="toc-active"> 400 <h2 id="toc-title">On this page</h2> 401 402 <ul> 403 <li><a href="#atdata.Dataset" id="toc-atdata.Dataset" class="nav-link active" data-scroll-target="#atdata.Dataset">Dataset</a> 404 <ul class="collapse"> 405 <li><a href="#parameters" id="toc-parameters" class="nav-link" data-scroll-target="#parameters">Parameters</a></li> 406 <li><a href="#attributes" id="toc-attributes" class="nav-link" data-scroll-target="#attributes">Attributes</a></li> 407 <li><a href="#examples" id="toc-examples" class="nav-link" data-scroll-target="#examples">Examples</a></li> 408 <li><a href="#note" id="toc-note" class="nav-link" data-scroll-target="#note">Note</a></li> 409 <li><a href="#methods" id="toc-methods" class="nav-link" data-scroll-target="#methods">Methods</a> 410 <ul class="collapse"> 411 <li><a href="#atdata.Dataset.as_type" id="toc-atdata.Dataset.as_type" class="nav-link" data-scroll-target="#atdata.Dataset.as_type">as_type</a></li> 412 <li><a href="#atdata.Dataset.list_shards" id="toc-atdata.Dataset.list_shards" class="nav-link" data-scroll-target="#atdata.Dataset.list_shards">list_shards</a></li> 413 <li><a href="#atdata.Dataset.ordered" id="toc-atdata.Dataset.ordered" class="nav-link" data-scroll-target="#atdata.Dataset.ordered">ordered</a></li> 414 <li><a href="#atdata.Dataset.shuffled" id="toc-atdata.Dataset.shuffled" class="nav-link" data-scroll-target="#atdata.Dataset.shuffled">shuffled</a></li> 415 <li><a href="#atdata.Dataset.to_parquet" id="toc-atdata.Dataset.to_parquet" class="nav-link" 
data-scroll-target="#atdata.Dataset.to_parquet">to_parquet</a></li> 416 <li><a href="#atdata.Dataset.wrap" id="toc-atdata.Dataset.wrap" class="nav-link" data-scroll-target="#atdata.Dataset.wrap">wrap</a></li> 417 <li><a href="#atdata.Dataset.wrap_batch" id="toc-atdata.Dataset.wrap_batch" class="nav-link" data-scroll-target="#atdata.Dataset.wrap_batch">wrap_batch</a></li> 418 </ul></li> 419 </ul></li> 420 </ul> 421<div class="toc-actions"><ul><li><a href="https://github.com/your-org/atdata/edit/main/api/Dataset.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav> 422 </div> 423<!-- main --> 424<main class="content" id="quarto-document-content"><header id="title-block-header" class="quarto-title-block"></header> 425 426 427 428 429 430<section id="atdata.Dataset" class="level1"> 431<h1>Dataset</h1> 432<div class="sourceCode" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>Dataset(source<span class="op">=</span><span class="va">None</span>, metadata_url<span class="op">=</span><span class="va">None</span>, <span class="op">*</span>, url<span class="op">=</span><span class="va">None</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 433<p>A typed dataset built on WebDataset with lens transformations.</p> 434<p>This class wraps WebDataset tar archives and provides type-safe iteration over samples of a specific <code>PackableSample</code> type. 
Samples are stored as msgpack-serialized data within WebDataset shards.</p> 435<p>The dataset supports:</p> <ul> <li>Ordered and shuffled iteration</li> <li>Automatic batching with <code>SampleBatch</code></li> <li>Type transformations via the lens system (<code>as_type()</code>)</li> <li>Export to parquet format</li> </ul> 436<section id="parameters" class="level2 doc-section doc-section-parameters"> 437<h2 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h2> 438<table class="caption-top table"> 439<colgroup> 440<col style="width: 8%"> 441<col style="width: 8%"> 442<col style="width: 72%"> 443<col style="width: 12%"> 444</colgroup> 445<thead> 446<tr class="header"> 447<th>Name</th> 448<th>Type</th> 449<th>Description</th> 450<th>Default</th> 451</tr> 452</thead> 453<tbody> 454<tr class="odd"> 455<td>ST</td> 456<td></td> 457<td>The sample type for this dataset, must derive from <code>PackableSample</code>.</td> 458<td><em>required</em></td> 459</tr> 460</tbody> 461</table> 462</section> 463<section id="attributes" class="level2 doc-section doc-section-attributes"> 464<h2 class="doc-section doc-section-attributes anchored" data-anchor-id="attributes">Attributes</h2> 465<table class="caption-top table"> 466<thead> 467<tr class="header"> 468<th>Name</th> 469<th>Type</th> 470<th>Description</th> 471</tr> 472</thead> 473<tbody> 474<tr class="odd"> 475<td>url</td> 476<td></td> 477<td>WebDataset brace-notation URL for the tar file(s).</td> 478</tr> 479</tbody> 480</table> 481</section> 482<section id="examples" class="level2 doc-section doc-section-examples"> 483<h2 class="doc-section doc-section-examples anchored" data-anchor-id="examples">Examples</h2> 484<div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> ds <span class="op">=</span> Dataset[MyData](<span 
class="st">"path/to/data-{000000..000009}.tar"</span>)</span> 485<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> <span class="cf">for</span> sample <span class="kw">in</span> ds.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 486<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>... <span class="co"># sample is SampleBatch[MyData] with batch_size samples</span></span> 487<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>... embeddings <span class="op">=</span> sample.embeddings <span class="co"># shape: (32, ...)</span></span> 488<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a>...</span> 489<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> <span class="co"># Transform to a different view</span></span> 490<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> ds_view <span class="op">=</span> ds.as_type(MyDataView)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 491</section> 492<section id="note" class="level2 doc-section doc-section-note"> 493<h2 class="doc-section doc-section-note anchored" data-anchor-id="note">Note</h2> 494<p>This class uses Python’s <code>__orig_class__</code> mechanism to extract the type parameter at runtime. 
Instances must be created using the subscripted syntax <code>Dataset[MyType](url)</code> rather than calling the constructor directly with an unsubscripted class.</p> 495</section> 496<section id="methods" class="level2"> 497<h2 class="anchored" data-anchor-id="methods">Methods</h2> 498<table class="caption-top table"> 499<thead> 500<tr class="header"> 501<th>Name</th> 502<th>Description</th> 503</tr> 504</thead> 505<tbody> 506<tr class="odd"> 507<td><a href="#atdata.Dataset.as_type">as_type</a></td> 508<td>View this dataset through a different sample type using a registered lens.</td> 509</tr> 510<tr class="even"> 511<td><a href="#atdata.Dataset.list_shards">list_shards</a></td> 512<td>Get list of individual dataset shards.</td> 513</tr> 514<tr class="odd"> 515<td><a href="#atdata.Dataset.ordered">ordered</a></td> 516<td>Iterate over the dataset in order.</td> 517</tr> 518<tr class="even"> 519<td><a href="#atdata.Dataset.shuffled">shuffled</a></td> 520<td>Iterate over the dataset in random order.</td> 521</tr> 522<tr class="odd"> 523<td><a href="#atdata.Dataset.to_parquet">to_parquet</a></td> 524<td>Export dataset contents to parquet format.</td> 525</tr> 526<tr class="even"> 527<td><a href="#atdata.Dataset.wrap">wrap</a></td> 528<td>Wrap a raw msgpack sample into the appropriate dataset-specific type.</td> 529</tr> 530<tr class="odd"> 531<td><a href="#atdata.Dataset.wrap_batch">wrap_batch</a></td> 532<td>Wrap a batch of raw msgpack samples into a typed SampleBatch.</td> 533</tr> 534</tbody> 535</table> 536<section id="atdata.Dataset.as_type" class="level3"> 537<h3 class="anchored" data-anchor-id="atdata.Dataset.as_type">as_type</h3> 538<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>Dataset.as_type(other)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 539<p>View this 
dataset through a different sample type using a registered lens.</p> 540<section id="parameters-1" class="level4 doc-section doc-section-parameters"> 541<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-1">Parameters</h4> 542<table class="caption-top table"> 543<thead> 544<tr class="header"> 545<th>Name</th> 546<th>Type</th> 547<th>Description</th> 548<th>Default</th> 549</tr> 550</thead> 551<tbody> 552<tr class="odd"> 553<td>other</td> 554<td><a href="`typing.Type`">Type</a>[<a href="`atdata.dataset.RT`">RT</a>]</td> 555<td>The target sample type to transform into. Must be a type derived from <code>PackableSample</code>.</td> 556<td><em>required</em></td> 557</tr> 558</tbody> 559</table> 560</section> 561<section id="returns" class="level4 doc-section doc-section-returns"> 562<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns">Returns</h4> 563<table class="caption-top table"> 564<thead> 565<tr class="header"> 566<th>Name</th> 567<th>Type</th> 568<th>Description</th> 569</tr> 570</thead> 571<tbody> 572<tr class="odd"> 573<td></td> 574<td><a href="`atdata.dataset.Dataset`">Dataset</a>[<a href="`atdata.dataset.RT`">RT</a>]</td> 575<td>A new <code>Dataset</code> instance that yields samples of type <code>other</code> by applying the appropriate lens transformation from the global <code>LensNetwork</code> registry.</td> 576</tr> 587</tbody> 588</table> 589</section> 590<section id="raises" class="level4 doc-section doc-section-raises"> 591<h4 class="doc-section doc-section-raises anchored" data-anchor-id="raises">Raises</h4> 592<table class="caption-top table"> 593<thead> 594<tr class="header"> 595<th>Name</th> 
596<th>Type</th> 597<th>Description</th> 598</tr> 599</thead> 600<tbody> 601<tr class="odd"> 602<td></td> 603<td><a href="`ValueError`">ValueError</a></td> 604<td>If no registered lens exists between the current sample type and the target type.</td> 605</tr> 606</tbody> 607</table> 608</section> 609</section> 610<section id="atdata.Dataset.list_shards" class="level3"> 611<h3 class="anchored" data-anchor-id="atdata.Dataset.list_shards">list_shards</h3> 612<div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>Dataset.list_shards()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 613<p>Get list of individual dataset shards.</p> 614<section id="returns-1" class="level4 doc-section doc-section-returns"> 615<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-1">Returns</h4> 616<table class="caption-top table"> 617<thead> 618<tr class="header"> 619<th>Name</th> 620<th>Type</th> 621<th>Description</th> 622</tr> 623</thead> 624<tbody> 625<tr class="odd"> 626<td></td> 627<td><a href="`list`">list</a>[<a href="`str`">str</a>]</td> 628<td>A full (non-lazy) list of the individual <code>tar</code> files within the source WebDataset.</td> 629</tr> 635</tbody> 636</table> 637</section> 638</section> 639<section id="atdata.Dataset.ordered" class="level3"> 640<h3 class="anchored" data-anchor-id="atdata.Dataset.ordered">ordered</h3> 641<div class="sourceCode" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>Dataset.ordered(batch_size<span class="op">=</span><span class="va">None</span>)</span></code><button title="Copy to Clipboard" 
class="code-copy-button"><i class="bi"></i></button></pre></div> 642<p>Iterate over the dataset in order.</p> 643<section id="parameters-2" class="level4 doc-section doc-section-parameters"> 644<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-2">Parameters</h4> 645<table class="caption-top table"> 646<colgroup> 647<col style="width: 7%"> 648<col style="width: 4%"> 649<col style="width: 81%"> 650<col style="width: 6%"> 651</colgroup> 652<thead> 653<tr class="header"> 654<th>Name</th> 655<th>Type</th> 656<th>Description</th> 657<th>Default</th> 658</tr> 659</thead> 660<tbody> 661<tr class="odd"> 662<td>batch_size</td> 663<td><code>int</code>, optional</td> 664<td>The size of iterated batches. Default: None (unbatched). If <code>None</code>, iterates over one sample at a time with no batch dimension.</td> 665<td><code>None</code></td> 666</tr> 667</tbody> 668</table> 669</section> 670<section id="returns-2" class="level4 doc-section doc-section-returns"> 671<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-2">Returns</h4> 672<table class="caption-top table"> 673<thead> 674<tr class="header"> 675<th>Name</th> 676<th>Type</th> 677<th>Description</th> 678</tr> 679</thead> 680<tbody> 681<tr class="odd"> 682<td></td> 683<td><a href="`typing.Iterable`">Iterable</a>[<a href="`atdata.dataset.ST`">ST</a>]</td> 684<td><code>webdataset.DataPipeline</code>: A data pipeline that iterates over the dataset in its original sample order.</td> 685</tr> 691</tbody> 692</table> 693</section> 694</section> 695<section id="atdata.Dataset.shuffled" class="level3"> 696<h3 class="anchored" data-anchor-id="atdata.Dataset.shuffled">shuffled</h3> 697<div class="sourceCode" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a 
href="#cb6-1" aria-hidden="true" tabindex="-1"></a>Dataset.shuffled(buffer_shards<span class="op">=</span><span class="dv">100</span>, buffer_samples<span class="op">=</span><span class="dv">10000</span>, batch_size<span class="op">=</span><span class="va">None</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 698<p>Iterate over the dataset in random order.</p> 699<section id="parameters-3" class="level4 doc-section doc-section-parameters"> 700<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-3">Parameters</h4> 701<table class="caption-top table"> 702<thead> 703<tr class="header"> 704<th>Name</th> 705<th>Type</th> 706<th>Description</th> 707<th>Default</th> 708</tr> 709</thead> 710<tbody> 711<tr class="odd"> 712<td>buffer_shards</td> 713<td><a href="`int`">int</a></td> 714<td>Number of shards to buffer for shuffling at the shard level. Larger values increase randomness but use more memory. Default: 100.</td> 715<td><code>100</code></td> 716</tr> 717<tr class="even"> 718<td>buffer_samples</td> 719<td><a href="`int`">int</a></td> 720<td>Number of samples to buffer for shuffling within shards. Larger values increase randomness but use more memory. Default: 10,000.</td> 721<td><code>10000</code></td> 722</tr> 723<tr class="odd"> 724<td>batch_size</td> 725<td><a href="`int`">int</a> | None</td> 726<td>The size of iterated batches. Default: None (unbatched). 
If <code>None</code>, iterates over one sample at a time with no batch dimension.</td> 727<td><code>None</code></td> 728</tr> 729</tbody> 730</table> 731</section> 732<section id="returns-3" class="level4 doc-section doc-section-returns"> 733<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-3">Returns</h4> 734<table class="caption-top table"> 735<thead> 736<tr class="header"> 737<th>Name</th> 738<th>Type</th> 739<th>Description</th> 740</tr> 741</thead> 742<tbody> 743<tr class="odd"> 744<td></td> 745<td><a href="`typing.Iterable`">Iterable</a>[<a href="`atdata.dataset.ST`">ST</a>]</td> 746<td>A WebDataset data pipeline that iterates over the dataset in</td> 747</tr> 748<tr class="even"> 749<td></td> 750<td><a href="`typing.Iterable`">Iterable</a>[<a href="`atdata.dataset.ST`">ST</a>]</td> 751<td>randomized order. If <code>batch_size</code> is not <code>None</code>, yields</td> 752</tr> 753<tr class="odd"> 754<td></td> 755<td><a href="`typing.Iterable`">Iterable</a>[<a href="`atdata.dataset.ST`">ST</a>]</td> 756<td><code>SampleBatch[ST]</code> instances; otherwise yields individual <code>ST</code></td> 757</tr> 758<tr class="even"> 759<td></td> 760<td><a href="`typing.Iterable`">Iterable</a>[<a href="`atdata.dataset.ST`">ST</a>]</td> 761<td>samples.</td> 762</tr> 763</tbody> 764</table> 765</section> 766</section> 767<section id="atdata.Dataset.to_parquet" class="level3"> 768<h3 class="anchored" data-anchor-id="atdata.Dataset.to_parquet">to_parquet</h3> 769<div class="sourceCode" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>Dataset.to_parquet(path, sample_map<span class="op">=</span><span class="va">None</span>, maxcount<span class="op">=</span><span class="va">None</span>, <span class="op">**</span>kwargs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 
770<p>Export dataset contents to parquet format.</p> 771<p>Converts all samples to a pandas DataFrame and saves to parquet file(s). Useful for interoperability with data analysis tools.</p> 772<section id="parameters-4" class="level4 doc-section doc-section-parameters"> 773<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-4">Parameters</h4> 774<table class="caption-top table"> 775<thead> 776<tr class="header"> 777<th>Name</th> 778<th>Type</th> 779<th>Description</th> 780<th>Default</th> 781</tr> 782</thead> 783<tbody> 784<tr class="odd"> 785<td>path</td> 786<td><a href="`atdata.dataset.Pathlike`">Pathlike</a></td> 787<td>Output path for the parquet file. If <code>maxcount</code> is specified, files are named <code>{stem}-{segment:06d}.parquet</code>.</td> 788<td><em>required</em></td> 789</tr> 790<tr class="even"> 791<td>sample_map</td> 792<td><a href="`typing.Optional`">Optional</a>[<a href="`atdata.dataset.SampleExportMap`">SampleExportMap</a>]</td> 793<td>Optional function to convert samples to dictionaries. Defaults to <code>dataclasses.asdict</code>.</td> 794<td><code>None</code></td> 795</tr> 796<tr class="odd"> 797<td>maxcount</td> 798<td><a href="`typing.Optional`">Optional</a>[<a href="`int`">int</a>]</td> 799<td>If specified, split output into multiple files with at most this many samples each. Recommended for large datasets.</td> 800<td><code>None</code></td> 801</tr> 802<tr class="even"> 803<td>**kwargs</td> 804<td></td> 805<td>Additional arguments passed to <code>pandas.DataFrame.to_parquet()</code>. 
Common options include <code>compression</code>, <code>index</code>, <code>engine</code>.</td>
<td><code>{}</code></td>
</tr>
</tbody>
</table>
</section>
<section id="warning" class="level4 doc-section doc-section-warning">
<h4 class="doc-section doc-section-warning anchored" data-anchor-id="warning">Warning</h4>
<p><strong>Memory Usage</strong>: When <code>maxcount=None</code> (default), this method loads the <strong>entire dataset into memory</strong> as a pandas DataFrame before writing. For large datasets, this can cause memory exhaustion.</p>
<p>For datasets larger than available RAM, always specify <code>maxcount</code>:</p>
<pre><code># Safe for large datasets - processes in chunks
ds.to_parquet("output.parquet", maxcount=10000)</code></pre>
<p>This creates multiple parquet files: <code>output-000000.parquet</code>, <code>output-000001.parquet</code>, etc.</p>
</section>
<section id="examples-1" class="level4 doc-section doc-section-examples">
<h4 class="doc-section doc-section-examples anchored" data-anchor-id="examples-1">Examples</h4>
<div class="sourceCode" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> ds <span class="op">=</span> Dataset[MySample](<span class="st">"data.tar"</span>)</span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> <span class="co"># Small dataset - load all at once</span></span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> ds.to_parquet(<span class="st">"output.parquet"</span>)</span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> 
<span class="co"># Large dataset - process in chunks</span></span> 826<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a><span class="op">&gt;&gt;&gt;</span> ds.to_parquet(<span class="st">"output.parquet"</span>, maxcount<span class="op">=</span><span class="dv">50000</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 827</section> 828</section> 829<section id="atdata.Dataset.wrap" class="level3"> 830<h3 class="anchored" data-anchor-id="atdata.Dataset.wrap">wrap</h3> 831<div class="sourceCode" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>Dataset.wrap(sample)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 832<p>Wrap a raw msgpack sample into the appropriate dataset-specific type.</p> 833<section id="parameters-5" class="level4 doc-section doc-section-parameters"> 834<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-5">Parameters</h4> 835<table class="caption-top table"> 836<thead> 837<tr class="header"> 838<th>Name</th> 839<th>Type</th> 840<th>Description</th> 841<th>Default</th> 842</tr> 843</thead> 844<tbody> 845<tr class="odd"> 846<td>sample</td> 847<td><a href="`atdata.dataset.WDSRawSample`">WDSRawSample</a></td> 848<td>A dictionary containing at minimum a <code>'msgpack'</code> key with serialized sample bytes.</td> 849<td><em>required</em></td> 850</tr> 851</tbody> 852</table> 853</section> 854<section id="returns-4" class="level4 doc-section doc-section-returns"> 855<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-4">Returns</h4> 856<table class="caption-top table"> 857<thead> 858<tr class="header"> 859<th>Name</th> 860<th>Type</th> 861<th>Description</th> 862</tr> 863</thead> 864<tbody> 865<tr class="odd"> 866<td></td> 867<td><a 
href="`atdata.dataset.ST`">ST</a></td> 868<td>A deserialized sample of type <code>ST</code>, optionally transformed through</td> 869</tr> 870<tr class="even"> 871<td></td> 872<td><a href="`atdata.dataset.ST`">ST</a></td> 873<td>a lens if <code>as_type()</code> was called.</td> 874</tr> 875</tbody> 876</table> 877</section> 878</section> 879<section id="atdata.Dataset.wrap_batch" class="level3"> 880<h3 class="anchored" data-anchor-id="atdata.Dataset.wrap_batch">wrap_batch</h3> 881<div class="sourceCode" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>Dataset.wrap_batch(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 882<p>Wrap a batch of raw msgpack samples into a typed SampleBatch.</p> 883<section id="parameters-6" class="level4 doc-section doc-section-parameters"> 884<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-6">Parameters</h4> 885<table class="caption-top table"> 886<thead> 887<tr class="header"> 888<th>Name</th> 889<th>Type</th> 890<th>Description</th> 891<th>Default</th> 892</tr> 893</thead> 894<tbody> 895<tr class="odd"> 896<td>batch</td> 897<td><a href="`atdata.dataset.WDSRawBatch`">WDSRawBatch</a></td> 898<td>A dictionary containing a <code>'msgpack'</code> key with a list of serialized sample bytes.</td> 899<td><em>required</em></td> 900</tr> 901</tbody> 902</table> 903</section> 904<section id="returns-5" class="level4 doc-section doc-section-returns"> 905<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns-5">Returns</h4> 906<table class="caption-top table"> 907<thead> 908<tr class="header"> 909<th>Name</th> 910<th>Type</th> 911<th>Description</th> 912</tr> 913</thead> 914<tbody> 915<tr class="odd"> 916<td></td> 917<td><a href="`atdata.dataset.SampleBatch`">SampleBatch</a>[<a 
href="`atdata.dataset.ST`">ST</a>]</td>
<td>A <code>SampleBatch[ST]</code> containing deserialized samples, optionally</td>
</tr>
<tr class="even">
<td></td>
<td><a href="`atdata.dataset.SampleBatch`">SampleBatch</a>[<a href="`atdata.dataset.ST`">ST</a>]</td>
<td>transformed through a lens if <code>as_type()</code> was called.</td>
</tr>
</tbody>
</table>
</section>
<section id="note-1" class="level4 doc-section doc-section-note">
<h4 class="doc-section doc-section-note anchored" data-anchor-id="note-1">Note</h4>
<p>This implementation deserializes samples one at a time, then aggregates them into a batch.</p>


</section>
</section>
</section>
</section>

</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
  // Page behaviour wired up once the DOM is parsed: colour-scheme toggle,
  // heading anchors, code-copy buttons, external-link restoration, hover
  // popups (footnotes, cross-references, citations) and code-annotation
  // line highlighting.
  window.document.addEventListener("DOMContentLoaded", function (event) {
    // Ensure there is a toggle, if there isn't float one in the top right
    if (window.document.querySelector('.quarto-color-scheme-toggle') === null) {
      const a = window.document.createElement('a');
      a.classList.add('top-right');
      a.classList.add('quarto-color-scheme-toggle');
      a.href = "";
      // Returning false cancels the navigation the empty href would trigger.
      a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; };
      const i = window.document.createElement("i");
      i.classList.add('bi');
      a.appendChild(i);
      window.document.body.appendChild(a);
    }
    // NOTE(review): setColorSchemeToggle and hasAlternateSentinel are not
    // defined in this script; presumably another page script provides them.
    setColorSchemeToggle(hasAlternateSentinel())

    // Add an anchor link to the right of every element marked `.anchored`.
    // NOTE(review): the icon string looks empty here; confirm that a glyph
    // character was not lost when this file was extracted.
    const icon = "";
    const anchorJS = new window.AnchorJS();
    anchorJS.options = {
      placement: 'right',
      icon: icon
    };
    anchorJS.add('.anchored');

    // True when the element carries any class starting with "code-annotation-".
    const isCodeAnnotation = (el) => {
      for (const clz of el.classList) {
        if (clz.startsWith('code-annotation-')) {
          return true;
        }
      }
      return false;
    }

    // ClipboardJS "success" handler: flashes a "Copied!" state on the button
    // for one second, then restores its original title.
    const onCopySuccess = function(e) {
      // button target
      const button = e.trigger;
      // don't keep focus
      button.blur();
      // flash "checked"
      button.classList.add('code-copy-button-checked');
      var currentTitle = button.getAttribute("title");
      button.setAttribute("title", "Copied!");
      let tooltip;
      // The Bootstrap tooltip is optional; without it only the title changes.
      if (window.bootstrap) {
        button.setAttribute("data-bs-toggle", "tooltip");
        button.setAttribute("data-bs-placement", "left");
        button.setAttribute("data-bs-title", "Copied!");
        tooltip = new bootstrap.Tooltip(button,
          { trigger: "manual",
            customClass: "code-copy-button-tooltip",
            offset: [0, -8]});
        tooltip.show();
      }
      setTimeout(function() {
        if (tooltip) {
          tooltip.hide();
          button.removeAttribute("data-bs-title");
          button.removeAttribute("data-bs-toggle");
          button.removeAttribute("data-bs-placement");
        }
        button.setAttribute("title", currentTitle);
        button.classList.remove('code-copy-button-checked');
      }, 1000);
      // clear code selection
      e.clearSelection();
    }

    // Returns the text to copy for a copy button: the inner text of the
    // element immediately before the button, minus code-annotation children.
    const getTextToCopy = function(trigger) {
      const codeEl = trigger.previousElementSibling.cloneNode(true);
      // `children` is a live collection: removing a node while iterating it
      // directly skips the following sibling, so iterate over a snapshot.
      for (const childEl of Array.from(codeEl.children)) {
        if (isCodeAnnotation(childEl)) {
          childEl.remove();
        }
      }
      return codeEl.innerText;
    }
    const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
      text: getTextToCopy
    });
    clipboard.on('success', onCopySuccess);
    // Copy buttons inside the embedded-source modal get their own ClipboardJS
    // instance, with the modal element passed as `container`.
    if (window.document.getElementById('quarto-embedded-source-code-modal')) {
      const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
        text: getTextToCopy,
        container: window.document.getElementById('quarto-embedded-source-code-modal')
      });
      clipboardModal.on('success', onCopySuccess);
    }

    var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
    var mailtoRegex = new RegExp(/^mailto:/);
    // NOTE(review): built from a string, so "\." collapses to "." (any
    // character) and the pattern is unanchored; confirm that is intended.
    var filterRegex = new RegExp("https:\/\/github\.com\/your-org\/atdata");
    // A link counts as internal when it matches the site filter, localhost,
    // or a mailto: address.
    var isInternal = (href) => {
      return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
    }
    // Inspect non-navigation links and adorn them if external
    var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
    for (var i=0; i<links.length; i++) {
      const link = links[i];
      if (!isInternal(link.href)) {
        // undo the damage that might have been done by quarto-nav.js in the case of
        // links that we want to consider external
        if (link.dataset.originalHref !== undefined) {
          link.href = link.dataset.originalHref;
        }
      }
    }

    // Attaches a tippy popup to `el`. `contentFn`, `onTriggerFn` and
    // `onUntriggerFn` are optional and only set on the config when given.
    function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
      const config = {
        allowHTML: true,
        maxWidth: 500,
        delay: 100,
        arrow: false,
        appendTo: function(el) {
          return el.parentElement;
        },
        interactive: true,
        interactiveBorder: 10,
        theme: 'quarto',
        placement: 'bottom-start',
      };
      if (contentFn) {
        config.content = contentFn;
      }
      if (onTriggerFn) {
        config.onTrigger = onTriggerFn;
      }
      if (onUntriggerFn) {
        config.onUntrigger = onUntriggerFn;
      }
      window.tippy(el, config);
    }

    // Footnote references: show the referenced note's HTML on hover.
    const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
    for (var i=0; i<noterefs.length; i++) {
      const ref = noterefs[i];
      tippyHover(ref, function() {
        // use id or data attribute instead here
        let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
        // Absolute URLs are reduced to their hash; anything URL() rejects
        // (e.g. a bare "#id") is used as-is.
        try { href = new URL(href).hash; } catch {}
        const id = href.replace(/^#\/?/, "");
        const note = window.document.getElementById(id);
        if (note) {
          return note.innerHTML;
        } else {
          return "";
        }
      });
    }

    const xrefs = window.document.querySelectorAll('a.quarto-xref');
    // Builds the popup HTML for a cross-reference target. `id` is the target
    // id, or null when the whole page content is the target. `note` is
    // modified in place, so callers pass a clone or a node from a parsed copy.
    const processXRef = (id, note) => {
      // Strip column container classes
      const stripColumnClz = (el) => {
        el.classList.remove("page-full", "page-columns");
        if (el.children) {
          for (const child of el.children) {
            stripColumnClz(child);
          }
        }
      }
      stripColumnClz(note)
      if (id === null || id.startsWith('sec-')) {
        // Special case sections, only their first couple elements
        const container = document.createElement("div");
        if (note.children && note.children.length > 2) {
          container.appendChild(note.children[0].cloneNode(true));
          // Append the first later child that is not an empty paragraph.
          for (let i = 1; i < note.children.length; i++) {
            const child = note.children[i];
            if (child.tagName === "P" && child.innerText === "") {
              continue;
            } else {
              container.appendChild(child.cloneNode(true));
              break;
            }
          }
          if (window.Quarto?.typesetMath) {
            window.Quarto.typesetMath(container);
          }
          return container.innerHTML
        } else {
          if (window.Quarto?.typesetMath) {
            window.Quarto.typesetMath(note);
          }
          return note.innerHTML;
        }
      } else {
        // Remove any anchor links if they are present
        const anchorLink = note.querySelector('a.anchorjs-link');
        if (anchorLink) {
          anchorLink.remove();
        }
        if (window.Quarto?.typesetMath) {
          window.Quarto.typesetMath(note);
        }
        if (note.classList.contains("callout")) {
          return note.outerHTML;
        } else {
          return note.innerHTML;
        }
      }
    }

    // Cross-references: the popup content is resolved on trigger, from this
    // document when the target id exists here, otherwise by fetching the
    // linked page. The instance is disabled while content is being resolved.
    for (var i=0; i<xrefs.length; i++) {
      const xref = xrefs[i];
      tippyHover(xref, undefined, function(instance) {
        instance.disable();
        let url = xref.getAttribute('href');
        let hash = undefined;
        if (url.startsWith('#')) {
          hash = url;
        } else {
          try { hash = new URL(url).hash; } catch {}
        }
        if (hash) {
          const id = hash.replace(/^#\/?/, "");
          const note = window.document.getElementById(id);
          if (note !== null) {
            try {
              const html = processXRef(id, note.cloneNode(true));
              instance.setContent(html);
            } finally {
              instance.enable();
              instance.show();
            }
          } else {
            // See if we can fetch this
            fetch(url.split('#')[0])
            .then(res => res.text())
            .then(html => {
              const parser = new DOMParser();
              const htmlDoc = parser.parseFromString(html, "text/html");
              const note = htmlDoc.getElementById(id);
              if (note !== null) {
                const html = processXRef(id, note);
                instance.setContent(html);
              }
            }).finally(() => {
              instance.enable();
              instance.show();
            });
          }
        } else {
          // See if we can fetch a full url (with no hash to target)
          // This is a special case and we should probably do some content thinning / targeting
          fetch(url)
          .then(res => res.text())
          .then(html => {
            const parser = new DOMParser();
            const htmlDoc = parser.parseFromString(html, "text/html");
            const note = htmlDoc.querySelector('main.content');
            if (note !== null) {
              // This should only happen for chapter cross references
              // (since there is no id in the URL)
              // remove the first header
              if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
                note.children[0].remove();
              }
              const html = processXRef(null, note);
              instance.setContent(html);
            }
          }).finally(() => {
            instance.enable();
            instance.show();
          });
        }
      }, function(instance) {
      });
    }

    // The annotation <dt> whose code lines are currently highlighted, if any.
    let selectedAnnoteEl;
    // CSS selector for the span that marks `annotation` within code cell `cell`.
    const selectorForAnnotation = ( cell, annotation) => {
      let cellAttr = 'data-code-cell="' + cell + '"';
      let lineAttr = 'data-code-annotation="' + annotation + '"';
      const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
      return selector;
    }
    // Highlights the code lines that `annoteEl` points at by positioning two
    // absolutely-positioned divs (code area and gutter) over them, and records
    // `annoteEl` as the current selection. Does nothing when the annotation
    // span or its first line element cannot be found, e.g. when the click
    // landed on a descendant that lacks the data-target-* attributes.
    const selectCodeLines = (annoteEl) => {
      const targetCell = annoteEl.getAttribute("data-target-cell");
      const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
      const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
      if (annoteSpan === null) {
        return;
      }
      const codeLines = annoteSpan.getAttribute("data-code-lines");
      if (codeLines === null) {
        return;
      }
      const lines = codeLines.split(",");
      const lineIds = lines.map((line) => {
        return targetCell + "-" + line;
      })
      let top = null;
      let height = null;
      let parent = null;
      if (lineIds.length > 0) {
        //compute the position of the single el (top and bottom and make a div)
        const el = window.document.getElementById(lineIds[0]);
        if (el === null) {
          return;
        }
        top = el.offsetTop;
        height = el.offsetHeight;
        parent = el.parentElement.parentElement;
        if (lineIds.length > 1) {
          // Extend the highlight down to the bottom of the last line.
          const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
          if (lastEl !== null) {
            const bottom = lastEl.offsetTop + lastEl.offsetHeight;
            height = bottom - top;
          }
        }
        if (top !== null && height !== null && parent !== null) {
          // cook up a div (if necessary) and position it
          let div = window.document.getElementById("code-annotation-line-highlight");
          if (div === null) {
            div = window.document.createElement("div");
            div.setAttribute("id", "code-annotation-line-highlight");
            div.style.position = 'absolute';
            parent.appendChild(div);
          }
          div.style.top = top - 2 + "px";
          div.style.height = height + 4 + "px";
          div.style.left = 0;
          let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
          if (gutterDiv === null) {
            gutterDiv = window.document.createElement("div");
            gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
            gutterDiv.style.position = 'absolute';
            const codeCell = window.document.getElementById(targetCell);
            const gutter = codeCell?.querySelector('.code-annotation-gutter');
            if (gutter) {
              gutter.appendChild(gutterDiv);
            }
          }
          gutterDiv.style.top = top - 2 + "px";
          gutterDiv.style.height = height + 4 + "px";
        }
        selectedAnnoteEl = annoteEl;
      }
    };
    // Removes both highlight divs and clears the current selection.
    const unselectCodeLines = () => {
      const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
      elementsIds.forEach((elId) => {
        const div = window.document.getElementById(elId);
        if (div) {
          div.remove();
        }
      });
      selectedAnnoteEl = undefined;
    };
    // Handle positioning of the toggle
    window.addEventListener(
      "resize",
      throttle(() => {
        // NOTE(review): elRect is not declared anywhere in this script, so in
        // a non-strict script this assigns a global; confirm whether another
        // script reads it.
        elRect = undefined;
        if (selectedAnnoteEl) {
          selectCodeLines(selectedAnnoteEl);
        }
      }, 10)
    );
    // Wraps `fn`: the first call runs immediately; each later call cancels any
    // pending timer and schedules `fn` to run `ms` milliseconds later, after
    // which the wrapper lets the next call through immediately again.
    function throttle(fn, ms) {
      let throttle = false;
      let timer;
      return (...args) => {
        if(!throttle) { // first call gets through
          fn.apply(this, args);
          throttle = true;
        } else { // all the others get throttled
          if(timer) clearTimeout(timer); // cancel #2
          timer = setTimeout(() => {
            fn.apply(this, args);
            timer = throttle = false;
          }, ms);
        }
      };
    }
    // Attach click handler to the DT
    const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
    for (const annoteDlNode of annoteDls) {
      annoteDlNode.addEventListener('click', (event) => {
        const clickedEl = event.target;
        if (clickedEl !== selectedAnnoteEl) {
          unselectCodeLines();
          const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
          if (activeEl) {
            activeEl.classList.remove('code-annotation-active');
          }
          selectCodeLines(clickedEl);
          clickedEl.classList.add('code-annotation-active');
        } else {
          // Unselect the line
          unselectCodeLines();
          clickedEl.classList.remove('code-annotation-active');
        }
      });
    }

    // Walks up from `el` to the nearest ancestor with a `data-cites`
    // attribute. Returns { el, cites } where `el` is the child of that
    // ancestor on the path walked, or undefined when no ancestor has one.
    const findCites = (el) => {
      const parentEl = el.parentElement;
      if (parentEl) {
        const cites = parentEl.dataset.cites;
        if (cites) {
          return {
            el,
            cites: cites.split(' ')
          };
        } else {
          return findCites(el.parentElement)
        }
      } else {
        return undefined;
      }
    };
    // Citations: the popup lists the HTML of each cited "ref-<key>" entry.
    var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
    for (var i=0; i<bibliorefs.length; i++) {
      const ref = bibliorefs[i];
      const citeInfo = findCites(ref);
      if (citeInfo) {
        tippyHover(citeInfo.el, function() {
          var popup = window.document.createElement('div');
          citeInfo.cites.forEach(function(cite) {
            var citeDiv = window.document.createElement('div');
            citeDiv.classList.add('hanging-indent');
            citeDiv.classList.add('csl-entry');
            var biblioDiv = window.document.getElementById('ref-' + cite);
            if (biblioDiv) {
              citeDiv.innerHTML = biblioDiv.innerHTML;
            }
            popup.appendChild(citeDiv);
          });
          return popup.innerHTML;
        });
      }
    }
  });
</script>
</div> <!-- /content -->
<footer class="footer">
  <div class="nav-footer">
    <div class="nav-footer-left">
<p>Built with <a href="https://quarto.org/">Quarto</a></p>
</div>
    <div class="nav-footer-center">
      &nbsp;
    <div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/your-org/atdata/edit/main/api/Dataset.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></div>
    <div class="nav-footer-right">
<p>MIT License</p>
</div>
  </div>
</footer>




</body></html>