A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 1319 lines 71 kB view raw
1<!DOCTYPE html> 2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head> 3 4<meta charset="utf-8"> 5<meta name="generator" content="quarto-1.7.34"> 6 7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> 8 9<meta name="description" content="Loading and iterating typed WebDataset tar files"> 10 11<title>Datasets – atdata</title> 12<style> 13code{white-space: pre-wrap;} 14span.smallcaps{font-variant: small-caps;} 15div.columns{display: flex; gap: min(4vw, 1.5em);} 16div.column{flex: auto; overflow-x: auto;} 17div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} 18ul.task-list{list-style: none;} 19ul.task-list li input[type="checkbox"] { 20 width: 0.8em; 21 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 22 vertical-align: middle; 23} 24/* CSS for syntax highlighting */ 25html { -webkit-text-size-adjust: 100%; } 26pre > code.sourceCode { white-space: pre; position: relative; } 27pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } 28pre > code.sourceCode > span:empty { height: 1.2em; } 29.sourceCode { overflow: visible; } 30code.sourceCode > span { color: inherit; text-decoration: inherit; } 31div.sourceCode { margin: 1em 0; } 32pre.sourceCode { margin: 0; } 33@media screen { 34div.sourceCode { overflow: auto; } 35} 36@media print { 37pre > code.sourceCode { white-space: pre-wrap; } 38pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } 39} 40pre.numberSource code 41 { counter-reset: source-line 0; } 42pre.numberSource code > span 43 { position: relative; left: -4em; counter-increment: source-line; } 44pre.numberSource code > span > a:first-child::before 45 { content: counter(source-line); 46 position: relative; left: -1em; text-align: right; vertical-align: baseline; 47 border: none; display: inline-block; 48 -webkit-touch-callout: none; -webkit-user-select: none; 49 -khtml-user-select: none; 
-moz-user-select: none; 50 -ms-user-select: none; user-select: none; 51 padding: 0 4px; width: 4em; 52 } 53pre.numberSource { margin-left: 3em; padding-left: 4px; } 54div.sourceCode 55 { } 56@media screen { 57pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } 58} 59</style> 60 61 62<script src="../site_libs/quarto-nav/quarto-nav.js"></script> 63<script src="../site_libs/quarto-nav/headroom.min.js"></script> 64<script src="../site_libs/clipboard/clipboard.min.js"></script> 65<script src="../site_libs/quarto-search/autocomplete.umd.js"></script> 66<script src="../site_libs/quarto-search/fuse.min.js"></script> 67<script src="../site_libs/quarto-search/quarto-search.js"></script> 68<meta name="quarto:offset" content="../"> 69<script src="../site_libs/quarto-html/quarto.js" type="module"></script> 70<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script> 71<script src="../site_libs/quarto-html/popper.min.js"></script> 72<script src="../site_libs/quarto-html/tippy.umd.min.js"></script> 73<script src="../site_libs/quarto-html/anchor.min.js"></script> 74<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet"> 75<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles"> 76<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles"> 77<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles"> 78<script src="../site_libs/bootstrap/bootstrap.min.js"></script> 79<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet"> 80<link 
href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" data-mode="light"> 81<link href="../site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark"> 82<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light"> 83<script id="quarto-search-options" type="application/json">{ 84 "location": "navbar", 85 "copy-button": false, 86 "collapse-after": 3, 87 "panel-placement": "end", 88 "type": "overlay", 89 "limit": 50, 90 "keyboard-shortcut": [ 91 "f", 92 "/", 93 "s" 94 ], 95 "show-item-context": false, 96 "language": { 97 "search-no-results-text": "No results", 98 "search-matching-documents-text": "matching documents", 99 "search-copy-link-title": "Copy link to search", 100 "search-hide-matches-text": "Hide additional matches", 101 "search-more-match-text": "more match in this document", 102 "search-more-matches-text": "more matches in this document", 103 "search-clear-button-title": "Clear", 104 "search-text-placeholder": "", 105 "search-detached-cancel-button-title": "Cancel", 106 "search-submit-button-title": "Submit", 107 "search-label": "Search" 108 } 109}</script> 110 111 112<link rel="stylesheet" href="../assets/styles.css"> 113</head> 114 115<body class="nav-sidebar docked nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript"> 116 const toggleBodyColorMode = (bsSheetEl) => { 117 const mode = bsSheetEl.getAttribute("data-mode"); 118 const bodyEl = window.document.querySelector("body"); 119 if (mode === "dark") { 120 bodyEl.classList.add("quarto-dark"); 121 bodyEl.classList.remove("quarto-light"); 122 } else { 123 
bodyEl.classList.add("quarto-light"); 124 bodyEl.classList.remove("quarto-dark"); 125 } 126 } 127 const toggleBodyColorPrimary = () => { 128 const bsSheetEl = window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])"); 129 if (bsSheetEl) { 130 toggleBodyColorMode(bsSheetEl); 131 } 132 } 133 const setColorSchemeToggle = (alternate) => { 134 const toggles = window.document.querySelectorAll('.quarto-color-scheme-toggle'); 135 for (let i=0; i < toggles.length; i++) { 136 const toggle = toggles[i]; 137 if (toggle) { 138 if (alternate) { 139 toggle.classList.add("alternate"); 140 } else { 141 toggle.classList.remove("alternate"); 142 } 143 } 144 } 145 }; 146 const toggleColorMode = (alternate) => { 147 // Switch the stylesheets 148 const primaryStylesheets = window.document.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)'); 149 const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate'); 150 manageTransitions('#quarto-margin-sidebar .nav-link', false); 151 if (alternate) { 152 // note: dark is layered on light, we don't disable primary! 
153 enableStylesheet(alternateStylesheets); 154 for (const sheetNode of alternateStylesheets) { 155 if (sheetNode.id === "quarto-bootstrap") { 156 toggleBodyColorMode(sheetNode); 157 } 158 } 159 } else { 160 disableStylesheet(alternateStylesheets); 161 enableStylesheet(primaryStylesheets) 162 toggleBodyColorPrimary(); 163 } 164 manageTransitions('#quarto-margin-sidebar .nav-link', true); 165 // Switch the toggles 166 setColorSchemeToggle(alternate) 167 // Hack to workaround the fact that safari doesn't 168 // properly recolor the scrollbar when toggling (#1455) 169 if (navigator.userAgent.indexOf('Safari') > 0 && navigator.userAgent.indexOf('Chrome') == -1) { 170 manageTransitions("body", false); 171 window.scrollTo(0, 1); 172 setTimeout(() => { 173 window.scrollTo(0, 0); 174 manageTransitions("body", true); 175 }, 40); 176 } 177 } 178 const disableStylesheet = (stylesheets) => { 179 for (let i=0; i < stylesheets.length; i++) { 180 const stylesheet = stylesheets[i]; 181 stylesheet.rel = 'disabled-stylesheet'; 182 } 183 } 184 const enableStylesheet = (stylesheets) => { 185 for (let i=0; i < stylesheets.length; i++) { 186 const stylesheet = stylesheets[i]; 187 if(stylesheet.rel !== 'stylesheet') { // for Chrome, which will still FOUC without this check 188 stylesheet.rel = 'stylesheet'; 189 } 190 } 191 } 192 const manageTransitions = (selector, allowTransitions) => { 193 const els = window.document.querySelectorAll(selector); 194 for (let i=0; i < els.length; i++) { 195 const el = els[i]; 196 if (allowTransitions) { 197 el.classList.remove('notransition'); 198 } else { 199 el.classList.add('notransition'); 200 } 201 } 202 } 203 const isFileUrl = () => { 204 return window.location.protocol === 'file:'; 205 } 206 const hasAlternateSentinel = () => { 207 let styleSentinel = getColorSchemeSentinel(); 208 if (styleSentinel !== null) { 209 return styleSentinel === "alternate"; 210 } else { 211 return false; 212 } 213 } 214 const setStyleSentinel = (alternate) => { 215 
const value = alternate ? "alternate" : "default"; 216 if (!isFileUrl()) { 217 window.localStorage.setItem("quarto-color-scheme", value); 218 } else { 219 localAlternateSentinel = value; 220 } 221 } 222 const getColorSchemeSentinel = () => { 223 if (!isFileUrl()) { 224 const storageValue = window.localStorage.getItem("quarto-color-scheme"); 225 return storageValue != null ? storageValue : localAlternateSentinel; 226 } else { 227 return localAlternateSentinel; 228 } 229 } 230 const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => { 231 const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light'; 232 const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark'; 233 let newTheme = ''; 234 if(authorPrefersDark) { 235 newTheme = isAlternate ? baseTheme : alternateTheme; 236 } else { 237 newTheme = isAlternate ? alternateTheme : baseTheme; 238 } 239 const changeGiscusTheme = () => { 240 // From: https://github.com/giscus/giscus/issues/336 241 const sendMessage = (message) => { 242 const iframe = document.querySelector('iframe.giscus-frame'); 243 if (!iframe) return; 244 iframe.contentWindow.postMessage({ giscus: message }, 'https://giscus.app'); 245 } 246 sendMessage({ 247 setConfig: { 248 theme: newTheme 249 } 250 }); 251 } 252 const isGiscussLoaded = window.document.querySelector('iframe.giscus-frame') !== null; 253 if (isGiscussLoaded) { 254 changeGiscusTheme(); 255 } 256 }; 257 const authorPrefersDark = false; 258 const darkModeDefault = authorPrefersDark; 259 document.querySelector('link#quarto-text-highlighting-styles.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 260 document.querySelector('link#quarto-bootstrap.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 261 let localAlternateSentinel = darkModeDefault ? 
'alternate' : 'default'; 262 // Dark / light mode switch 263 window.quartoToggleColorScheme = () => { 264 // Read the current dark / light value 265 let toAlternate = !hasAlternateSentinel(); 266 toggleColorMode(toAlternate); 267 setStyleSentinel(toAlternate); 268 toggleGiscusIfUsed(toAlternate, darkModeDefault); 269 window.dispatchEvent(new Event('resize')); 270 }; 271 // Switch to dark mode if need be 272 if (hasAlternateSentinel()) { 273 toggleColorMode(true); 274 } else { 275 toggleColorMode(false); 276 } 277 </script> 278 279<div id="quarto-search-results"></div> 280 <header id="quarto-header" class="headroom fixed-top"> 281 <nav class="navbar navbar-expand-lg " data-bs-theme="dark"> 282 <div class="navbar-container container-fluid"> 283 <div class="navbar-brand-container mx-auto"> 284 <a class="navbar-brand" href="../index.html"> 285 <span class="navbar-title">atdata</span> 286 </a> 287 </div> 288 <div id="quarto-search" class="" title="Search"></div> 289 <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" role="menu" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 290 <span class="navbar-toggler-icon"></span> 291</button> 292 <div class="collapse navbar-collapse" id="navbarCollapse"> 293 <ul class="navbar-nav navbar-nav-scroll me-auto"> 294 <li class="nav-item"> 295 <a class="nav-link active" href="../index.html" aria-current="page"> 296<span class="menu-text">Guide</span></a> 297 </li> 298 <li class="nav-item dropdown "> 299 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 300 <span class="menu-text">Tutorials</span> 301 </a> 302 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials"> 303 <li> 304 <a class="dropdown-item" href="../tutorials/quickstart.html"> 305 <span class="dropdown-text">Quick 
Start</span></a> 306 </li> 307 <li> 308 <a class="dropdown-item" href="../tutorials/local-workflow.html"> 309 <span class="dropdown-text">Local Workflow</span></a> 310 </li> 311 <li> 312 <a class="dropdown-item" href="../tutorials/atmosphere.html"> 313 <span class="dropdown-text">Atmosphere Publishing</span></a> 314 </li> 315 <li> 316 <a class="dropdown-item" href="../tutorials/promotion.html"> 317 <span class="dropdown-text">Promotion Workflow</span></a> 318 </li> 319 </ul> 320 </li> 321 <li class="nav-item dropdown "> 322 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 323 <span class="menu-text">Reference</span> 324 </a> 325 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference"> 326 <li> 327 <a class="dropdown-item" href="../reference/architecture.html"> 328 <span class="dropdown-text">Architecture Overview</span></a> 329 </li> 330 <li> 331 <a class="dropdown-item" href="../reference/packable-samples.html"> 332 <span class="dropdown-text">Packable Samples</span></a> 333 </li> 334 <li> 335 <a class="dropdown-item" href="../reference/datasets.html"> 336 <span class="dropdown-text">Datasets</span></a> 337 </li> 338 <li> 339 <a class="dropdown-item" href="../reference/lenses.html"> 340 <span class="dropdown-text">Lenses</span></a> 341 </li> 342 <li> 343 <a class="dropdown-item" href="../reference/local-storage.html"> 344 <span class="dropdown-text">Local Storage</span></a> 345 </li> 346 <li> 347 <a class="dropdown-item" href="../reference/atmosphere.html"> 348 <span class="dropdown-text">Atmosphere</span></a> 349 </li> 350 <li> 351 <a class="dropdown-item" href="../reference/promotion.html"> 352 <span class="dropdown-text">Promotion</span></a> 353 </li> 354 <li> 355 <a class="dropdown-item" href="../reference/load-dataset.html"> 356 <span class="dropdown-text">load_dataset API</span></a> 357 </li> 358 <li> 359 <a class="dropdown-item" href="../reference/protocols.html"> 360 
<span class="dropdown-text">Protocols</span></a> 361 </li> 362 <li> 363 <a class="dropdown-item" href="../reference/uri-spec.html"> 364 <span class="dropdown-text">URI Specification</span></a> 365 </li> 366 <li> 367 <a class="dropdown-item" href="../reference/troubleshooting.html"> 368 <span class="dropdown-text">Troubleshooting &amp; FAQ</span></a> 369 </li> 370 <li> 371 <a class="dropdown-item" href="../reference/deployment.html"> 372 <span class="dropdown-text">Deployment Guide</span></a> 373 </li> 374 </ul> 375 </li> 376 <li class="nav-item"> 377 <a class="nav-link" href="../api/index.html"> 378<span class="menu-text">API</span></a> 379 </li> 380</ul> 381 <ul class="navbar-nav navbar-nav-scroll ms-auto"> 382 <li class="nav-item compact"> 383 <a class="nav-link" href="https://github.com/your-org/atdata"> <i class="bi bi-github" role="img"> 384</i> 385<span class="menu-text"></span></a> 386 </li> 387</ul> 388 </div> <!-- /navcollapse --> 389 <div class="quarto-navbar-tools"> 390 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a> 391</div> 392 </div> <!-- /container-fluid --> 393 </nav> 394 <nav class="quarto-secondary-nav"> 395 <div class="container-fluid d-flex"> 396 <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 397 <i class="bi bi-layout-text-sidebar-reverse"></i> 398 </button> 399 <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/datasets.html">Datasets</a></li></ol></nav> 400 <a class="flex-grow-1" 
role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 401 </a> 402 </div> 403 </nav> 404</header> 405<!-- content --> 406<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar"> 407<!-- sidebar --> 408 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto"> 409 <div class="sidebar-menu-container"> 410 <ul class="list-unstyled mt-1"> 411 <li class="sidebar-item"> 412 <div class="sidebar-item-container"> 413 <a href="../index.html" class="sidebar-item-text sidebar-link"> 414 <span class="menu-text">atdata</span></a> 415 </div> 416</li> 417 <li class="sidebar-item sidebar-item-section"> 418 <div class="sidebar-item-container"> 419 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true"> 420 <span class="menu-text">Getting Started</span></a> 421 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section"> 422 <i class="bi bi-chevron-right ms-2"></i> 423 </a> 424 </div> 425 <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show"> 426 <li class="sidebar-item"> 427 <div class="sidebar-item-container"> 428 <a href="../tutorials/quickstart.html" class="sidebar-item-text sidebar-link"> 429 <span class="menu-text">Quick Start</span></a> 430 </div> 431</li> 432 <li class="sidebar-item"> 433 <div class="sidebar-item-container"> 434 <a href="../tutorials/local-workflow.html" class="sidebar-item-text sidebar-link"> 435 <span class="menu-text">Local Workflow</span></a> 436 </div> 
437</li> 438 <li class="sidebar-item"> 439 <div class="sidebar-item-container"> 440 <a href="../tutorials/atmosphere.html" class="sidebar-item-text sidebar-link"> 441 <span class="menu-text">Atmosphere Publishing</span></a> 442 </div> 443</li> 444 <li class="sidebar-item"> 445 <div class="sidebar-item-container"> 446 <a href="../tutorials/promotion.html" class="sidebar-item-text sidebar-link"> 447 <span class="menu-text">Promotion Workflow</span></a> 448 </div> 449</li> 450 </ul> 451 </li> 452 <li class="sidebar-item sidebar-item-section"> 453 <div class="sidebar-item-container"> 454 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true"> 455 <span class="menu-text">Reference</span></a> 456 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section"> 457 <i class="bi bi-chevron-right ms-2"></i> 458 </a> 459 </div> 460 <ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show"> 461 <li class="sidebar-item"> 462 <div class="sidebar-item-container"> 463 <a href="../reference/architecture.html" class="sidebar-item-text sidebar-link"> 464 <span class="menu-text">Architecture Overview</span></a> 465 </div> 466</li> 467 <li class="sidebar-item"> 468 <div class="sidebar-item-container"> 469 <a href="../reference/packable-samples.html" class="sidebar-item-text sidebar-link"> 470 <span class="menu-text">Packable Samples</span></a> 471 </div> 472</li> 473 <li class="sidebar-item"> 474 <div class="sidebar-item-container"> 475 <a href="../reference/datasets.html" class="sidebar-item-text sidebar-link active"> 476 <span class="menu-text">Datasets</span></a> 477 </div> 478</li> 479 <li class="sidebar-item"> 480 <div class="sidebar-item-container"> 481 <a href="../reference/lenses.html" class="sidebar-item-text sidebar-link"> 
482 <span class="menu-text">Lenses</span></a> 483 </div> 484</li> 485 <li class="sidebar-item"> 486 <div class="sidebar-item-container"> 487 <a href="../reference/local-storage.html" class="sidebar-item-text sidebar-link"> 488 <span class="menu-text">Local Storage</span></a> 489 </div> 490</li> 491 <li class="sidebar-item"> 492 <div class="sidebar-item-container"> 493 <a href="../reference/atmosphere.html" class="sidebar-item-text sidebar-link"> 494 <span class="menu-text">Atmosphere (ATProto Integration)</span></a> 495 </div> 496</li> 497 <li class="sidebar-item"> 498 <div class="sidebar-item-container"> 499 <a href="../reference/promotion.html" class="sidebar-item-text sidebar-link"> 500 <span class="menu-text">Promotion Workflow</span></a> 501 </div> 502</li> 503 <li class="sidebar-item"> 504 <div class="sidebar-item-container"> 505 <a href="../reference/load-dataset.html" class="sidebar-item-text sidebar-link"> 506 <span class="menu-text">load_dataset API</span></a> 507 </div> 508</li> 509 <li class="sidebar-item"> 510 <div class="sidebar-item-container"> 511 <a href="../reference/protocols.html" class="sidebar-item-text sidebar-link"> 512 <span class="menu-text">Protocols</span></a> 513 </div> 514</li> 515 <li class="sidebar-item"> 516 <div class="sidebar-item-container"> 517 <a href="../reference/uri-spec.html" class="sidebar-item-text sidebar-link"> 518 <span class="menu-text">URI Specification</span></a> 519 </div> 520</li> 521 <li class="sidebar-item"> 522 <div class="sidebar-item-container"> 523 <a href="../reference/troubleshooting.html" class="sidebar-item-text sidebar-link"> 524 <span class="menu-text">Troubleshooting &amp; FAQ</span></a> 525 </div> 526</li> 527 <li class="sidebar-item"> 528 <div class="sidebar-item-container"> 529 <a href="../reference/deployment.html" class="sidebar-item-text sidebar-link"> 530 <span class="menu-text">Deployment Guide</span></a> 531 </div> 532</li> 533 </ul> 534 </li> 535 </ul> 536 </div> 537</nav> 538<div 
id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div> 539<!-- margin-sidebar --> 540 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar"> 541 <nav id="TOC" role="doc-toc" class="toc-active"> 542 <h2 id="toc-title">On this page</h2> 543 544 <ul> 545 <li><a href="#creating-a-dataset" id="toc-creating-a-dataset" class="nav-link active" data-scroll-target="#creating-a-dataset">Creating a Dataset</a></li> 546 <li><a href="#data-sources" id="toc-data-sources" class="nav-link" data-scroll-target="#data-sources">Data Sources</a> 547 <ul class="collapse"> 548 <li><a href="#url-source-default" id="toc-url-source-default" class="nav-link" data-scroll-target="#url-source-default">URL Source (default)</a></li> 549 <li><a href="#s3-source" id="toc-s3-source" class="nav-link" data-scroll-target="#s3-source">S3 Source</a></li> 550 </ul></li> 551 <li><a href="#iteration-modes" id="toc-iteration-modes" class="nav-link" data-scroll-target="#iteration-modes">Iteration Modes</a> 552 <ul class="collapse"> 553 <li><a href="#ordered-iteration" id="toc-ordered-iteration" class="nav-link" data-scroll-target="#ordered-iteration">Ordered Iteration</a></li> 554 <li><a href="#shuffled-iteration" id="toc-shuffled-iteration" class="nav-link" data-scroll-target="#shuffled-iteration">Shuffled Iteration</a></li> 555 </ul></li> 556 <li><a href="#samplebatch" id="toc-samplebatch" class="nav-link" data-scroll-target="#samplebatch">SampleBatch</a></li> 557 <li><a href="#type-transformations-with-lenses" id="toc-type-transformations-with-lenses" class="nav-link" data-scroll-target="#type-transformations-with-lenses">Type Transformations with Lenses</a></li> 558 <li><a href="#dataset-properties" id="toc-dataset-properties" class="nav-link" data-scroll-target="#dataset-properties">Dataset Properties</a> 559 <ul class="collapse"> 560 <li><a href="#shard-list" id="toc-shard-list" class="nav-link" 
data-scroll-target="#shard-list">Shard List</a></li> 561 <li><a href="#metadata" id="toc-metadata" class="nav-link" data-scroll-target="#metadata">Metadata</a></li> 562 </ul></li> 563 <li><a href="#writing-datasets" id="toc-writing-datasets" class="nav-link" data-scroll-target="#writing-datasets">Writing Datasets</a></li> 564 <li><a href="#parquet-export" id="toc-parquet-export" class="nav-link" data-scroll-target="#parquet-export">Parquet Export</a></li> 565 <li><a href="#url-formats" id="toc-url-formats" class="nav-link" data-scroll-target="#url-formats">URL Formats</a></li> 566 <li><a href="#dataset-properties-1" id="toc-dataset-properties-1" class="nav-link" data-scroll-target="#dataset-properties-1">Dataset Properties</a> 567 <ul class="collapse"> 568 <li><a href="#source" id="toc-source" class="nav-link" data-scroll-target="#source">Source</a></li> 569 <li><a href="#sample-type" id="toc-sample-type" class="nav-link" data-scroll-target="#sample-type">Sample Type</a></li> 570 </ul></li> 571 <li><a href="#related" id="toc-related" class="nav-link" data-scroll-target="#related">Related</a></li> 572 </ul> 573<div class="toc-actions"><ul><li><a href="https://github.com/your-org/atdata/edit/main/reference/datasets.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav> 574 </div> 575<!-- main --> 576<main class="content" id="quarto-document-content"> 577 578 579<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/datasets.html">Datasets</a></li></ol></nav> 580<div class="quarto-title"> 581<h1 class="title">Datasets</h1> 582</div> 
583 584<div> 585 <div class="description"> 586 Loading and iterating typed WebDataset tar files 587 </div> 588</div> 589 590 591<div class="quarto-title-meta"> 592 593 594 595 596 </div> 597 598 599 600</header> 601 602 603<p>The <code>Dataset</code> class provides typed iteration over WebDataset tar files with automatic batching and lens transformations.</p> 604<section id="creating-a-dataset" class="level2"> 605<h2 class="anchored" data-anchor-id="creating-a-dataset">Creating a Dataset</h2> 606<div id="ef6e2916" class="cell"> 607<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 608<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 609<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span> 610<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 611<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ImageSample:</span> 612<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a> image: NDArray</span> 613<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span> 614<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a></span> 615<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Single shard (string URL - most common)</span></span> 616<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-000000.tar"</span>)</span> 617<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a></span> 618<span 
id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Multiple shards with brace notation</span></span> 619<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-{000000..000009}.tar"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 620</div> 621<p>The type parameter <code>[ImageSample]</code> specifies what sample type the dataset contains. This enables type-safe iteration and automatic deserialization.</p> 622</section> 623<section id="data-sources" class="level2"> 624<h2 class="anchored" data-anchor-id="data-sources">Data Sources</h2> 625<p>Datasets can be created from different data sources using the <code>DataSource</code> protocol:</p> 626<section id="url-source-default" class="level3"> 627<h3 class="anchored" data-anchor-id="url-source-default">URL Source (default)</h3> 628<p>When you pass a string to <code>Dataset</code>, it automatically wraps it in a <code>URLSource</code>:</p> 629<div id="9cb82ec3" class="cell"> 630<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># These are equivalent:</span></span> 631<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-{000000..000009}.tar"</span>)</span> 632<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](atdata.URLSource(<span class="st">"data-{000000..000009}.tar"</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 633</div> 634</section> 635<section id="s3-source" class="level3"> 636<h3 class="anchored" 
data-anchor-id="s3-source">S3 Source</h3> 637<p>For private S3 buckets or S3-compatible storage (Cloudflare R2, MinIO), use <code>S3Source</code>:</p> 638<div id="c2cb3b94" class="cell"> 639<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># From explicit credentials</span></span> 640<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> atdata.S3Source(</span> 641<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> bucket<span class="op">=</span><span class="st">"my-bucket"</span>,</span> 642<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> keys<span class="op">=</span>[<span class="st">"data-000000.tar"</span>, <span class="st">"data-000001.tar"</span>],</span> 643<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> endpoint<span class="op">=</span><span class="st">"https://my-r2-account.r2.cloudflarestorage.com"</span>,</span> 644<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> access_key<span class="op">=</span><span class="st">"AKID..."</span>,</span> 645<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> secret_key<span class="op">=</span><span class="st">"SECRET..."</span>,</span> 646<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a>)</span> 647<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](source)</span> 648<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a></span> 649<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a><span class="co"># From S3 URLs</span></span> 650<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> atdata.S3Source.from_urls([</span> 
651<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> <span class="st">"s3://my-bucket/data-000000.tar"</span>,</span> 652<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> <span class="st">"s3://my-bucket/data-000001.tar"</span>,</span> 653<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a>])</span> 654<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](source)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 655</div> 656<div class="callout callout-style-default callout-note callout-titled"> 657<div class="callout-header d-flex align-content-center"> 658<div class="callout-icon-container"> 659<i class="callout-icon"></i> 660</div> 661<div class="callout-title-container flex-fill"> 662Note 663</div> 664</div> 665<div class="callout-body-container callout-body"> 666<p><code>S3Source</code> uses boto3 for streaming, enabling authentication with private buckets. 
For public S3 URLs, a string URL with <code>URLSource</code> works directly.</p> 667</div> 668</div> 669</section> 670</section> 671<section id="iteration-modes" class="level2"> 672<h2 class="anchored" data-anchor-id="iteration-modes">Iteration Modes</h2> 673<section id="ordered-iteration" class="level3"> 674<h3 class="anchored" data-anchor-id="ordered-iteration">Ordered Iteration</h3> 675<p>Iterate through samples in their original order:</p> 676<div id="d3f45d97" class="cell"> 677<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># With batching (default batch_size=1)</span></span> 678<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 679<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> images <span class="op">=</span> batch.image <span class="co"># numpy array (32, H, W, C)</span></span> 680<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> labels <span class="op">=</span> batch.label <span class="co"># list of 32 strings</span></span> 681<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a></span> 682<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Without batching (raw samples)</span></span> 683<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> sample <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="va">None</span>):</span> 684<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(sample.label)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 
685</div> 686</section> 687<section id="shuffled-iteration" class="level3"> 688<h3 class="anchored" data-anchor-id="shuffled-iteration">Shuffled Iteration</h3> 689<p>Iterate with randomized order at both shard and sample levels:</p> 690<div id="fb5204d1" class="cell"> 691<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.shuffled(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 692<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># Samples are shuffled</span></span> 693<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> process(batch)</span> 694<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a></span> 695<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Control shuffle buffer sizes</span></span> 696<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.shuffled(</span> 697<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a> buffer_shards<span class="op">=</span><span class="dv">100</span>, <span class="co"># Shards to buffer (default: 100)</span></span> 698<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a> buffer_samples<span class="op">=</span><span class="dv">10000</span>, <span class="co"># Samples to buffer (default: 10,000)</span></span> 699<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a> batch_size<span class="op">=</span><span class="dv">32</span>,</span> 700<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a>):</span> 701<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to 
Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 702</div> 703<div class="callout callout-style-default callout-tip callout-titled"> 704<div class="callout-header d-flex align-content-center"> 705<div class="callout-icon-container"> 706<i class="callout-icon"></i> 707</div> 708<div class="callout-title-container flex-fill"> 709Tip 710</div> 711</div> 712<div class="callout-body-container callout-body"> 713<p>Larger buffer sizes increase randomness but use more memory. For training, <code>buffer_samples=10000</code> is usually a good balance.</p> 714</div> 715</div> 716</section> 717</section> 718<section id="samplebatch" class="level2"> 719<h2 class="anchored" data-anchor-id="samplebatch">SampleBatch</h2> 720<p>When iterating with a <code>batch_size</code>, each iteration yields a <code>SampleBatch</code> with automatic attribute aggregation.</p> 721<div id="c1093b41" class="cell"> 722<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 723<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> Sample:</span> 724<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> features: NDArray <span class="co"># shape (256,)</span></span> 725<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span> 726<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> score: <span class="bu">float</span></span> 727<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a></span> 728<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="dv">16</span>):</span> 729<span id="cb6-8"><a href="#cb6-8" 
aria-hidden="true" tabindex="-1"></a> <span class="co"># NDArray fields are stacked with a batch dimension</span></span> 730<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> features <span class="op">=</span> batch.features <span class="co"># numpy array (16, 256)</span></span> 731<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a></span> 732<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> <span class="co"># Other fields become lists</span></span> 733<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> labels <span class="op">=</span> batch.label <span class="co"># list of 16 strings</span></span> 734<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a> scores <span class="op">=</span> batch.score <span class="co"># list of 16 floats</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 735</div> 736<p>Results are cached, so accessing the same attribute multiple times is efficient.</p> 737</section> 738<section id="type-transformations-with-lenses" class="level2"> 739<h2 class="anchored" data-anchor-id="type-transformations-with-lenses">Type Transformations with Lenses</h2> 740<p>View a dataset through a different sample type using registered lenses:</p> 741<div id="044cffe6" class="cell"> 742<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 743<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> SimplifiedSample:</span> 744<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span> 745<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a></span> 746<span id="cb7-5"><a href="#cb7-5" 
aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.lens</span></span> 747<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> simplify(src: ImageSample) <span class="op">-&gt;</span> SimplifiedSample:</span> 748<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> SimplifiedSample(label<span class="op">=</span>src.label)</span> 749<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a></span> 750<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Transform dataset to different type</span></span> 751<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>simple_ds <span class="op">=</span> dataset.as_type(SimplifiedSample)</span> 752<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a></span> 753<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> simple_ds.ordered(batch_size<span class="op">=</span><span class="dv">16</span>):</span> 754<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(batch.label) <span class="co"># Only label field available</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 755</div> 756<p>See <a href="../reference/lenses.html">Lenses</a> for details on defining transformations.</p> 757</section> 758<section id="dataset-properties" class="level2"> 759<h2 class="anchored" data-anchor-id="dataset-properties">Dataset Properties</h2> 760<section id="shard-list" class="level3"> 761<h3 class="anchored" data-anchor-id="shard-list">Shard List</h3> 762<p>Get the list of individual tar files:</p> 763<div id="ce9df6da" class="cell"> 764<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span 
id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[Sample](<span class="st">"data-{000000..000009}.tar"</span>)</span> 765<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>shards <span class="op">=</span> dataset.shard_list</span> 766<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a><span class="co"># ['data-000000.tar', 'data-000001.tar', ..., 'data-000009.tar']</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 767</div> 768</section> 769<section id="metadata" class="level3"> 770<h3 class="anchored" data-anchor-id="metadata">Metadata</h3> 771<p>Datasets can have associated metadata from a URL:</p> 772<div id="129c7a63" class="cell"> 773<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[Sample](</span> 774<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="st">"data-{000000..000009}.tar"</span>,</span> 775<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> metadata_url<span class="op">=</span><span class="st">"https://example.com/metadata.msgpack"</span></span> 776<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a>)</span> 777<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a></span> 778<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Fetched and cached on first access</span></span> 779<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a>metadata <span class="op">=</span> dataset.metadata <span class="co"># dict or None</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 780</div> 
781</section> 782</section> 783<section id="writing-datasets" class="level2"> 784<h2 class="anchored" data-anchor-id="writing-datasets">Writing Datasets</h2> 785<p>Use WebDataset’s <code>TarWriter</code> or <code>ShardWriter</code> to create datasets:</p> 786<div id="f29fbb2c" class="cell"> 787<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 788<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 789<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a></span> 790<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a>samples <span class="op">=</span> [</span> 791<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> ImageSample(image<span class="op">=</span>np.random.rand(<span class="dv">224</span>, <span class="dv">224</span>, <span class="dv">3</span>).astype(np.float32), label<span class="op">=</span><span class="st">"cat"</span>)</span> 792<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> _ <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">100</span>)</span> 793<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a>]</span> 794<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a></span> 795<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Single tar file</span></span> 796<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.TarWriter(<span class="st">"data-000000.tar"</span>) <span class="im">as</span> sink:</span> 797<span id="cb10-11"><a href="#cb10-11" aria-hidden="true" 
tabindex="-1"></a> <span class="cf">for</span> i, sample <span class="kw">in</span> <span class="bu">enumerate</span>(samples):</span> 798<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>sample.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"sample_</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span> 799<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a></span> 800<span id="cb10-14"><a href="#cb10-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Multiple shards with automatic splitting</span></span> 801<span id="cb10-15"><a href="#cb10-15" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.ShardWriter(<span class="st">"data-</span><span class="sc">%06d</span><span class="st">.tar"</span>, maxcount<span class="op">=</span><span class="dv">1000</span>) <span class="im">as</span> sink:</span> 802<span id="cb10-16"><a href="#cb10-16" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i, sample <span class="kw">in</span> <span class="bu">enumerate</span>(samples):</span> 803<span id="cb10-17"><a href="#cb10-17" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>sample.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"sample_</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 804</div> 805</section> 806<section id="parquet-export" class="level2"> 807<h2 class="anchored" data-anchor-id="parquet-export">Parquet Export</h2> 808<p>Export dataset contents to parquet format:</p> 809<div id="53060440" class="cell"> 810<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" 
tabindex="-1"></a><span class="co"># Export entire dataset</span></span> 811<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>dataset.to_parquet(<span class="st">"output.parquet"</span>)</span> 812<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a></span> 813<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Export with custom field mapping</span></span> 814<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> extract_fields(sample):</span> 815<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> {<span class="st">"label"</span>: sample.label, <span class="st">"score"</span>: sample.confidence}</span> 816<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a></span> 817<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a>dataset.to_parquet(<span class="st">"output.parquet"</span>, sample_map<span class="op">=</span>extract_fields)</span> 818<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a></span> 819<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Export in segments</span></span> 820<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a>dataset.to_parquet(<span class="st">"output.parquet"</span>, maxcount<span class="op">=</span><span class="dv">10000</span>)</span> 821<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Creates output-000000.parquet, output-000001.parquet, etc.</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 822</div> 823</section> 824<section id="url-formats" class="level2"> 825<h2 class="anchored" data-anchor-id="url-formats">URL Formats</h2> 826<p>When using string URLs (via <code>URLSource</code>), WebDataset supports various 
formats:</p> 827<table class="caption-top table"> 828<colgroup> 829<col style="width: 47%"> 830<col style="width: 52%"> 831</colgroup> 832<thead> 833<tr class="header"> 834<th>Format</th> 835<th>Example</th> 836</tr> 837</thead> 838<tbody> 839<tr class="odd"> 840<td>Local files</td> 841<td><code>./data/file.tar</code>, <code>/absolute/path/file-{000000..000009}.tar</code></td> 842</tr> 843<tr class="even"> 844<td>HTTP/HTTPS</td> 845<td><code>https://example.com/data-{000000..000009}.tar</code></td> 846</tr> 847<tr class="odd"> 848<td>Google Cloud</td> 849<td><code>gs://bucket/path/file.tar</code></td> 850</tr> 851</tbody> 852</table> 853<p>For S3 with authentication, use <code>S3Source</code> instead of <code>s3://</code> URLs.</p> 854</section> 855<section id="dataset-properties-1" class="level2"> 856<h2 class="anchored" data-anchor-id="dataset-properties-1">Dataset Properties</h2> 857<section id="source" class="level3"> 858<h3 class="anchored" data-anchor-id="source">Source</h3> 859<p>Access the underlying <code>DataSource</code>:</p> 860<div id="e315e899" class="cell"> 861<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[Sample](<span class="st">"data.tar"</span>)</span> 862<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>source <span class="op">=</span> dataset.source <span class="co"># URLSource instance</span></span> 863<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(source.shard_list) <span class="co"># ['data.tar']</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 864</div> 865</section> 866<section id="sample-type" class="level3"> 867<h3 class="anchored" data-anchor-id="sample-type">Sample Type</h3> 868<p>Get the type 
parameter used to create the dataset:</p> 869<div id="c77919f7" class="cell"> 870<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data.tar"</span>)</span> 871<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(dataset.sample_type) <span class="co"># &lt;class 'ImageSample'&gt;</span></span> 872<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(dataset.batch_type) <span class="co"># SampleBatch[ImageSample]</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 873</div> 874</section> 875</section> 876<section id="related" class="level2"> 877<h2 class="anchored" data-anchor-id="related">Related</h2> 878<ul> 879<li><a href="../reference/packable-samples.html">Packable Samples</a> - Defining typed samples</li> 880<li><a href="../reference/lenses.html">Lenses</a> - Type transformations</li> 881<li><a href="../reference/load-dataset.html">load_dataset</a> - HuggingFace-style loading API</li> 882<li><a href="../reference/protocols.html">Protocols</a> - DataSource protocol details</li> 883</ul> 884 885 886</section> 887 888</main> <!-- /main --> 889<script id="quarto-html-after-body" type="application/javascript"> 890 window.document.addEventListener("DOMContentLoaded", function (event) { 891 // Ensure there is a toggle, if there isn't float one in the top right 892 if (window.document.querySelector('.quarto-color-scheme-toggle') === null) { 893 const a = window.document.createElement('a'); 894 a.classList.add('top-right'); 895 a.classList.add('quarto-color-scheme-toggle'); 896 a.href = ""; 897 a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; }; 
898 const i = window.document.createElement("i"); 899 i.classList.add('bi'); 900 a.appendChild(i); 901 window.document.body.appendChild(a); 902 } 903 setColorSchemeToggle(hasAlternateSentinel()) 904 const icon = ""; 905 const anchorJS = new window.AnchorJS(); 906 anchorJS.options = { 907 placement: 'right', 908 icon: icon 909 }; 910 anchorJS.add('.anchored'); 911 const isCodeAnnotation = (el) => { 912 for (const clz of el.classList) { 913 if (clz.startsWith('code-annotation-')) { 914 return true; 915 } 916 } 917 return false; 918 } 919 const onCopySuccess = function(e) { 920 // button target 921 const button = e.trigger; 922 // don't keep focus 923 button.blur(); 924 // flash "checked" 925 button.classList.add('code-copy-button-checked'); 926 var currentTitle = button.getAttribute("title"); 927 button.setAttribute("title", "Copied!"); 928 let tooltip; 929 if (window.bootstrap) { 930 button.setAttribute("data-bs-toggle", "tooltip"); 931 button.setAttribute("data-bs-placement", "left"); 932 button.setAttribute("data-bs-title", "Copied!"); 933 tooltip = new bootstrap.Tooltip(button, 934 { trigger: "manual", 935 customClass: "code-copy-button-tooltip", 936 offset: [0, -8]}); 937 tooltip.show(); 938 } 939 setTimeout(function() { 940 if (tooltip) { 941 tooltip.hide(); 942 button.removeAttribute("data-bs-title"); 943 button.removeAttribute("data-bs-toggle"); 944 button.removeAttribute("data-bs-placement"); 945 } 946 button.setAttribute("title", currentTitle); 947 button.classList.remove('code-copy-button-checked'); 948 }, 1000); 949 // clear code selection 950 e.clearSelection(); 951 } 952 const getTextToCopy = function(trigger) { 953 const codeEl = trigger.previousElementSibling.cloneNode(true); 954 for (const childEl of codeEl.children) { 955 if (isCodeAnnotation(childEl)) { 956 childEl.remove(); 957 } 958 } 959 return codeEl.innerText; 960 } 961 const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', { 962 text: getTextToCopy 963 }); 
// Run onCopySuccess whenever the page-level ClipboardJS instance reports a successful copy.
964 clipboard.on('success', onCopySuccess);
// When the embedded source-code modal is present, copy buttons flagged with
// data-in-quarto-modal get their own ClipboardJS instance whose container is the modal element.
// NOTE(review): presumably needed so the copy works while the modal holds focus — confirm
// against the ClipboardJS `container` option documentation.
965 if (window.document.getElementById('quarto-embedded-source-code-modal')) {
966 const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
967 text: getTextToCopy,
968 container: window.document.getElementById('quarto-embedded-source-code-modal')
969 });
970 clipboardModal.on('success', onCopySuccess);
971 }
// Patterns used by isInternal() below.
// localhostRegex: http:// or https:// followed by "localhost", an optional ":" and digits, then "/".
972 var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
// mailtoRegex: any href starting with "mailto:".
973 var mailtoRegex = new RegExp(/^mailto:/);
// filterRegex is built from a string literal, so the "\." and "\/" escapes collapse to "." and "/"
// before RegExp sees them; the "." therefore matches any character, and the pattern is unanchored.
// NOTE(review): "your-org" looks like an unconfigured placeholder for the repository URL — confirm.
974 var filterRegex = new RegExp("https:\/\/github\.com\/your-org\/atdata");
// Returns true when href matches the repository pattern, a localhost URL, or a mailto: link.
975 var isInternal = (href) => {
976 return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
977 }
978 // Inspect non-navigation links and adorn them if external
// The selector excludes navigation, TOC, sidebar, pagination, dropdown and aria-hidden anchors.
// In this span the only action taken on an external link is restoring its original href.
979 var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
980 for (var i=0; i<links.length; i++) {
981 const link = links[i];
982 if (!isInternal(link.href)) {
983 // undo the damage that might have been done by quarto-nav.js in the case of
984 // links that we want to consider external
// data-original-href, when set, holds the value to put back into href.
985 if (link.dataset.originalHref !== undefined) {
986 link.href = link.dataset.originalHref;
987 }
988 }
989 }
// Attach a tippy tooltip to `el` using the shared Quarto tooltip configuration.
//   el            - element handed to window.tippy as the tooltip target
//   contentFn     - optional; assigned to config.content when truthy
//   onTriggerFn   - optional; assigned to config.onTrigger when truthy
//   onUntriggerFn - optional; assigned to config.onUntrigger when truthy
// Returns nothing; the value returned by window.tippy is discarded.
990 function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
991 const config = {
992 allowHTML: true,
993 maxWidth: 500,
994 delay: 100,
995 arrow: false,
// This `el` parameter shadows the outer one; the tooltip is appended to the parent of whichever
// element is passed in here (presumably tippy's reference element — confirm in the tippy docs).
996 appendTo: function(el) {
997 return el.parentElement;
998 },
999 interactive: true,
1000 interactiveBorder: 10,
1001 theme: 'quarto',
1002 placement: 'bottom-start',
1003 };
// Optional callbacks are only added to the config when supplied, so omitted ones never
// appear as explicit undefined properties.
1004 if (contentFn) {
1005 config.content = contentFn;
1006 }
1007 if (onTriggerFn) {
1008 config.onTrigger = onTriggerFn;
1009 }
1010 if (onUntriggerFn) {
1011 config.onUntrigger = onUntriggerFn;
1012 }
1013 window.tippy(el, config);
1014 }
1015 
// Footnote references: the hover popup shows the HTML of the referenced note.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
  const ref = noterefs[i];
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // For an absolute URL keep only its hash; anything that does not parse
    // (e.g. a bare "#fn1") is left untouched.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    if (note) {
      return note.innerHTML;
    } else {
      return "";
    }
  });
}

const xrefs = window.document.querySelectorAll('a.quarto-xref');

// Builds the popup HTML for a cross-reference target.
//   id   - target element id, or null when the whole page content is the target.
//   note - the target element; it is modified in place, so callers pass a clone
//          or a node from a separately parsed document.
// Returns an HTML string.
const processXRef = (id, note) => {
  // Strip column container classes
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  }
  stripColumnClz(note)
  if (id === null || id.startsWith('sec-')) {
    // Special case sections, only their first couple elements
    const container = document.createElement("div");
    if (note.children && note.children.length > 2) {
      container.appendChild(note.children[0].cloneNode(true));
      // Append the first following child that is not an empty paragraph.
      for (let i = 1; i < note.children.length; i++) {
        const child = note.children[i];
        if (child.tagName === "P" && child.innerText === "") {
          continue;
        } else {
          container.appendChild(child.cloneNode(true));
          break;
        }
      }
      if (window.Quarto?.typesetMath) {
        window.Quarto.typesetMath(container);
      }
      return container.innerHTML
    } else {
      if (window.Quarto?.typesetMath) {
        window.Quarto.typesetMath(note);
      }
      return note.innerHTML;
    }
  } else {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(note);
    }
    if (note.classList.contains("callout")) {
      return note.outerHTML;
    } else {
      return note.innerHTML;
    }
  }
}

// Cross-references: resolve the target when the hover is triggered, either
// from the current document or by fetching the linked page.
for (var i=0; i<xrefs.length; i++) {
  const xref = xrefs[i];
  tippyHover(xref, undefined, function(instance) {
    // Keep the popup disabled until content has been resolved.
    instance.disable();
    let url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      // NOTE(review): `new URL(url)` is called without a base, so a relative
      // href throws here and `hash` stays undefined — confirm that falling
      // through to the whole-page branch is intended for relative links.
      try { hash = new URL(url).hash; } catch {}
    }
    if (hash) {
      const id = hash.replace(/^#\/?/, "");
      const note = window.document.getElementById(id);
      if (note !== null) {
        try {
          const html = processXRef(id, note.cloneNode(true));
          instance.setContent(html);
        } finally {
          instance.enable();
          instance.show();
        }
      } else {
        // See if we can fetch this
        fetch(url.split('#')[0])
        .then(res => res.text())
        .then(html => {
          const parser = new DOMParser();
          const htmlDoc = parser.parseFromString(html, "text/html");
          const note = htmlDoc.getElementById(id);
          if (note !== null) {
            const html = processXRef(id, note);
            instance.setContent(html);
          }
        }).finally(() => {
          instance.enable();
          instance.show();
        });
      }
    } else {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetch(url)
      .then(res => res.text())
      .then(html => {
        const parser = new DOMParser();
        const htmlDoc = parser.parseFromString(html, "text/html");
        const note = htmlDoc.querySelector('main.content');
        if (note !== null) {
          // This should only happen for chapter cross references
          // (since there is no id in the URL)
          // remove the first header
          if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
            note.children[0].remove();
          }
          const html = processXRef(null, note);
          instance.setContent(html);
        }
      }).finally(() => {
        instance.enable();
        instance.show();
      });
    }
  }, function(instance) {
  });
}

// The annotation <dt> whose code lines are currently highlighted, if any.
let selectedAnnoteEl;

// Builds the selector for the span carrying the given cell/annotation pair.
const selectorForAnnotation = ( cell, annotation) => {
  let cellAttr = 'data-code-cell="' + cell + '"';
  let lineAttr = 'data-code-annotation="' + annotation + '"';
  const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
  return selector;
}

// Highlights the code lines referenced by `annoteEl` by positioning (and
// creating, if needed) the "code-annotation-line-highlight" div and its gutter
// counterpart, then records `annoteEl` as the current selection.
// Returns without doing anything when the annotation span or the first
// referenced line element cannot be found, instead of throwing.
//   annoteEl - element carrying data-target-cell / data-target-annotation.
const selectCodeLines = (annoteEl) => {
  const doc = window.document;
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = doc.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  if (annoteSpan === null) {
    return;
  }
  const codeLines = annoteSpan.getAttribute("data-code-lines");
  if (codeLines === null) {
    return;
  }
  const lineIds = codeLines.split(",").map((line) => {
    return targetCell + "-" + line;
  });

  // compute the position of the first line (top and height)
  const el = doc.getElementById(lineIds[0]);
  if (el === null || el.parentElement === null) {
    return;
  }
  const top = el.offsetTop;
  let height = el.offsetHeight;
  const parent = el.parentElement.parentElement;
  if (lineIds.length > 1) {
    // Extend the highlight down to the bottom of the last referenced line;
    // if that element is missing, keep the single-line height.
    const lastEl = doc.getElementById(lineIds[lineIds.length - 1]);
    if (lastEl !== null) {
      const bottom = lastEl.offsetTop + lastEl.offsetHeight;
      height = bottom - top;
    }
  }
  if (parent !== null) {
    // cook up a div (if necessary) and position it
    let div = doc.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = doc.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;
    let gutterDiv = doc.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      const codeCell = doc.getElementById(targetCell);
      const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
      // Only create the gutter highlight when the cell actually has a gutter.
      if (gutter !== null) {
        gutterDiv = doc.createElement("div");
        gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
        gutterDiv.style.position = 'absolute';
        gutter.appendChild(gutterDiv);
      }
    }
    if (gutterDiv !== null) {
      gutterDiv.style.top = top - 2 + "px";
      gutterDiv.style.height = height + 4 + "px";
    }
  }
  selectedAnnoteEl = annoteEl;
};

// Removes both highlight divs (if present) and clears the current selection.
const unselectCodeLines = () => {
  const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  elementsIds.forEach((elId) => {
    const div = window.document.getElementById(elId);
    if (div) {
      div.remove();
    }
  });
  selectedAnnoteEl = undefined;
};

// Handle positioning of the toggle
window.addEventListener(
  "resize",
  throttle(() => {
    // NOTE(review): `elRect` is not declared anywhere in the visible part of
    // this script; unless it is declared earlier in the file this assignment
    // creates an implicit global — confirm.
    elRect = undefined;
    if (selectedAnnoteEl) {
      selectCodeLines(selectedAnnoteEl);
    }
  }, 10)
);

// Wraps `fn` so the first call runs immediately and later calls are deferred:
// each one resets a timer, and `fn` runs with the most recent arguments once
// `ms` milliseconds pass without another call, after which the wrapper is
// open for an immediate call again.
function throttle(fn, ms) {
  let throttle = false;
  let timer;
  return (...args) => {
    if(!throttle) { // first call gets through
      fn.apply(this, args);
      throttle = true;
    } else { // all the others get throttled
      if(timer) clearTimeout(timer); // cancel #2
      timer = setTimeout(() => {
        fn.apply(this, args);
        timer = throttle = false;
      }, ms);
    }
  };
}

// Attach click handler to the DT
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', (event) => {
    // Use the <dt> the listener is bound to rather than `event.target`: the
    // target can be a descendant of the <dt>, which carries neither the
    // data-target-* attributes nor the active class.
    const clickedEl = annoteDlNode;
    if (clickedEl !== selectedAnnoteEl) {
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}

// Walks up from `el` to the nearest ancestor that has a `data-cites`
// attribute. Returns { el, cites } where `el` is the child of that ancestor on
// the path from the starting element and `cites` is the attribute split on
// spaces, or undefined when no ancestor has the attribute.
const findCites = (el) => {
  const parentEl = el.parentElement;
  if (parentEl) {
    const cites = parentEl.dataset.cites;
    if (cites) {
      return {
        el,
        cites: cites.split(' ')
      };
    } else {
      return findCites(el.parentElement)
    }
  } else {
    return undefined;
  }
};

// Citation references: the hover popup lists the bibliography entry for each
// cited key, looked up by the element id "ref-<key>".
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
  const ref = bibliorefs[i];
  const citeInfo = findCites(ref);
  if (citeInfo) {
    tippyHover(citeInfo.el, function() {
      var popup = window.document.createElement('div');
      citeInfo.cites.forEach(function(cite) {
        var citeDiv = window.document.createElement('div');
        citeDiv.classList.add('hanging-indent');
        citeDiv.classList.add('csl-entry');
        var biblioDiv = window.document.getElementById('ref-' + cite);
        if (biblioDiv) {
          citeDiv.innerHTML = biblioDiv.innerHTML;
        }
        popup.appendChild(citeDiv);
      });
      return popup.innerHTML;
    });
  }
}
});
</script>
</div> <!-- /content -->
<footer class="footer">
  <div class="nav-footer">
    <div class="nav-footer-left">
<p>Built with <a href="https://quarto.org/">Quarto</a></p>
</div>
    <div class="nav-footer-center">
      &nbsp;
    <div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/your-org/atdata/edit/main/reference/datasets.qmd" class="toc-action"><i class="bi bi-github" aria-hidden="true"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty" aria-hidden="true"></i>Report an issue</a></li></ul></div></div>
    <div class="nav-footer-right">
<p>MIT License</p>
</div>
  </div>
</footer>




</body></html>