A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 1336 lines 73 kB view raw
1<!DOCTYPE html> 2<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head> 3 4<meta charset="utf-8"> 5<meta name="generator" content="quarto-1.7.34"> 6 7<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> 8 9<meta name="description" content="HuggingFace-style dataset loading interface"> 10 11<title>load_dataset API – atdata</title> 12<style> 13code{white-space: pre-wrap;} 14span.smallcaps{font-variant: small-caps;} 15div.columns{display: flex; gap: min(4vw, 1.5em);} 16div.column{flex: auto; overflow-x: auto;} 17div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} 18ul.task-list{list-style: none;} 19ul.task-list li input[type="checkbox"] { 20 width: 0.8em; 21 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 22 vertical-align: middle; 23} 24/* CSS for syntax highlighting */ 25html { -webkit-text-size-adjust: 100%; } 26pre > code.sourceCode { white-space: pre; position: relative; } 27pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } 28pre > code.sourceCode > span:empty { height: 1.2em; } 29.sourceCode { overflow: visible; } 30code.sourceCode > span { color: inherit; text-decoration: inherit; } 31div.sourceCode { margin: 1em 0; } 32pre.sourceCode { margin: 0; } 33@media screen { 34div.sourceCode { overflow: auto; } 35} 36@media print { 37pre > code.sourceCode { white-space: pre-wrap; } 38pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } 39} 40pre.numberSource code 41 { counter-reset: source-line 0; } 42pre.numberSource code > span 43 { position: relative; left: -4em; counter-increment: source-line; } 44pre.numberSource code > span > a:first-child::before 45 { content: counter(source-line); 46 position: relative; left: -1em; text-align: right; vertical-align: baseline; 47 border: none; display: inline-block; 48 -webkit-touch-callout: none; -webkit-user-select: none; 49 -khtml-user-select: none; 
-moz-user-select: none; 50 -ms-user-select: none; user-select: none; 51 padding: 0 4px; width: 4em; 52 } 53pre.numberSource { margin-left: 3em; padding-left: 4px; } 54div.sourceCode 55 { } 56@media screen { 57pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } 58} 59</style> 60 61 62<script src="../site_libs/quarto-nav/quarto-nav.js"></script> 63<script src="../site_libs/quarto-nav/headroom.min.js"></script> 64<script src="../site_libs/clipboard/clipboard.min.js"></script> 65<script src="../site_libs/quarto-search/autocomplete.umd.js"></script> 66<script src="../site_libs/quarto-search/fuse.min.js"></script> 67<script src="../site_libs/quarto-search/quarto-search.js"></script> 68<meta name="quarto:offset" content="../"> 69<script src="../site_libs/quarto-html/quarto.js" type="module"></script> 70<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script> 71<script src="../site_libs/quarto-html/popper.min.js"></script> 72<script src="../site_libs/quarto-html/tippy.umd.min.js"></script> 73<script src="../site_libs/quarto-html/anchor.min.js"></script> 74<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet"> 75<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme" id="quarto-text-highlighting-styles"> 76<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css" rel="stylesheet" class="quarto-color-scheme quarto-color-alternate" id="quarto-text-highlighting-styles"> 77<link href="../site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css" rel="stylesheet" class="quarto-color-scheme-extra" id="quarto-text-highlighting-styles"> 78<script src="../site_libs/bootstrap/bootstrap.min.js"></script> 79<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet"> 80<link 
href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme" id="quarto-bootstrap" data-mode="light"> 81<link href="../site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme quarto-color-alternate" id="quarto-bootstrap" data-mode="dark"> 82<link href="../site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css" rel="stylesheet" append-hash="true" class="quarto-color-scheme-extra" id="quarto-bootstrap" data-mode="light"> 83<script id="quarto-search-options" type="application/json">{ 84 "location": "navbar", 85 "copy-button": false, 86 "collapse-after": 3, 87 "panel-placement": "end", 88 "type": "overlay", 89 "limit": 50, 90 "keyboard-shortcut": [ 91 "f", 92 "/", 93 "s" 94 ], 95 "show-item-context": false, 96 "language": { 97 "search-no-results-text": "No results", 98 "search-matching-documents-text": "matching documents", 99 "search-copy-link-title": "Copy link to search", 100 "search-hide-matches-text": "Hide additional matches", 101 "search-more-match-text": "more match in this document", 102 "search-more-matches-text": "more matches in this document", 103 "search-clear-button-title": "Clear", 104 "search-text-placeholder": "", 105 "search-detached-cancel-button-title": "Cancel", 106 "search-submit-button-title": "Submit", 107 "search-label": "Search" 108 } 109}</script> 110 111 112<link rel="stylesheet" href="../assets/styles.css"> 113</head> 114 115<body class="nav-sidebar docked nav-fixed quarto-light"><script id="quarto-html-before-body" type="application/javascript"> 116 const toggleBodyColorMode = (bsSheetEl) => { 117 const mode = bsSheetEl.getAttribute("data-mode"); 118 const bodyEl = window.document.querySelector("body"); 119 if (mode === "dark") { 120 bodyEl.classList.add("quarto-dark"); 121 bodyEl.classList.remove("quarto-light"); 122 } else { 123 
bodyEl.classList.add("quarto-light"); 124 bodyEl.classList.remove("quarto-dark"); 125 } 126 } 127 const toggleBodyColorPrimary = () => { 128 const bsSheetEl = window.document.querySelector("link#quarto-bootstrap:not([rel=disabled-stylesheet])"); 129 if (bsSheetEl) { 130 toggleBodyColorMode(bsSheetEl); 131 } 132 } 133 const setColorSchemeToggle = (alternate) => { 134 const toggles = window.document.querySelectorAll('.quarto-color-scheme-toggle'); 135 for (let i=0; i < toggles.length; i++) { 136 const toggle = toggles[i]; 137 if (toggle) { 138 if (alternate) { 139 toggle.classList.add("alternate"); 140 } else { 141 toggle.classList.remove("alternate"); 142 } 143 } 144 } 145 }; 146 const toggleColorMode = (alternate) => { 147 // Switch the stylesheets 148 const primaryStylesheets = window.document.querySelectorAll('link.quarto-color-scheme:not(.quarto-color-alternate)'); 149 const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate'); 150 manageTransitions('#quarto-margin-sidebar .nav-link', false); 151 if (alternate) { 152 // note: dark is layered on light, we don't disable primary! 
153 enableStylesheet(alternateStylesheets); 154 for (const sheetNode of alternateStylesheets) { 155 if (sheetNode.id === "quarto-bootstrap") { 156 toggleBodyColorMode(sheetNode); 157 } 158 } 159 } else { 160 disableStylesheet(alternateStylesheets); 161 enableStylesheet(primaryStylesheets) 162 toggleBodyColorPrimary(); 163 } 164 manageTransitions('#quarto-margin-sidebar .nav-link', true); 165 // Switch the toggles 166 setColorSchemeToggle(alternate) 167 // Hack to workaround the fact that safari doesn't 168 // properly recolor the scrollbar when toggling (#1455) 169 if (navigator.userAgent.indexOf('Safari') > 0 && navigator.userAgent.indexOf('Chrome') == -1) { 170 manageTransitions("body", false); 171 window.scrollTo(0, 1); 172 setTimeout(() => { 173 window.scrollTo(0, 0); 174 manageTransitions("body", true); 175 }, 40); 176 } 177 } 178 const disableStylesheet = (stylesheets) => { 179 for (let i=0; i < stylesheets.length; i++) { 180 const stylesheet = stylesheets[i]; 181 stylesheet.rel = 'disabled-stylesheet'; 182 } 183 } 184 const enableStylesheet = (stylesheets) => { 185 for (let i=0; i < stylesheets.length; i++) { 186 const stylesheet = stylesheets[i]; 187 if(stylesheet.rel !== 'stylesheet') { // for Chrome, which will still FOUC without this check 188 stylesheet.rel = 'stylesheet'; 189 } 190 } 191 } 192 const manageTransitions = (selector, allowTransitions) => { 193 const els = window.document.querySelectorAll(selector); 194 for (let i=0; i < els.length; i++) { 195 const el = els[i]; 196 if (allowTransitions) { 197 el.classList.remove('notransition'); 198 } else { 199 el.classList.add('notransition'); 200 } 201 } 202 } 203 const isFileUrl = () => { 204 return window.location.protocol === 'file:'; 205 } 206 const hasAlternateSentinel = () => { 207 let styleSentinel = getColorSchemeSentinel(); 208 if (styleSentinel !== null) { 209 return styleSentinel === "alternate"; 210 } else { 211 return false; 212 } 213 } 214 const setStyleSentinel = (alternate) => { 215 
const value = alternate ? "alternate" : "default"; 216 if (!isFileUrl()) { 217 window.localStorage.setItem("quarto-color-scheme", value); 218 } else { 219 localAlternateSentinel = value; 220 } 221 } 222 const getColorSchemeSentinel = () => { 223 if (!isFileUrl()) { 224 const storageValue = window.localStorage.getItem("quarto-color-scheme"); 225 return storageValue != null ? storageValue : localAlternateSentinel; 226 } else { 227 return localAlternateSentinel; 228 } 229 } 230 const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => { 231 const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light'; 232 const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark'; 233 let newTheme = ''; 234 if(authorPrefersDark) { 235 newTheme = isAlternate ? baseTheme : alternateTheme; 236 } else { 237 newTheme = isAlternate ? alternateTheme : baseTheme; 238 } 239 const changeGiscusTheme = () => { 240 // From: https://github.com/giscus/giscus/issues/336 241 const sendMessage = (message) => { 242 const iframe = document.querySelector('iframe.giscus-frame'); 243 if (!iframe) return; 244 iframe.contentWindow.postMessage({ giscus: message }, 'https://giscus.app'); 245 } 246 sendMessage({ 247 setConfig: { 248 theme: newTheme 249 } 250 }); 251 } 252 const isGiscussLoaded = window.document.querySelector('iframe.giscus-frame') !== null; 253 if (isGiscussLoaded) { 254 changeGiscusTheme(); 255 } 256 }; 257 const authorPrefersDark = false; 258 const darkModeDefault = authorPrefersDark; 259 document.querySelector('link#quarto-text-highlighting-styles.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 260 document.querySelector('link#quarto-bootstrap.quarto-color-scheme-extra').rel = 'disabled-stylesheet'; 261 let localAlternateSentinel = darkModeDefault ? 
'alternate' : 'default'; 262 // Dark / light mode switch 263 window.quartoToggleColorScheme = () => { 264 // Read the current dark / light value 265 let toAlternate = !hasAlternateSentinel(); 266 toggleColorMode(toAlternate); 267 setStyleSentinel(toAlternate); 268 toggleGiscusIfUsed(toAlternate, darkModeDefault); 269 window.dispatchEvent(new Event('resize')); 270 }; 271 // Switch to dark mode if need be 272 if (hasAlternateSentinel()) { 273 toggleColorMode(true); 274 } else { 275 toggleColorMode(false); 276 } 277 </script> 278 279<div id="quarto-search-results"></div> 280 <header id="quarto-header" class="headroom fixed-top"> 281 <nav class="navbar navbar-expand-lg " data-bs-theme="dark"> 282 <div class="navbar-container container-fluid"> 283 <div class="navbar-brand-container mx-auto"> 284 <a class="navbar-brand" href="../index.html"> 285 <span class="navbar-title">atdata</span> 286 </a> 287 </div> 288 <div id="quarto-search" class="" title="Search"></div> 289 <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" role="menu" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 290 <span class="navbar-toggler-icon"></span> 291</button> 292 <div class="collapse navbar-collapse" id="navbarCollapse"> 293 <ul class="navbar-nav navbar-nav-scroll me-auto"> 294 <li class="nav-item"> 295 <a class="nav-link active" href="../index.html" aria-current="page"> 296<span class="menu-text">Guide</span></a> 297 </li> 298 <li class="nav-item dropdown "> 299 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-tutorials" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 300 <span class="menu-text">Tutorials</span> 301 </a> 302 <ul class="dropdown-menu" aria-labelledby="nav-menu-tutorials"> 303 <li> 304 <a class="dropdown-item" href="../tutorials/quickstart.html"> 305 <span class="dropdown-text">Quick 
Start</span></a> 306 </li> 307 <li> 308 <a class="dropdown-item" href="../tutorials/local-workflow.html"> 309 <span class="dropdown-text">Local Workflow</span></a> 310 </li> 311 <li> 312 <a class="dropdown-item" href="../tutorials/atmosphere.html"> 313 <span class="dropdown-text">Atmosphere Publishing</span></a> 314 </li> 315 <li> 316 <a class="dropdown-item" href="../tutorials/promotion.html"> 317 <span class="dropdown-text">Promotion Workflow</span></a> 318 </li> 319 </ul> 320 </li> 321 <li class="nav-item dropdown "> 322 <a class="nav-link dropdown-toggle" href="#" id="nav-menu-reference" role="link" data-bs-toggle="dropdown" aria-expanded="false"> 323 <span class="menu-text">Reference</span> 324 </a> 325 <ul class="dropdown-menu" aria-labelledby="nav-menu-reference"> 326 <li> 327 <a class="dropdown-item" href="../reference/architecture.html"> 328 <span class="dropdown-text">Architecture Overview</span></a> 329 </li> 330 <li> 331 <a class="dropdown-item" href="../reference/packable-samples.html"> 332 <span class="dropdown-text">Packable Samples</span></a> 333 </li> 334 <li> 335 <a class="dropdown-item" href="../reference/datasets.html"> 336 <span class="dropdown-text">Datasets</span></a> 337 </li> 338 <li> 339 <a class="dropdown-item" href="../reference/lenses.html"> 340 <span class="dropdown-text">Lenses</span></a> 341 </li> 342 <li> 343 <a class="dropdown-item" href="../reference/local-storage.html"> 344 <span class="dropdown-text">Local Storage</span></a> 345 </li> 346 <li> 347 <a class="dropdown-item" href="../reference/atmosphere.html"> 348 <span class="dropdown-text">Atmosphere</span></a> 349 </li> 350 <li> 351 <a class="dropdown-item" href="../reference/promotion.html"> 352 <span class="dropdown-text">Promotion</span></a> 353 </li> 354 <li> 355 <a class="dropdown-item" href="../reference/load-dataset.html"> 356 <span class="dropdown-text">load_dataset API</span></a> 357 </li> 358 <li> 359 <a class="dropdown-item" href="../reference/protocols.html"> 360 
<span class="dropdown-text">Protocols</span></a> 361 </li> 362 <li> 363 <a class="dropdown-item" href="../reference/uri-spec.html"> 364 <span class="dropdown-text">URI Specification</span></a> 365 </li> 366 <li> 367 <a class="dropdown-item" href="../reference/troubleshooting.html"> 368 <span class="dropdown-text">Troubleshooting &amp; FAQ</span></a> 369 </li> 370 <li> 371 <a class="dropdown-item" href="../reference/deployment.html"> 372 <span class="dropdown-text">Deployment Guide</span></a> 373 </li> 374 </ul> 375 </li> 376 <li class="nav-item"> 377 <a class="nav-link" href="../api/index.html"> 378<span class="menu-text">API</span></a> 379 </li> 380</ul> 381 <ul class="navbar-nav navbar-nav-scroll ms-auto"> 382 <li class="nav-item compact"> 383 <a class="nav-link" href="https://github.com/your-org/atdata"> <i class="bi bi-github" role="img"> 384</i> 385<span class="menu-text"></span></a> 386 </li> 387</ul> 388 </div> <!-- /navcollapse --> 389 <div class="quarto-navbar-tools"> 390 <a href="" class="quarto-color-scheme-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleColorScheme(); return false;" title="Toggle dark mode"><i class="bi"></i></a> 391</div> 392 </div> <!-- /container-fluid --> 393 </nav> 394 <nav class="quarto-secondary-nav"> 395 <div class="container-fluid d-flex"> 396 <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 397 <i class="bi bi-layout-text-sidebar-reverse"></i> 398 </button> 399 <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/load-dataset.html">load_dataset API</a></li></ol></nav> 400 <a 
class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> 401 </a> 402 </div> 403 </nav> 404</header> 405<!-- content --> 406<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar"> 407<!-- sidebar --> 408 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto"> 409 <div class="sidebar-menu-container"> 410 <ul class="list-unstyled mt-1"> 411 <li class="sidebar-item"> 412 <div class="sidebar-item-container"> 413 <a href="../index.html" class="sidebar-item-text sidebar-link"> 414 <span class="menu-text">atdata</span></a> 415 </div> 416</li> 417 <li class="sidebar-item sidebar-item-section"> 418 <div class="sidebar-item-container"> 419 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true"> 420 <span class="menu-text">Getting Started</span></a> 421 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section"> 422 <i class="bi bi-chevron-right ms-2"></i> 423 </a> 424 </div> 425 <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show"> 426 <li class="sidebar-item"> 427 <div class="sidebar-item-container"> 428 <a href="../tutorials/quickstart.html" class="sidebar-item-text sidebar-link"> 429 <span class="menu-text">Quick Start</span></a> 430 </div> 431</li> 432 <li class="sidebar-item"> 433 <div class="sidebar-item-container"> 434 <a href="../tutorials/local-workflow.html" class="sidebar-item-text sidebar-link"> 435 <span class="menu-text">Local 
Workflow</span></a> 436 </div> 437</li> 438 <li class="sidebar-item"> 439 <div class="sidebar-item-container"> 440 <a href="../tutorials/atmosphere.html" class="sidebar-item-text sidebar-link"> 441 <span class="menu-text">Atmosphere Publishing</span></a> 442 </div> 443</li> 444 <li class="sidebar-item"> 445 <div class="sidebar-item-container"> 446 <a href="../tutorials/promotion.html" class="sidebar-item-text sidebar-link"> 447 <span class="menu-text">Promotion Workflow</span></a> 448 </div> 449</li> 450 </ul> 451 </li> 452 <li class="sidebar-item sidebar-item-section"> 453 <div class="sidebar-item-container"> 454 <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true"> 455 <span class="menu-text">Reference</span></a> 456 <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section"> 457 <i class="bi bi-chevron-right ms-2"></i> 458 </a> 459 </div> 460 <ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show"> 461 <li class="sidebar-item"> 462 <div class="sidebar-item-container"> 463 <a href="../reference/architecture.html" class="sidebar-item-text sidebar-link"> 464 <span class="menu-text">Architecture Overview</span></a> 465 </div> 466</li> 467 <li class="sidebar-item"> 468 <div class="sidebar-item-container"> 469 <a href="../reference/packable-samples.html" class="sidebar-item-text sidebar-link"> 470 <span class="menu-text">Packable Samples</span></a> 471 </div> 472</li> 473 <li class="sidebar-item"> 474 <div class="sidebar-item-container"> 475 <a href="../reference/datasets.html" class="sidebar-item-text sidebar-link"> 476 <span class="menu-text">Datasets</span></a> 477 </div> 478</li> 479 <li class="sidebar-item"> 480 <div class="sidebar-item-container"> 481 <a href="../reference/lenses.html" 
class="sidebar-item-text sidebar-link"> 482 <span class="menu-text">Lenses</span></a> 483 </div> 484</li> 485 <li class="sidebar-item"> 486 <div class="sidebar-item-container"> 487 <a href="../reference/local-storage.html" class="sidebar-item-text sidebar-link"> 488 <span class="menu-text">Local Storage</span></a> 489 </div> 490</li> 491 <li class="sidebar-item"> 492 <div class="sidebar-item-container"> 493 <a href="../reference/atmosphere.html" class="sidebar-item-text sidebar-link"> 494 <span class="menu-text">Atmosphere (ATProto Integration)</span></a> 495 </div> 496</li> 497 <li class="sidebar-item"> 498 <div class="sidebar-item-container"> 499 <a href="../reference/promotion.html" class="sidebar-item-text sidebar-link"> 500 <span class="menu-text">Promotion Workflow</span></a> 501 </div> 502</li> 503 <li class="sidebar-item"> 504 <div class="sidebar-item-container"> 505 <a href="../reference/load-dataset.html" class="sidebar-item-text sidebar-link active"> 506 <span class="menu-text">load_dataset API</span></a> 507 </div> 508</li> 509 <li class="sidebar-item"> 510 <div class="sidebar-item-container"> 511 <a href="../reference/protocols.html" class="sidebar-item-text sidebar-link"> 512 <span class="menu-text">Protocols</span></a> 513 </div> 514</li> 515 <li class="sidebar-item"> 516 <div class="sidebar-item-container"> 517 <a href="../reference/uri-spec.html" class="sidebar-item-text sidebar-link"> 518 <span class="menu-text">URI Specification</span></a> 519 </div> 520</li> 521 <li class="sidebar-item"> 522 <div class="sidebar-item-container"> 523 <a href="../reference/troubleshooting.html" class="sidebar-item-text sidebar-link"> 524 <span class="menu-text">Troubleshooting &amp; FAQ</span></a> 525 </div> 526</li> 527 <li class="sidebar-item"> 528 <div class="sidebar-item-container"> 529 <a href="../reference/deployment.html" class="sidebar-item-text sidebar-link"> 530 <span class="menu-text">Deployment Guide</span></a> 531 </div> 532</li> 533 </ul> 534 </li> 
535 </ul> 536 </div> 537</nav> 538<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div> 539<!-- margin-sidebar --> 540 <div id="quarto-margin-sidebar" class="sidebar margin-sidebar"> 541 <nav id="TOC" role="doc-toc" class="toc-active"> 542 <h2 id="toc-title">On this page</h2> 543 544 <ul> 545 <li><a href="#overview" id="toc-overview" class="nav-link active" data-scroll-target="#overview">Overview</a></li> 546 <li><a href="#basic-usage" id="toc-basic-usage" class="nav-link" data-scroll-target="#basic-usage">Basic Usage</a></li> 547 <li><a href="#path-formats" id="toc-path-formats" class="nav-link" data-scroll-target="#path-formats">Path Formats</a> 548 <ul class="collapse"> 549 <li><a href="#webdataset-brace-notation" id="toc-webdataset-brace-notation" class="nav-link" data-scroll-target="#webdataset-brace-notation">WebDataset Brace Notation</a></li> 550 <li><a href="#glob-patterns" id="toc-glob-patterns" class="nav-link" data-scroll-target="#glob-patterns">Glob Patterns</a></li> 551 <li><a href="#local-directory" id="toc-local-directory" class="nav-link" data-scroll-target="#local-directory">Local Directory</a></li> 552 <li><a href="#remote-urls" id="toc-remote-urls" class="nav-link" data-scroll-target="#remote-urls">Remote URLs</a></li> 553 <li><a href="#index-lookup" id="toc-index-lookup" class="nav-link" data-scroll-target="#index-lookup">Index Lookup</a></li> 554 </ul></li> 555 <li><a href="#split-detection" id="toc-split-detection" class="nav-link" data-scroll-target="#split-detection">Split Detection</a></li> 556 <li><a href="#datasetdict" id="toc-datasetdict" class="nav-link" data-scroll-target="#datasetdict">DatasetDict</a></li> 557 <li><a href="#explicit-data-files" id="toc-explicit-data-files" class="nav-link" data-scroll-target="#explicit-data-files">Explicit Data Files</a></li> 558 <li><a href="#streaming-mode" id="toc-streaming-mode" class="nav-link" 
data-scroll-target="#streaming-mode">Streaming Mode</a></li> 559 <li><a href="#auto-type-resolution" id="toc-auto-type-resolution" class="nav-link" data-scroll-target="#auto-type-resolution">Auto Type Resolution</a></li> 560 <li><a href="#error-handling" id="toc-error-handling" class="nav-link" data-scroll-target="#error-handling">Error Handling</a></li> 561 <li><a href="#complete-example" id="toc-complete-example" class="nav-link" data-scroll-target="#complete-example">Complete Example</a></li> 562 <li><a href="#related" id="toc-related" class="nav-link" data-scroll-target="#related">Related</a></li> 563 </ul> 564<div class="toc-actions"><ul><li><a href="https://github.com/your-org/atdata/edit/main/reference/load-dataset.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/your-org/atdata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav> 565 </div> 566<!-- main --> 567<main class="content" id="quarto-document-content"> 568 569 570<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../reference/architecture.html">Reference</a></li><li class="breadcrumb-item"><a href="../reference/load-dataset.html">load_dataset API</a></li></ol></nav> 571<div class="quarto-title"> 572<h1 class="title">load_dataset API</h1> 573</div> 574 575<div> 576 <div class="description"> 577 HuggingFace-style dataset loading interface 578 </div> 579</div> 580 581 582<div class="quarto-title-meta"> 583 584 585 586 587 </div> 588 589 590 591</header> 592 593 594<p>The <code>load_dataset()</code> function provides a HuggingFace Datasets-style interface for loading typed datasets.</p> 595<section id="overview" class="level2"> 596<h2 class="anchored" data-anchor-id="overview">Overview</h2> 597<p>Key differences from 
HuggingFace Datasets:</p> 598<ul> 599<li>Requires explicit <code>sample_type</code> parameter (typed dataclass) unless using index</li> 600<li>Returns <code>atdata.Dataset[ST]</code> instead of HF Dataset</li> 601<li>Built on WebDataset for efficient streaming</li> 602<li>No Arrow caching layer</li> 603</ul> 604</section> 605<section id="basic-usage" class="level2"> 606<h2 class="anchored" data-anchor-id="basic-usage">Basic Usage</h2> 607<div id="44ca6bce" class="cell"> 608<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 609<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> load_dataset</span> 610<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 611<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a></span> 612<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 613<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> TextSample:</span> 614<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a> text: <span class="bu">str</span></span> 615<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">int</span></span> 616<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a></span> 617<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Load a specific split</span></span> 618<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>train_ds <span class="op">=</span> load_dataset(<span class="st">"path/to/data.tar"</span>, TextSample, 
split<span class="op">=</span><span class="st">"train"</span>)</span> 619<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a></span> 620<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Load all splits (returns DatasetDict)</span></span> 621<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a>ds_dict <span class="op">=</span> load_dataset(<span class="st">"path/to/data/"</span>, TextSample)</span> 622<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a>train_ds <span class="op">=</span> ds_dict[<span class="st">"train"</span>]</span> 623<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a>test_ds <span class="op">=</span> ds_dict[<span class="st">"test"</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 624</div> 625</section> 626<section id="path-formats" class="level2"> 627<h2 class="anchored" data-anchor-id="path-formats">Path Formats</h2> 628<section id="webdataset-brace-notation" class="level3"> 629<h3 class="anchored" data-anchor-id="webdataset-brace-notation">WebDataset Brace Notation</h3> 630<div id="8d0a44f7" class="cell"> 631<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Range notation</span></span> 632<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"data-{000000..000099}.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span> 633<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span> 634<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="co"># List notation</span></span> 635<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" 
tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"data-{train,test,val}.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 636</div> 637</section> 638<section id="glob-patterns" class="level3"> 639<h3 class="anchored" data-anchor-id="glob-patterns">Glob Patterns</h3> 640<div id="f88b246c" class="cell"> 641<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Match all tar files</span></span> 642<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"path/to/*.tar"</span>, MySample)</span> 643<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span> 644<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Match pattern</span></span> 645<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"path/to/train-*.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 646</div> 647</section> 648<section id="local-directory" class="level3"> 649<h3 class="anchored" data-anchor-id="local-directory">Local Directory</h3> 650<div id="dd112997" class="cell"> 651<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Scans for .tar files</span></span> 652<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> 
load_dataset(<span class="st">"./my-dataset/"</span>, MySample)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 653</div> 654</section> 655<section id="remote-urls" class="level3"> 656<h3 class="anchored" data-anchor-id="remote-urls">Remote URLs</h3> 657<div id="fb8989cb" class="cell"> 658<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># S3 (public buckets)</span></span> 659<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"s3://bucket/data-{000..099}.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span> 660<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span> 661<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="co"># HTTP/HTTPS</span></span> 662<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"https://example.com/data.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span> 663<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a></span> 664<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Google Cloud Storage</span></span> 665<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"gs://bucket/data.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 666</div> 667<div class="callout callout-style-default callout-note callout-titled"> 668<div class="callout-header d-flex 
align-content-center"> 669<div class="callout-icon-container"> 670<i class="callout-icon"></i> 671</div> 672<div class="callout-title-container flex-fill"> 673Note 674</div> 675</div> 676<div class="callout-body-container callout-body"> 677<p>For private S3 buckets or S3-compatible storage with authentication, use <code>atdata.S3Source</code> with <code>Dataset</code> directly. See <a href="../reference/datasets.html">Datasets</a> for details.</p> 678</div> 679</div> 680</section> 681<section id="index-lookup" class="level3"> 682<h3 class="anchored" data-anchor-id="index-lookup">Index Lookup</h3> 683<div id="415c2094" class="cell"> 684<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex</span> 685<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a></span> 686<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> LocalIndex()</span> 687<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a></span> 688<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Load from local index (auto-resolves type from schema)</span></span> 689<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"@local/my-dataset"</span>, index<span class="op">=</span>index, split<span class="op">=</span><span class="st">"train"</span>)</span> 690<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a></span> 691<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a><span class="co"># With explicit type</span></span> 692<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span 
class="st">"@local/my-dataset"</span>, MySample, index<span class="op">=</span>index, split<span class="op">=</span><span class="st">"train"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 693</div> 694</section> 695</section> 696<section id="split-detection" class="level2"> 697<h2 class="anchored" data-anchor-id="split-detection">Split Detection</h2> 698<p>Splits are automatically detected from filenames and directories:</p> 699<table class="caption-top table"> 700<thead> 701<tr class="header"> 702<th>Pattern</th> 703<th>Detected Split</th> 704</tr> 705</thead> 706<tbody> 707<tr class="odd"> 708<td><code>train-*.tar</code>, <code>training-*.tar</code></td> 709<td>train</td> 710</tr> 711<tr class="even"> 712<td><code>test-*.tar</code>, <code>testing-*.tar</code></td> 713<td>test</td> 714</tr> 715<tr class="odd"> 716<td><code>val-*.tar</code>, <code>valid-*.tar</code>, <code>validation-*.tar</code></td> 717<td>validation</td> 718</tr> 719<tr class="even"> 720<td><code>dev-*.tar</code>, <code>development-*.tar</code></td> 721<td>validation</td> 722</tr> 723<tr class="odd"> 724<td><code>train/*.tar</code> (directory)</td> 725<td>train</td> 726</tr> 727<tr class="even"> 728<td><code>test/*.tar</code> (directory)</td> 729<td>test</td> 730</tr> 731</tbody> 732</table> 733<div class="callout callout-style-default callout-note callout-titled"> 734<div class="callout-header d-flex align-content-center"> 735<div class="callout-icon-container"> 736<i class="callout-icon"></i> 737</div> 738<div class="callout-title-container flex-fill"> 739Note 740</div> 741</div> 742<div class="callout-body-container callout-body"> 743<p>Files without a detected split default to “train”.</p> 744</div> 745</div> 746</section> 747<section id="datasetdict" class="level2"> 748<h2 class="anchored" data-anchor-id="datasetdict">DatasetDict</h2> 749<p>When loading without <code>split=</code>, returns a 
<code>DatasetDict</code>:</p> 750<div id="22c1bc35" class="cell"> 751<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>ds_dict <span class="op">=</span> load_dataset(<span class="st">"path/to/data/"</span>, MySample)</span> 752<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span> 753<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Access splits</span></span> 754<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a>train_ds <span class="op">=</span> ds_dict[<span class="st">"train"</span>]</span> 755<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>test_ds <span class="op">=</span> ds_dict[<span class="st">"test"</span>]</span> 756<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a></span> 757<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Iterate splits</span></span> 758<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> name, dataset <span class="kw">in</span> ds_dict.items():</span> 759<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>name<span class="sc">}</span><span class="ss">: </span><span class="sc">{</span><span class="bu">len</span>(dataset.shard_list)<span class="sc">}</span><span class="ss"> shards"</span>)</span> 760<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a></span> 761<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Properties</span></span> 762<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(ds_dict.num_shards) <span class="co"># {'train': 10, 'test': 2}</span></span> 763<span 
id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(ds_dict.sample_type) <span class="co"># &lt;class 'MySample'&gt;</span></span> 764<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(ds_dict.streaming) <span class="co"># False</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 765</div> 766</section> 767<section id="explicit-data-files" class="level2"> 768<h2 class="anchored" data-anchor-id="explicit-data-files">Explicit Data Files</h2> 769<p>Override automatic detection with <code>data_files</code>:</p> 770<div id="452b998f" class="cell"> 771<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Single pattern</span></span> 772<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(</span> 773<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="st">"path/to/"</span>,</span> 774<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a> MySample,</span> 775<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a> data_files<span class="op">=</span><span class="st">"custom-*.tar"</span>,</span> 776<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a>)</span> 777<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a></span> 778<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a><span class="co"># List of patterns</span></span> 779<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(</span> 780<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a> <span class="st">"path/to/"</span>,</span> 781<span 
id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a> MySample,</span> 782<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a> data_files<span class="op">=</span>[<span class="st">"shard-000.tar"</span>, <span class="st">"shard-001.tar"</span>],</span> 783<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a>)</span> 784<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a></span> 785<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a><span class="co"># Explicit split mapping</span></span> 786<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(</span> 787<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a> <span class="st">"path/to/"</span>,</span> 788<span id="cb8-18"><a href="#cb8-18" aria-hidden="true" tabindex="-1"></a> MySample,</span> 789<span id="cb8-19"><a href="#cb8-19" aria-hidden="true" tabindex="-1"></a> data_files<span class="op">=</span>{</span> 790<span id="cb8-20"><a href="#cb8-20" aria-hidden="true" tabindex="-1"></a> <span class="st">"train"</span>: <span class="st">"training-shards-*.tar"</span>,</span> 791<span id="cb8-21"><a href="#cb8-21" aria-hidden="true" tabindex="-1"></a> <span class="st">"test"</span>: <span class="st">"eval-data.tar"</span>,</span> 792<span id="cb8-22"><a href="#cb8-22" aria-hidden="true" tabindex="-1"></a> },</span> 793<span id="cb8-23"><a href="#cb8-23" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 794</div> 795</section> 796<section id="streaming-mode" class="level2"> 797<h2 class="anchored" data-anchor-id="streaming-mode">Streaming Mode</h2> 798<p>The <code>streaming</code> parameter signals intent for streaming mode:</p> 799<div id="9de4892c" class="cell"> 800<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python 
code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Mark as streaming</span></span> 801<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>ds_dict <span class="op">=</span> load_dataset(<span class="st">"path/to/data.tar"</span>, MySample, streaming<span class="op">=</span><span class="va">True</span>)</span> 802<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a></span> 803<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Check streaming status</span></span> 804<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span> ds_dict.streaming:</span> 805<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="st">"Streaming mode"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 806</div> 807<div class="callout callout-style-default callout-tip callout-titled"> 808<div class="callout-header d-flex align-content-center"> 809<div class="callout-icon-container"> 810<i class="callout-icon"></i> 811</div> 812<div class="callout-title-container flex-fill"> 813Tip 814</div> 815</div> 816<div class="callout-body-container callout-body"> 817<p>atdata datasets are always lazy/streaming via WebDataset pipelines. 
This parameter primarily signals intent.</p> 818</div> 819</div> 820</section> 821<section id="auto-type-resolution" class="level2"> 822<h2 class="anchored" data-anchor-id="auto-type-resolution">Auto Type Resolution</h2> 823<p>When using index lookup, the sample type can be resolved automatically:</p> 824<div id="04d401c9" class="cell"> 825<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex</span> 826<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span> 827<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> LocalIndex()</span> 828<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a></span> 829<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a><span class="co"># No sample_type needed - resolved from schema</span></span> 830<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"@local/my-dataset"</span>, index<span class="op">=</span>index, split<span class="op">=</span><span class="st">"train"</span>)</span> 831<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a></span> 832<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Type is inferred from the stored schema</span></span> 833<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a>sample_type <span class="op">=</span> ds.sample_type</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 834</div> 835</section> 836<section id="error-handling" class="level2"> 837<h2 class="anchored" data-anchor-id="error-handling">Error Handling</h2> 838<div id="fdcf842d" 
class="cell"> 839<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="cf">try</span>:</span> 840<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> ds <span class="op">=</span> load_dataset(<span class="st">"path/to/data.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span> 841<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="cf">except</span> <span class="pp">FileNotFoundError</span>:</span> 842<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="st">"No data files found"</span>)</span> 843<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a><span class="cf">except</span> <span class="pp">ValueError</span> <span class="im">as</span> e:</span> 844<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> <span class="st">"Split"</span> <span class="kw">in</span> <span class="bu">str</span>(e):</span> 845<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="st">"Requested split not found"</span>)</span> 846<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a> <span class="cf">else</span>:</span> 847<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"Invalid configuration: </span><span class="sc">{</span>e<span class="sc">}</span><span class="ss">"</span>)</span> 848<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a><span class="cf">except</span> <span class="pp">KeyError</span>:</span> 849<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="st">"Dataset 
not found in index"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 850</div> 851</section> 852<section id="complete-example" class="level2"> 853<h2 class="anchored" data-anchor-id="complete-example">Complete Example</h2> 854<div id="6d45dd17" class="cell"> 855<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 856<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 857<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 858<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> load_dataset</span> 859<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 860<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a></span> 861<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a><span class="co"># 1. 
Define sample type</span></span> 862<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 863<span id="cb12-9"><a href="#cb12-9" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ImageSample:</span> 864<span id="cb12-10"><a href="#cb12-10" aria-hidden="true" tabindex="-1"></a> image: NDArray</span> 865<span id="cb12-11"><a href="#cb12-11" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span> 866<span id="cb12-12"><a href="#cb12-12" aria-hidden="true" tabindex="-1"></a></span> 867<span id="cb12-13"><a href="#cb12-13" aria-hidden="true" tabindex="-1"></a><span class="co"># 2. Create dataset files</span></span> 868<span id="cb12-14"><a href="#cb12-14" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> split <span class="kw">in</span> [<span class="st">"train"</span>, <span class="st">"test"</span>]:</span> 869<span id="cb12-15"><a href="#cb12-15" aria-hidden="true" tabindex="-1"></a> <span class="cf">with</span> wds.writer.TarWriter(<span class="ss">f"</span><span class="sc">{</span>split<span class="sc">}</span><span class="ss">-000.tar"</span>) <span class="im">as</span> sink:</span> 870<span id="cb12-16"><a href="#cb12-16" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">100</span>):</span> 871<span id="cb12-17"><a href="#cb12-17" aria-hidden="true" tabindex="-1"></a> sample <span class="op">=</span> ImageSample(</span> 872<span id="cb12-18"><a href="#cb12-18" aria-hidden="true" tabindex="-1"></a> image<span class="op">=</span>np.random.rand(<span class="dv">64</span>, <span class="dv">64</span>, <span class="dv">3</span>).astype(np.float32),</span> 873<span id="cb12-19"><a href="#cb12-19" aria-hidden="true" tabindex="-1"></a> label<span class="op">=</span><span class="ss">f"sample_</span><span class="sc">{</span>i<span class="sc">}</span><span 
class="ss">"</span>,</span> 874<span id="cb12-20"><a href="#cb12-20" aria-hidden="true" tabindex="-1"></a> )</span> 875<span id="cb12-21"><a href="#cb12-21" aria-hidden="true" tabindex="-1"></a> sink.write({<span class="op">**</span>sample.as_wds, <span class="st">"__key__"</span>: <span class="ss">f"</span><span class="sc">{</span>i<span class="sc">:06d}</span><span class="ss">"</span>})</span> 876<span id="cb12-22"><a href="#cb12-22" aria-hidden="true" tabindex="-1"></a></span> 877<span id="cb12-23"><a href="#cb12-23" aria-hidden="true" tabindex="-1"></a><span class="co"># 3. Load with split detection</span></span> 878<span id="cb12-24"><a href="#cb12-24" aria-hidden="true" tabindex="-1"></a>ds_dict <span class="op">=</span> load_dataset(<span class="st">"./"</span>, ImageSample)</span> 879<span id="cb12-25"><a href="#cb12-25" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(ds_dict.keys()) <span class="co"># dict_keys(['train', 'test'])</span></span> 880<span id="cb12-26"><a href="#cb12-26" aria-hidden="true" tabindex="-1"></a></span> 881<span id="cb12-27"><a href="#cb12-27" aria-hidden="true" tabindex="-1"></a><span class="co"># 4. 
Iterate</span></span> 882<span id="cb12-28"><a href="#cb12-28" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> ds_dict[<span class="st">"train"</span>].ordered(batch_size<span class="op">=</span><span class="dv">16</span>):</span> 883<span id="cb12-29"><a href="#cb12-29" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(batch.image.shape) <span class="co"># (16, 64, 64, 3)</span></span> 884<span id="cb12-30"><a href="#cb12-30" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(batch.label) <span class="co"># ['sample_0', 'sample_1', ...]</span></span> 885<span id="cb12-31"><a href="#cb12-31" aria-hidden="true" tabindex="-1"></a> <span class="cf">break</span></span> 886<span id="cb12-32"><a href="#cb12-32" aria-hidden="true" tabindex="-1"></a></span> 887<span id="cb12-33"><a href="#cb12-33" aria-hidden="true" tabindex="-1"></a><span class="co"># 5. Load specific split</span></span> 888<span id="cb12-34"><a href="#cb12-34" aria-hidden="true" tabindex="-1"></a>train_ds <span class="op">=</span> load_dataset(<span class="st">"./"</span>, ImageSample, split<span class="op">=</span><span class="st">"train"</span>)</span> 889<span id="cb12-35"><a href="#cb12-35" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> train_ds.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 890<span id="cb12-36"><a href="#cb12-36" aria-hidden="true" tabindex="-1"></a> process(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 891</div> 892</section> 893<section id="related" class="level2"> 894<h2 class="anchored" data-anchor-id="related">Related</h2> 895<ul> 896<li><a href="../reference/datasets.html">Datasets</a> - Dataset iteration and batching</li> 897<li><a href="../reference/packable-samples.html">Packable Samples</a> - Defining sample types</li> 898<li><a 
href="../reference/local-storage.html">Local Storage</a> - LocalIndex for index lookup</li> 899<li><a href="../reference/protocols.html">Protocols</a> - AbstractIndex interface</li> 900</ul> 901 902 903</section> 904 905</main> <!-- /main --> 906<script id="quarto-html-after-body" type="application/javascript"> 907 window.document.addEventListener("DOMContentLoaded", function (event) { 908 // Ensure there is a toggle, if there isn't float one in the top right 909 if (window.document.querySelector('.quarto-color-scheme-toggle') === null) { 910 const a = window.document.createElement('a'); 911 a.classList.add('top-right'); 912 a.classList.add('quarto-color-scheme-toggle'); 913 a.href = ""; 914 a.onclick = function() { try { window.quartoToggleColorScheme(); } catch {} return false; }; 915 const i = window.document.createElement("i"); 916 i.classList.add('bi'); 917 a.appendChild(i); 918 window.document.body.appendChild(a); 919 } 920 setColorSchemeToggle(hasAlternateSentinel()) 921 const icon = ""; 922 const anchorJS = new window.AnchorJS(); 923 anchorJS.options = { 924 placement: 'right', 925 icon: icon 926 }; 927 anchorJS.add('.anchored'); 928 const isCodeAnnotation = (el) => { 929 for (const clz of el.classList) { 930 if (clz.startsWith('code-annotation-')) { 931 return true; 932 } 933 } 934 return false; 935 } 936 const onCopySuccess = function(e) { 937 // button target 938 const button = e.trigger; 939 // don't keep focus 940 button.blur(); 941 // flash "checked" 942 button.classList.add('code-copy-button-checked'); 943 var currentTitle = button.getAttribute("title"); 944 button.setAttribute("title", "Copied!"); 945 let tooltip; 946 if (window.bootstrap) { 947 button.setAttribute("data-bs-toggle", "tooltip"); 948 button.setAttribute("data-bs-placement", "left"); 949 button.setAttribute("data-bs-title", "Copied!"); 950 tooltip = new bootstrap.Tooltip(button, 951 { trigger: "manual", 952 customClass: "code-copy-button-tooltip", 953 offset: [0, -8]}); 954 
tooltip.show(); 955 } 956 setTimeout(function() { 957 if (tooltip) { 958 tooltip.hide(); 959 button.removeAttribute("data-bs-title"); 960 button.removeAttribute("data-bs-toggle"); 961 button.removeAttribute("data-bs-placement"); 962 } 963 button.setAttribute("title", currentTitle); 964 button.classList.remove('code-copy-button-checked'); 965 }, 1000); 966 // clear code selection 967 e.clearSelection(); 968 } 969 const getTextToCopy = function(trigger) { 970 const codeEl = trigger.previousElementSibling.cloneNode(true); 971 for (const childEl of codeEl.children) { 972 if (isCodeAnnotation(childEl)) { 973 childEl.remove(); 974 } 975 } 976 return codeEl.innerText; 977 } 978 const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', { 979 text: getTextToCopy 980 }); 981 clipboard.on('success', onCopySuccess); 982 if (window.document.getElementById('quarto-embedded-source-code-modal')) { 983 const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', { 984 text: getTextToCopy, 985 container: window.document.getElementById('quarto-embedded-source-code-modal') 986 }); 987 clipboardModal.on('success', onCopySuccess); 988 } 989 var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//); 990 var mailtoRegex = new RegExp(/^mailto:/); 991 var filterRegex = new RegExp("https:\/\/github\.com\/your-org\/atdata"); 992 var isInternal = (href) => { 993 return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href); 994 } 995 // Inspect non-navigation links and adorn them if external 996 var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)'); 997 for (var i=0; i<links.length; i++) { 998 const link = links[i]; 999 if (!isInternal(link.href)) { 1000 // undo the damage 
that might have been done by quarto-nav.js in the case of 1001 // links that we want to consider external 1002 if (link.dataset.originalHref !== undefined) { 1003 link.href = link.dataset.originalHref; 1004 } 1005 } 1006 } 1007 function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) { 1008 const config = { 1009 allowHTML: true, 1010 maxWidth: 500, 1011 delay: 100, 1012 arrow: false, 1013 appendTo: function(el) { 1014 return el.parentElement; 1015 }, 1016 interactive: true, 1017 interactiveBorder: 10, 1018 theme: 'quarto', 1019 placement: 'bottom-start', 1020 }; 1021 if (contentFn) { 1022 config.content = contentFn; 1023 } 1024 if (onTriggerFn) { 1025 config.onTrigger = onTriggerFn; 1026 } 1027 if (onUntriggerFn) { 1028 config.onUntrigger = onUntriggerFn; 1029 } 1030 window.tippy(el, config); 1031 } 1032 const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]'); 1033 for (var i=0; i<noterefs.length; i++) { 1034 const ref = noterefs[i]; 1035 tippyHover(ref, function() { 1036 // use id or data attribute instead here 1037 let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href'); 1038 try { href = new URL(href).hash; } catch {} 1039 const id = href.replace(/^#\/?/, ""); 1040 const note = window.document.getElementById(id); 1041 if (note) { 1042 return note.innerHTML; 1043 } else { 1044 return ""; 1045 } 1046 }); 1047 } 1048 const xrefs = window.document.querySelectorAll('a.quarto-xref'); 1049 const processXRef = (id, note) => { 1050 // Strip column container classes 1051 const stripColumnClz = (el) => { 1052 el.classList.remove("page-full", "page-columns"); 1053 if (el.children) { 1054 for (const child of el.children) { 1055 stripColumnClz(child); 1056 } 1057 } 1058 } 1059 stripColumnClz(note) 1060 if (id === null || id.startsWith('sec-')) { 1061 // Special case sections, only their first couple elements 1062 const container = document.createElement("div"); 1063 if (note.children && note.children.length > 2) { 1064 
// ---------------------------------------------------------------------------
// Cross-reference hover previews
//
// For every link in `xrefs`, register a tooltip whose content is filled in
// when the callback below runs. `xrefs`, `tippyHover` and `processXRef` are
// defined outside this chunk; the third argument to `tippyHover` is
// presumably the "on trigger" callback and the fourth the "on untrigger"
// callback — confirm against the definition of `tippyHover`.
// ---------------------------------------------------------------------------
for (var i=0; i<xrefs.length; i++) {
  const xref = xrefs[i];
  tippyHover(xref, undefined, function(instance) {
    // Keep the tooltip disabled until its content is ready.
    instance.disable();
    let url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      // Resolve against the current page: `new URL(url)` with no base throws
      // for relative hrefs (e.g. "chapter.html#fig-1"), which used to drop the
      // fragment silently and preview the entire target page instead of the
      // referenced element. Absolute URLs ignore the base, so they behave
      // exactly as before.
      try { hash = new URL(url, window.location.href).hash; } catch {}
    }
    if (hash) {
      const id = hash.replace(/^#\/?/, "");
      const note = window.document.getElementById(id);
      if (note !== null) {
        // Target lives in the current document: preview a clone of it.
        try {
          const html = processXRef(id, note.cloneNode(true));
          instance.setContent(html);
        } finally {
          instance.enable();
          instance.show();
        }
      } else {
        // Target is on another page: fetch that page and look the id up there.
        fetch(url.split('#')[0])
          .then(res => res.text())
          .then(html => {
            const parser = new DOMParser();
            const htmlDoc = parser.parseFromString(html, "text/html");
            const note = htmlDoc.getElementById(id);
            if (note !== null) {
              const html = processXRef(id, note);
              instance.setContent(html);
            }
          })
          // Best effort: a failed request leaves the tooltip without new
          // content rather than surfacing an unhandled promise rejection.
          .catch(() => {})
          .finally(() => {
            instance.enable();
            instance.show();
          });
      }
    } else {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetch(url)
        .then(res => res.text())
        .then(html => {
          const parser = new DOMParser();
          const htmlDoc = parser.parseFromString(html, "text/html");
          const note = htmlDoc.querySelector('main.content');
          if (note !== null) {
            // This should only happen for chapter cross references
            // (since there is no id in the URL)
            // remove the first header
            if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
              note.children[0].remove();
            }
            const html = processXRef(null, note);
            instance.setContent(html);
          }
        })
        // Best effort, as above.
        .catch(() => {})
        .finally(() => {
          instance.enable();
          instance.show();
        });
    }
  }, function(instance) {
  });
}

// ---------------------------------------------------------------------------
// Code annotation highlighting
// ---------------------------------------------------------------------------

// The annotation element (a `dt[data-target-cell]`) whose code lines are
// currently highlighted, or undefined when nothing is highlighted.
let selectedAnnoteEl;

// Builds the CSS selector for the <span> that carries the annotation metadata
// for annotation `annotation` of code cell `cell`.
const selectorForAnnotation = (cell, annotation) => {
  return `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
};

// Highlights the code lines that `annoteEl` refers to.
//
// Reads `data-target-cell` / `data-target-annotation` from `annoteEl`, finds
// the matching annotation <span>, and turns its comma-separated
// `data-code-lines` into element ids of the form "<cell>-<line>". Two
// absolutely positioned <div>s (one over the code, one in the gutter) are
// created on demand and sized to span from the first to the last of those
// lines. On success `annoteEl` is recorded in `selectedAnnoteEl`.
//
// If the annotation span or the line elements cannot be found, the function
// returns without changing anything instead of throwing from inside the
// click / resize handlers that call it.
const selectCodeLines = (annoteEl) => {
  const doc = window.document;
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = doc.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan?.getAttribute("data-code-lines");
  if (!codeLines) {
    // No annotation span, or it lists no lines: nothing to highlight.
    return;
  }
  const lineIds = codeLines.split(",").map((line) => targetCell + "-" + line);

  const firstEl = doc.getElementById(lineIds[0]);
  const lastEl = doc.getElementById(lineIds[lineIds.length - 1]);
  if (firstEl === null || lastEl === null) {
    return;
  }

  // With a single line `lastEl` is `firstEl`, so this reduces to its own height.
  const top = firstEl.offsetTop;
  const height = lastEl.offsetTop + lastEl.offsetHeight - top;
  const parent = firstEl.parentElement?.parentElement ?? null;

  if (parent !== null) {
    // cook up a div (if necessary) and position it
    let div = doc.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = doc.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    // The highlight extends 2px above and below the covered lines.
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;

    let gutterDiv = doc.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      const codeCell = doc.getElementById(targetCell);
      const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
      if (gutter !== null) {
        gutterDiv = doc.createElement("div");
        gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
        gutterDiv.style.position = 'absolute';
        gutter.appendChild(gutterDiv);
      }
    }
    if (gutterDiv !== null) {
      gutterDiv.style.top = top - 2 + "px";
      gutterDiv.style.height = height + 4 + "px";
    }
  }
  selectedAnnoteEl = annoteEl;
};

// Removes both highlight <div>s (if present) and clears `selectedAnnoteEl`.
const unselectCodeLines = () => {
  const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  elementsIds.forEach((elId) => {
    const div = window.document.getElementById(elId);
    if (div) {
      div.remove();
    }
  });
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
// Re-position the current highlight whenever the window is resized.
// `elRect` is declared outside this chunk; it is only reset here
// (presumably a cached bounding rect — confirm against its declaration).
window.addEventListener(
  "resize",
  throttle(() => {
    elRect = undefined;
    if (selectedAnnoteEl) {
      selectCodeLines(selectedAnnoteEl);
    }
  }, 10)
);

// Rate-limits `fn`.
//
// The first call runs `fn` immediately and closes the gate. While the gate is
// closed, every call cancels the pending timer (if any) and schedules `fn` to
// run `ms` milliseconds later with the arguments of that latest call; when the
// timer fires, `fn` runs and the gate reopens. The gate is only reopened by
// such a delayed run.
//
// fn: function to invoke.
// ms: delay in milliseconds applied to calls made while the gate is closed.
// Returns the rate-limited wrapper.
function throttle(fn, ms) {
  // Named `gateClosed` rather than `throttle` so it no longer shadows this
  // function's own name.
  let gateClosed = false;
  let timer;
  return (...args) => {
    if (!gateClosed) {
      // first call gets through
      fn.apply(this, args);
      gateClosed = true;
      return;
    }
    // all the others get throttled: only the most recent one survives
    if (timer) clearTimeout(timer);
    timer = setTimeout(() => {
      fn.apply(this, args);
      timer = gateClosed = false;
    }, ms);
  };
}

// Attach click handler to the DT
// Clicking an annotation highlights its code lines; clicking the annotation
// that is already selected removes the highlight again.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', (event) => {
    // Use the <dt> the listener is bound to, not `event.target`: when the
    // click lands on markup nested inside the <dt>, `event.target` is that
    // descendant, which has no data-target-* attributes and never equals
    // `selectedAnnoteEl`, so both selecting and toggling would misbehave.
    const clickedEl = event.currentTarget;
    if (clickedEl !== selectedAnnoteEl) {
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}

// Walks up from `el` until it reaches an element whose parent carries a
// non-empty `data-cites` attribute.
//
// Returns `{ el, cites }`, where `el` is that child element and `cites` is the
// attribute value split on single spaces, or undefined when no ancestor has
// the attribute.
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return {
        el: current,
        cites: cites.split(' ')
      };
    }
    current = current.parentElement;
  }
  return undefined;
};

// Bibliography hover previews: for every citation link, show the matching
// bibliography entries (elements with id "ref-<cite key>") in a tooltip.
// `tippyHover` is defined outside this chunk; its second argument is
// presumably a function returning the tooltip HTML — confirm.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
  const ref = bibliorefs[i];
  const citeInfo = findCites(ref);
  if (citeInfo) {
    tippyHover(citeInfo.el, function() {
      var popup = window.document.createElement('div');
      citeInfo.cites.forEach(function(cite) {
        var citeDiv = window.document.createElement('div');
        citeDiv.classList.add('hanging-indent');
        citeDiv.classList.add('csl-entry');
        var biblioDiv = window.document.getElementById('ref-' + cite);
        if (biblioDiv) {
          citeDiv.innerHTML = biblioDiv.innerHTML;
        }
        // An entry with no matching bibliography element is still appended,
        // as an empty <div>.
        popup.appendChild(citeDiv);
      });
      return popup.innerHTML;
    });
  }
}