A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor: simplify docs CSS by deferring to Quarto's built-in theming

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+135 -368
+1
CHANGELOG.md
··· 12 12 - Fix dark theme styling for callouts and code blocks in Quarto docs (#229) 13 13 14 14 ### Changed 15 + - Fix dark mode styling issues in Quarto docs (#230) 15 16 - Set up Quarto documentation site with tutorials, reference docs, and API reference (#224) 16 17 - Add shared sample type definitions to conftest.py (#219) 17 18 - Add blob operation tests for DatasetLoader and DatasetPublisher (#220)
+4 -121
docs/assets/styles.css
··· 4 4 LIGHT THEME STYLES 5 5 =========================================== */ 6 6 7 - /* Code block styling */ 8 - .sourceCode { 9 - background-color: #f8f9fa; 10 - border-radius: 4px; 11 - } 12 - 13 - /* Callout styling */ 14 - .callout { 15 - margin: 1rem 0; 16 - } 7 + /* Note: Quarto handles code block and callout styling. 8 + Only add custom component styles here. */ 17 9 18 10 /* API reference styling */ 19 11 .api-signature { ··· 89 81 90 82 /* =========================================== 91 83 DARK THEME STYLES 92 - Quarto uses [data-bs-theme="dark"] on html element 84 + Only for custom components - let Quarto handle 85 + standard elements (callouts, code blocks, tables) 93 86 =========================================== */ 94 87 95 - /* Code blocks in dark mode */ 96 - [data-bs-theme="dark"] .sourceCode { 97 - background-color: #1e1e1e; 98 - border: 1px solid #444; 99 - } 100 - 101 - [data-bs-theme="dark"] .sourceCode pre { 102 - background-color: #1e1e1e; 103 - color: #d4d4d4; 104 - } 105 - 106 - [data-bs-theme="dark"] pre > code.sourceCode { 107 - background-color: transparent; 108 - } 109 - 110 - [data-bs-theme="dark"] code { 111 - background-color: #2d2d2d; 112 - color: #e0e0e0; 113 - } 114 - 115 - /* Inline code in dark mode */ 116 - [data-bs-theme="dark"] p code, 117 - [data-bs-theme="dark"] li code { 118 - background-color: #3d3d3d; 119 - color: #e0e0e0; 120 - padding: 0.125rem 0.25rem; 121 - border-radius: 3px; 122 - } 123 - 124 88 /* API reference styling in dark mode */ 125 89 [data-bs-theme="dark"] .api-signature { 126 90 background-color: #2d2d2d; ··· 137 101 [data-bs-theme="dark"] .hero .tagline { 138 102 color: #adb5bd; 139 103 } 140 - 141 - /* Callout boxes in dark mode */ 142 - [data-bs-theme="dark"] .callout { 143 - border-left-width: 4px; 144 - } 145 - 146 - [data-bs-theme="dark"] .callout-note { 147 - background-color: rgba(13, 110, 253, 0.15); 148 - border-color: #0d6efd; 149 - } 150 - 151 - [data-bs-theme="dark"] .callout-tip { 152 - background-color: rgba(25, 135, 84, 0.15); 153 - border-color: #198754; 154 - } 155 - 156 - [data-bs-theme="dark"] .callout-warning { 157 - background-color: rgba(255, 193, 7, 0.15); 158 - border-color: #ffc107; 159 - } 160 - 161 - [data-bs-theme="dark"] .callout-caution { 162 - background-color: rgba(253, 126, 20, 0.15); 163 - border-color: #fd7e14; 164 - } 165 - 166 - [data-bs-theme="dark"] .callout-important { 167 - background-color: rgba(220, 53, 69, 0.15); 168 - border-color: #dc3545; 169 - } 170 - 171 - [data-bs-theme="dark"] .callout .callout-title-container { 172 - color: inherit; 173 - } 174 - 175 - [data-bs-theme="dark"] .callout-body-container { 176 - color: #e0e0e0; 177 - } 178 - 179 - /* Table styling in dark mode */ 180 - [data-bs-theme="dark"] table { 181 - color: #e0e0e0; 182 - } 183 - 184 - [data-bs-theme="dark"] table th { 185 - background-color: #2d2d2d; 186 - border-color: #444; 187 - } 188 - 189 - [data-bs-theme="dark"] table td { 190 - border-color: #444; 191 - } 192 - 193 - [data-bs-theme="dark"] table tr:nth-child(even) { 194 - background-color: rgba(255, 255, 255, 0.05); 195 - } 196 - 197 - /* Panel tabset in dark mode */ 198 - [data-bs-theme="dark"] .panel-tabset .nav-link { 199 - color: #adb5bd; 200 - } 201 - 202 - [data-bs-theme="dark"] .panel-tabset .nav-link.active { 203 - color: #fff; 204 - background-color: #375a7f; 205 - border-color: #375a7f; 206 - } 207 - 208 - /* Sidebar in dark mode */ 209 - [data-bs-theme="dark"] .sidebar { 210 - background-color: #222; 211 - } 212 - 213 - /* Links in dark mode for better visibility */ 214 - [data-bs-theme="dark"] a { 215 - color: #6ea8fe; 216 - } 217 - 218 - [data-bs-theme="dark"] a:hover { 219 - color: #9ec5fe; 220 - }
+6 -6
docs/index.html
··· 606 606 <h2 class="anchored" data-anchor-id="quick-example">Quick Example</h2> 607 607 <section id="define-a-sample-type" class="level3"> 608 608 <h3 class="anchored" data-anchor-id="define-a-sample-type">Define a Sample Type</h3> 609 - <div id="e063b0bb" class="cell"> 609 + <div id="8279c33c" class="cell"> 610 610 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 611 611 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 612 612 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> ··· 620 620 </section> 621 621 <section id="create-and-write-samples" class="level3"> 622 622 <h3 class="anchored" data-anchor-id="create-and-write-samples">Create and Write Samples</h3> 623 - <div id="9cc9dba2" class="cell"> 623 + <div id="9329acc3" class="cell"> 624 624 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 625 625 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span> 626 626 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>samples <span class="op">=</span> [</span> ··· 639 639 </section> 640 640 <section id="load-and-iterate" class="level3"> 641 641 <h3 class="anchored" data-anchor-id="load-and-iterate">Load and Iterate</h3> 642 - <div id="1eddfee0" class="cell"> 642 + <div id="ed72875c" class="cell"> 643 643 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-000000.tar"</span>)</span> 644 644 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span> 645 645 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Iterate with batching</span></span> ··· 652 652 </section> 653 653 <section id="huggingface-style-loading" class="level2"> 654 654 <h2 class="anchored" data-anchor-id="huggingface-style-loading">HuggingFace-Style Loading</h2> 655 - <div id="e139b196" class="cell"> 655 + <div id="383ba489" class="cell"> 656 656 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Load from local path</span></span> 657 657 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> atdata.load_dataset(<span class="st">"path/to/data-{000000..000009}.tar"</span>, split<span class="op">=</span><span class="st">"train"</span>)</span> 658 658 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span> ··· 664 664 </section> 665 665 <section id="local-storage-with-redis-s3" class="level2"> 666 666 <h2 class="anchored" data-anchor-id="local-storage-with-redis-s3">Local Storage with Redis + S3</h2> 667 - <div id="0db5c641" class="cell"> 667 + <div id="d54e2012" class="cell"> 668 668 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex, Repo</span> 669 669 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a></span> 670 670 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Set up local index</span></span> ··· 684 684 </section> 685 685 <section id="publish-to-atproto-federation" class="level2"> 686 686 <h2 class="anchored" data-anchor-id="publish-to-atproto-federation">Publish to ATProto Federation</h2> 687 - <div id="13e3e185" class="cell"> 687 + <div id="4d2dd73e" class="cell"> 688 688 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient</span> 689 689 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.promote <span class="im">import</span> promote_to_atmosphere</span> 690 690 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a></span>
+14 -14
docs/reference/atmosphere.html
··· 592 592 <section id="atmosphereclient" class="level2"> 593 593 <h2 class="anchored" data-anchor-id="atmosphereclient">AtmosphereClient</h2> 594 594 <p>The client handles authentication and record operations:</p> 595 - <div id="2f742bd5" class="cell"> 595 + <div id="7d23f054" class="cell"> 596 596 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient</span> 597 597 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span> 598 598 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> ··· 619 619 <section id="session-management" class="level3"> 620 620 <h3 class="anchored" data-anchor-id="session-management">Session Management</h3> 621 621 <p>Save and restore sessions to avoid re-authentication:</p> 622 - <div id="9fabe18c" class="cell"> 622 + <div id="deed6819" class="cell"> 623 623 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Export session for later</span></span> 624 624 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>session_string <span class="op">=</span> client.export_session()</span> 625 625 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span> ··· 631 631 <section id="custom-pds" class="level3"> 632 632 <h3 class="anchored" data-anchor-id="custom-pds">Custom PDS</h3> 633 633 <p>Connect to a custom PDS instead of bsky.social:</p> 634 - <div id="1fdb7a0d" class="cell"> 634 + <div id="e7d9ddc4" class="cell"> 635 635 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient(base_url<span class="op">=</span><span class="st">"https://pds.example.com"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 636 636 </div> 637 637 </section> ··· 639 639 <section id="atmosphereindex" class="level2"> 640 640 <h2 class="anchored" data-anchor-id="atmosphereindex">AtmosphereIndex</h2> 641 641 <p>The unified interface for ATProto operations, implementing the AbstractIndex protocol:</p> 642 - <div id="14883141" class="cell"> 642 + <div id="7cf0087c" class="cell"> 643 643 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, AtmosphereIndex</span> 644 644 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span> 645 645 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> ··· 649 649 </div> 650 650 <section id="publishing-schemas" class="level3"> 651 651 <h3 class="anchored" data-anchor-id="publishing-schemas">Publishing Schemas</h3> 652 - <div id="6bd8de1d" class="cell"> 652 + <div id="a5b09f02" class="cell"> 653 653 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 654 654 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 655 655 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a></span> ··· 670 670 </section> 671 671 <section id="publishing-datasets" class="level3"> 672 672 <h3 class="anchored" data-anchor-id="publishing-datasets">Publishing Datasets</h3> 673 - <div id="70a41b1e" class="cell"> 673 + <div id="5dfc1b7a" class="cell"> 674 674 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"data-{000000..000009}.tar"</span>)</span> 675 675 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span> 676 676 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span> ··· 688 688 </section> 689 689 <section id="listing-and-retrieving" class="level3"> 690 690 <h3 class="anchored" data-anchor-id="listing-and-retrieving">Listing and Retrieving</h3> 691 - <div id="6e038865" class="cell"> 691 + <div id="69588050" class="cell"> 692 692 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># List your datasets</span></span> 693 693 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> entry <span class="kw">in</span> index.list_datasets():</span> 694 694 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>entry<span class="sc">.</span>name<span class="sc">}</span><span class="ss">: </span><span class="sc">{</span>entry<span class="sc">.</span>schema_ref<span class="sc">}</span><span class="ss">"</span>)</span> ··· 714 714 <p>For more control, use the individual publisher classes:</p> 715 715 <section id="schemapublisher" class="level3"> 716 716 <h3 class="anchored" data-anchor-id="schemapublisher">SchemaPublisher</h3> 717 - <div id="ae2a3e09" class="cell"> 717 + <div id="f50450f1" class="cell"> 718 718 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> SchemaPublisher</span> 719 719 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span> 720 720 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> SchemaPublisher(client)</span> ··· 730 730 </section> 731 731 <section id="datasetpublisher" class="level3"> 732 732 <h3 class="anchored" data-anchor-id="datasetpublisher">DatasetPublisher</h3> 733 - <div id="5f9de3a2" class="cell"> 733 + <div id="486054b3" class="cell"> 734 734 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetPublisher</span> 735 735 <span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span> 736 736 <span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> DatasetPublisher(client)</span> ··· 748 748 <section id="blob-storage" class="level4"> 749 749 <h4 class="anchored" data-anchor-id="blob-storage">Blob Storage</h4> 750 750 <p>For smaller datasets (up to ~50MB per shard), you can store data directly in ATProto blobs instead of external URLs:</p> 751 - <div id="674753a3" class="cell"> 751 + <div id="b5bba684" class="cell"> 752 752 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> io</span> 753 753 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 754 754 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a></span> ··· 768 768 <span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 769 769 </div> 770 770 <p>To load datasets with blob storage:</p> 771 - <div id="8a514550" class="cell"> 771 + <div id="d978c45f" class="cell"> 772 772 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> DatasetLoader</span> 773 773 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span> 774 774 <span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> DatasetLoader(client)</span> ··· 789 789 </section> 790 790 <section id="lenspublisher" class="level3"> 791 791 <h3 class="anchored" data-anchor-id="lenspublisher">LensPublisher</h3> 792 - <div id="83685215" class="cell"> 792 + <div id="91dc6d27" class="cell"> 793 793 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> LensPublisher</span> 794 794 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a></span> 795 795 <span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>publisher <span class="op">=</span> LensPublisher(client)</span> ··· 830 830 <section id="at-uris" class="level2"> 831 831 <h2 class="anchored" data-anchor-id="at-uris">AT URIs</h2> 832 832 <p>ATProto records are identified by AT URIs:</p> 833 - <div id="ce70a3d3" class="cell"> 833 + <div id="098b761d" class="cell"> 834 834 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtUri</span> 835 835 <span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a></span> 836 836 <span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Parse an AT URI</span></span> ··· 896 896 </section> 897 897 <section id="complete-example" class="level2"> 898 898 <h2 class="anchored" data-anchor-id="complete-example">Complete Example</h2> 899 - <div id="eaf1716d" class="cell"> 899 + <div id="19a9561b" class="cell"> 900 900 <div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 901 901 <span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 902 902 <span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span>
+9 -9
docs/reference/datasets.html
··· 570 570 <p>The <code>Dataset</code> class provides typed iteration over WebDataset tar files with automatic batching and lens transformations.</p> 571 571 <section id="creating-a-dataset" class="level2"> 572 572 <h2 class="anchored" data-anchor-id="creating-a-dataset">Creating a Dataset</h2> 573 - <div id="73a578a0" class="cell"> 573 + <div id="f8ffdbd3" class="cell"> 574 574 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 575 575 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 576 576 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span> ··· 592 592 <section id="ordered-iteration" class="level3"> 593 593 <h3 class="anchored" data-anchor-id="ordered-iteration">Ordered Iteration</h3> 594 594 <p>Iterate through samples in their original order:</p> 595 - <div id="f57f13b0" class="cell"> 595 + <div id="dcc57e86" class="cell"> 596 596 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># With batching (default batch_size=1)</span></span> 597 597 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.ordered(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 598 598 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> images <span class="op">=</span> batch.image <span class="co"># numpy array (32, H, W, C)</span></span> ··· 606 606 <section id="shuffled-iteration" class="level3"> 607 607 <h3 class="anchored" data-anchor-id="shuffled-iteration">Shuffled Iteration</h3> 608 608 <p>Iterate with randomized order at both shard and sample levels:</p> 609 - <div id="b90f38a0" class="cell"> 609 + <div id="96efde4e" class="cell"> 610 610 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.shuffled(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 611 611 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># Samples are shuffled</span></span> 612 612 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> process(batch)</span> ··· 637 637 <section id="samplebatch" class="level2"> 638 638 <h2 class="anchored" data-anchor-id="samplebatch">SampleBatch</h2> 639 639 <p>When iterating with a <code>batch_size</code>, each iteration yields a <code>SampleBatch</code> with automatic attribute aggregation.</p> 640 - <div id="0bbf6c70" class="cell"> 640 + <div id="3443ec5f" class="cell"> 641 641 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 642 642 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> Sample:</span> 643 643 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> features: NDArray <span class="co"># shape (256,)</span></span> ··· 657 657 <section id="type-transformations-with-lenses" class="level2"> 658 658 <h2 class="anchored" data-anchor-id="type-transformations-with-lenses">Type Transformations with Lenses</h2> 659 659 <p>View a dataset through a different sample type using registered lenses:</p> 660 - <div id="c50ad78c" class="cell"> 660 + <div id="f4d88332" class="cell"> 661 661 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 662 662 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> SimplifiedSample:</span> 663 663 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> label: <span class="bu">str</span></span> ··· 679 679 <section id="shard-list" class="level3"> 680 680 <h3 class="anchored" data-anchor-id="shard-list">Shard List</h3> 681 681 <p>Get the list of individual tar files:</p> 682 - <div id="405031bf" class="cell"> 682 + <div id="4ed4cc09" class="cell"> 683 683 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[Sample](<span class="st">"data-{000000..000009}.tar"</span>)</span> 684 684 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>shards <span class="op">=</span> dataset.shard_list</span> 685 685 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="co"># ['data-000000.tar', 'data-000001.tar', ..., 'data-000009.tar']</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> ··· 688 688 <section id="metadata" class="level3"> 689 689 <h3 class="anchored" data-anchor-id="metadata">Metadata</h3> 690 690 <p>Datasets can have associated metadata from a URL:</p> 691 - <div id="8e36e299" class="cell"> 691 + <div id="ef3e8cf7" class="cell"> 692 692 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[Sample](</span> 693 693 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="st">"data-{000000..000009}.tar"</span>,</span> 694 694 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> metadata_url<span class="op">=</span><span class="st">"https://example.com/metadata.msgpack"</span></span> ··· 702 702 <section id="writing-datasets" class="level2"> 703 703 <h2 class="anchored" data-anchor-id="writing-datasets">Writing Datasets</h2> 704 704 <p>Use WebDataset’s <code>TarWriter</code> or <code>ShardWriter</code> to create datasets:</p> 705 - <div id="99f7c9be" class="cell"> 705 + <div id="6fd03e7f" class="cell"> 706 706 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 707 707 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 708 708 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a></span> ··· 725 725 <section id="parquet-export" class="level2"> 726 726 <h2 class="anchored" data-anchor-id="parquet-export">Parquet Export</h2> 727 727 <p>Export dataset contents to parquet format:</p> 728 - <div id="62da55c2" class="cell"> 728 + <div id="266ef53f" class="cell"> 729 729 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Export entire dataset</span></span> 730 730 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>dataset.to_parquet(<span class="st">"output.parquet"</span>)</span> 731 731 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a></span>
+10 -10
docs/reference/lenses.html
··· 572 572 <section id="creating-a-lens" class="level2"> 573 573 <h2 class="anchored" data-anchor-id="creating-a-lens">Creating a Lens</h2> 574 574 <p>Use the <code>@lens</code> decorator to define a getter:</p> 575 - <div id="a93bf139" class="cell"> 575 + <div id="b95a4374" class="cell"> 576 576 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 577 577 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 578 578 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span> ··· 602 602 <section id="adding-a-putter" class="level2"> 603 603 <h2 class="anchored" data-anchor-id="adding-a-putter">Adding a Putter</h2> 604 604 <p>To enable bidirectional updates, add a putter:</p> 605 - <div id="58d17b0b" class="cell"> 605 + <div id="699f80c3" class="cell"> 606 606 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="at">@simplify.putter</span></span> 607 607 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> simplify_put(view: SimpleSample, source: FullSample) <span class="op">-&gt;</span> FullSample:</span> 608 608 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> FullSample(</span> ··· 622 622 <section id="using-lenses-with-datasets" class="level2"> 623 623 <h2 class="anchored" data-anchor-id="using-lenses-with-datasets">Using Lenses with Datasets</h2> 624 624 <p>Lenses integrate with <code>Dataset.as_type()</code>:</p> 625 - <div id="0e717120" class="cell"> 625 + <div id="8ab7a7e7" class="cell"> 626 626 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[FullSample](<span class="st">"data-{000000..000009}.tar"</span>)</span> 627 627 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span> 628 628 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="co"># View through a different type</span></span> ··· 637 637 <section id="direct-lens-usage" class="level2"> 638 638 <h2 class="anchored" data-anchor-id="direct-lens-usage">Direct Lens Usage</h2> 639 639 <p>Lenses can also be called directly:</p> 640 - <div id="2a37dc30" class="cell"> 640 + <div id="8326e4a6" class="cell"> 641 641 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 642 642 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span> 643 643 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>full <span class="op">=</span> FullSample(</span> ··· 666 666 <div class="tab-content"> 667 667 <div id="tabset-1-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-1-1-tab"> 668 668 <p>If you get a view and immediately put it back, the source is unchanged:</p> 669 - <div id="cd1abb79" class="cell"> 669 + <div id="7e189882" class="cell"> 670 670 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>view <span class="op">=</span> lens.get(source)</span> 671 671 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="cf">assert</span> lens.put(view, source) <span class="op">==</span> source</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 672 672 </div> 673 673 </div> 674 674 <div id="tabset-1-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-1-2-tab"> 675 675 <p>If you put a view, getting it back yields that view:</p> 676 - <div id="2375a951" class="cell"> 676 + <div id="140c85c9" class="cell"> 677 677 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>updated <span class="op">=</span> lens.put(view, source)</span> 678 678 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="cf">assert</span> lens.get(updated) <span class="op">==</span> view</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 679 679 </div> 680 680 </div> 681 681 <div id="tabset-1-3" class="tab-pane" role="tabpanel" aria-labelledby="tabset-1-3-tab"> 682 682 <p>Putting twice is equivalent to putting once with the final value:</p> 683 - <div id="c9b64fa8" class="cell"> 683 + <div id="0a794716" class="cell"> 684 684 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>result1 <span class="op">=</span> lens.put(v2, lens.put(v1, source))</span> 685 685 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>result2 <span class="op">=</span> lens.put(v2, source)</span> 686 686 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="cf">assert</span> result1 <span class="op">==</span> result2</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> ··· 692 692 <section id="trivial-putter" class="level2"> 693 693 <h2 class="anchored" data-anchor-id="trivial-putter">Trivial Putter</h2> 694 694 <p>If no putter is defined, a trivial putter is used that ignores view updates:</p> 695 - <div id="aa3bab9a" class="cell"> 695 + <div id="fc2a1c1e" class="cell"> 696 696 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.lens</span></span> 697 697 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> extract_label(src: FullSample) <span class="op">-&gt;</span> SimpleSample:</span> 698 698 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> SimpleSample(label<span class="op">=</span>src.label, confidence<span class="op">=</span>src.confidence)</span> ··· 706 706 <section id="lensnetwork-registry" class="level2"> 707 707 <h2 class="anchored" data-anchor-id="lensnetwork-registry">LensNetwork Registry</h2> 708 708 <p>The <code>LensNetwork</code> is a singleton that stores all registered lenses:</p> 709 - <div id="439ccf8f" class="cell"> 709 + <div id="a4afa8ee" class="cell"> 710 710 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.lens <span class="im">import</span> LensNetwork</span> 711 711 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span> 712 712 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>network <span class="op">=</span> LensNetwork()</span> ··· 723 723 </section> 724 724 <section id="example-feature-extraction" class="level2"> 725 725 <h2 class="anchored" data-anchor-id="example-feature-extraction">Example: Feature Extraction</h2> 726 - <div id="276d0c18" class="cell"> 726 + <div id="a32ec875" class="cell"> 727 727 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 728 728 <span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> RawSample:</span> 729 729 <span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> audio: NDArray</span>
+12 -12
docs/reference/load-dataset.html
··· 581 581 </section> 582 582 <section id="basic-usage" class="level2"> 583 583 <h2 class="anchored" data-anchor-id="basic-usage">Basic Usage</h2> 584 - <div id="b9ab01b9" class="cell"> 584 + <div id="5ecb3749" class="cell"> 585 585 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 586 586 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> load_dataset</span> 587 587 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> ··· 604 604 <h2 class="anchored" data-anchor-id="path-formats">Path Formats</h2> 605 605 <section id="webdataset-brace-notation" class="level3"> 606 606 <h3 class="anchored" data-anchor-id="webdataset-brace-notation">WebDataset Brace Notation</h3> 607 - <div id="cbae6e05" class="cell"> 607 + <div id="ece59496" class="cell"> 608 608 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Range notation</span></span> 609 609 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"data-{000000..000099}.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span> 610 610 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span> ··· 614 614 </section> 615 615 <section id="glob-patterns" class="level3"> 616 616 <h3 class="anchored" data-anchor-id="glob-patterns">Glob Patterns</h3> 617 - <div id="055f0b83" class="cell"> 617 + <div id="b71ecdd0" class="cell"> 618 618 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Match all tar files</span></span> 619 619 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"path/to/*.tar"</span>, MySample)</span> 620 620 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span> ··· 624 624 </section> 625 625 <section id="local-directory" class="level3"> 626 626 <h3 class="anchored" data-anchor-id="local-directory">Local Directory</h3> 627 - <div id="03623bc5" class="cell"> 627 + <div id="daf1f3e7" class="cell"> 628 628 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Scans for .tar files</span></span> 629 629 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"./my-dataset/"</span>, MySample)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 630 630 </div> 631 631 </section> 632 632 <section id="remote-urls" class="level3"> 633 633 <h3 class="anchored" data-anchor-id="remote-urls">Remote URLs</h3> 634 - <div id="7755a058" class="cell"> 634 + <div id="f166ef50" class="cell"> 635 635 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># S3</span></span> 636 636 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(<span class="st">"s3://bucket/data-{000..099}.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span> 637 637 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span> ··· 644 644 </section> 645 645 <section id="index-lookup" class="level3"> 646 646 <h3 class="anchored" data-anchor-id="index-lookup">Index Lookup</h3> 647 - <div id="8ebc6301" class="cell"> 647 + <div id="9b7e84a9" class="cell"> 648 648 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex</span> 649 649 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a></span> 650 650 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> LocalIndex()</span> ··· 711 711 <section id="datasetdict" class="level2"> 712 712 <h2 class="anchored" data-anchor-id="datasetdict">DatasetDict</h2> 713 713 <p>When loading without <code>split=</code>, returns a <code>DatasetDict</code>:</p> 714 - <div id="a68bdf35" class="cell"> 714 + <div id="db8bf134" class="cell"> 715 715 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>ds_dict <span class="op">=</span> load_dataset(<span class="st">"path/to/data/"</span>, MySample)</span> 716 716 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span> 717 717 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Access splits</span></span> ··· 731 731 <section id="explicit-data-files" class="level2"> 732 732 <h2 class="anchored" data-anchor-id="explicit-data-files">Explicit Data Files</h2> 733 733 <p>Override automatic detection with <code>data_files</code>:</p> 734 - <div id="ef8c069a" class="cell"> 734 + <div id="a8ace4a8" class="cell"> 735 735 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Single pattern</span></span> 736 736 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_dataset(</span> 737 737 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="st">"path/to/"</span>,</span> ··· 760 760 <section id="streaming-mode" class="level2"> 761 761 <h2 class="anchored" data-anchor-id="streaming-mode">Streaming Mode</h2> 762 762 <p>The <code>streaming</code> parameter signals intent for streaming mode:</p> 763 - <div id="417b2092" class="cell"> 763 + <div id="b4adfe80" class="cell"> 764 764 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Mark as streaming</span></span> 765 765 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>ds_dict <span class="op">=</span> load_dataset(<span class="st">"path/to/data.tar"</span>, MySample, streaming<span class="op">=</span><span class="va">True</span>)</span> 766 766 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a></span> ··· 785 785 <section id="auto-type-resolution" class="level2"> 786 786 <h2 class="anchored" data-anchor-id="auto-type-resolution">Auto Type Resolution</h2> 787 787 <p>When using index lookup, the sample type can be resolved automatically:</p> 788 - <div id="c8d1bb96" class="cell"> 788 + <div id="ef1853c2" class="cell"> 789 789 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex</span> 790 790 <span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span> 791 791 <span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>index <span class="op">=</span> LocalIndex()</span> ··· 799 799 </section> 800 800 <section id="error-handling" class="level2"> 801 801 <h2 class="anchored" data-anchor-id="error-handling">Error Handling</h2> 802 - <div id="eb3d77c0" class="cell"> 802 + <div id="c7b85275" class="cell"> 803 803 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="cf">try</span>:</span> 804 804 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> ds <span class="op">=</span> load_dataset(<span class="st">"path/to/data.tar"</span>, MySample, split<span class="op">=</span><span class="st">"train"</span>)</span> 805 805 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="cf">except</span> <span class="pp">FileNotFoundError</span>:</span> ··· 815 815 </section> 816 816 <section id="complete-example" class="level2"> 817 817 <h2 class="anchored" data-anchor-id="complete-example">Complete Example</h2> 818 - <div id="704fa1a4" class="cell"> 818 + <div id="164b4e80" class="cell"> 819 819 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 820 820 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 821 821 <span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span>
+10 -10
docs/reference/local-storage.html
··· 580 580 <section id="localindex" class="level2"> 581 581 <h2 class="anchored" data-anchor-id="localindex">LocalIndex</h2> 582 582 <p>The index tracks datasets in Redis:</p> 583 - <div id="a44d510b" class="cell"> 583 + <div id="c3cdad09" class="cell"> 584 584 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex</span> 585 585 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a></span> 586 586 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Default connection (localhost:6379)</span></span> ··· 596 596 </div> 597 597 <section id="adding-entries" class="level3"> 598 598 <h3 class="anchored" data-anchor-id="adding-entries">Adding Entries</h3> 599 - <div id="4bf31bbd" class="cell"> 599 + <div id="9307047e" class="cell"> 600 600 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 601 601 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 602 602 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span> ··· 621 621 </section> 622 622 <section id="listing-and-retrieving" class="level3"> 623 623 <h3 class="anchored" data-anchor-id="listing-and-retrieving">Listing and Retrieving</h3> 624 - <div id="e5ce8553" class="cell"> 624 + <div id="37f9c823" class="cell"> 625 625 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Iterate all entries</span></span> 626 626 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> entry <span class="kw">in</span> index.entries:</span> 627 627 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>entry<span class="sc">.</span>name<span class="sc">}</span><span class="ss">: </span><span class="sc">{</span>entry<span class="sc">.</span>cid<span class="sc">}</span><span class="ss">"</span>)</span> ··· 640 640 <section id="repo" class="level2"> 641 641 <h2 class="anchored" data-anchor-id="repo">Repo</h2> 642 642 <p>The Repo class combines S3 storage with Redis indexing:</p> 643 - <div id="c67e2c85" class="cell"> 643 + <div id="effb5f5b" class="cell"> 644 644 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> Repo</span> 645 645 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span> 646 646 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="co"># From credentials file</span></span> ··· 681 681 </section> 682 682 <section id="inserting-datasets" class="level3"> 683 683 <h3 class="anchored" data-anchor-id="inserting-datasets">Inserting Datasets</h3> 684 - <div id="d18044ce" class="cell"> 684 + <div id="42d10df9" class="cell"> 685 685 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 686 686 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 687 687 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a></span> ··· 711 711 </section> 712 712 <section id="insert-options" class="level3"> 713 713 <h3 class="anchored" data-anchor-id="insert-options">Insert Options</h3> 714 - <div id="9ade4142" class="cell"> 714 + <div id="18256c08" class="cell"> 715 715 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>entry, ds <span class="op">=</span> repo.insert(</span> 716 716 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> dataset,</span> 717 717 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> name<span class="op">=</span><span class="st">"my-dataset"</span>,</span> ··· 725 725 <section id="localdatasetentry" class="level2"> 726 726 <h2 class="anchored" data-anchor-id="localdatasetentry">LocalDatasetEntry</h2> 727 727 <p>Index entries provide content-addressable identification:</p> 728 - <div id="c546d783" class="cell"> 728 + <div id="c8e1e3e3" class="cell"> 729 729 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.get_entry_by_name(<span class="st">"my-dataset"</span>)</span> 730 730 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span> 731 731 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Core properties (IndexEntry protocol)</span></span> ··· 758 758 <section id="schema-storage" class="level2"> 759 759 <h2 class="anchored" data-anchor-id="schema-storage">Schema Storage</h2> 760 760 <p>Schemas can be stored and retrieved from the index:</p> 761 - <div id="59c2dea2" class="cell"> 761 + <div id="b161ccd5" class="cell"> 762 762 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish a schema</span></span> 763 763 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>schema_ref <span class="op">=</span> index.publish_schema(</span> 764 764 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> ImageSample,</span> ··· 789 789 <section id="s3datastore" class="level2"> 790 790 <h2 class="anchored" data-anchor-id="s3datastore">S3DataStore</h2> 791 791 <p>For direct S3 operations without Redis indexing:</p> 792 - <div id="9dcfccbe" class="cell"> 792 + <div id="6f124574" class="cell"> 793 793 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> S3DataStore</span> 794 794 <span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span> 795 795 <span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> S3DataStore(</span> ··· 811 811 </section> 812 812 <section id="complete-workflow-example" class="level2"> 813 813 <h2 class="anchored" data-anchor-id="complete-workflow-example">Complete Workflow Example</h2> 814 - <div id="7550e9ba" class="cell"> 814 + <div id="8de41bbe" class="cell"> 815 815 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 816 816 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 817 817 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span>
+12 -12
docs/reference/packable-samples.html
··· 575 575 <section id="the-packable-decorator" class="level2"> 576 576 <h2 class="anchored" data-anchor-id="the-packable-decorator">The <code>@packable</code> Decorator</h2> 577 577 <p>The recommended way to define a sample type is with the <code>@packable</code> decorator:</p> 578 - <div id="4ec9de2a" class="cell"> 578 + <div id="c88d75b7" class="cell"> 579 579 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 580 580 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 581 581 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> ··· 597 597 <h2 class="anchored" data-anchor-id="supported-field-types">Supported Field Types</h2> 598 598 <section id="primitives" class="level3"> 599 599 <h3 class="anchored" data-anchor-id="primitives">Primitives</h3> 600 - <div id="43fb84ca" class="cell"> 600 + <div id="29da51cc" class="cell"> 601 601 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 602 602 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> PrimitiveSample:</span> 603 603 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> name: <span class="bu">str</span></span> ··· 610 610 <section id="numpy-arrays" class="level3"> 611 611 <h3 class="anchored" data-anchor-id="numpy-arrays">NumPy Arrays</h3> 612 612 <p>Fields annotated as <code>NDArray</code> are automatically converted:</p> 613 - <div id="8a207225" class="cell"> 613 + <div id="9a7dc8af" class="cell"> 614 614 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 615 615 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ArraySample:</span> 616 616 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> features: NDArray <span class="co"># Required array</span></span> ··· 632 632 </section> 633 633 <section id="lists" class="level3"> 634 634 <h3 class="anchored" data-anchor-id="lists">Lists</h3> 635 - <div id="75b1f812" class="cell"> 635 + <div id="21da72f0" class="cell"> 636 636 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 637 637 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ListSample:</span> 638 638 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> tags: <span class="bu">list</span>[<span class="bu">str</span>]</span> ··· 644 644 <h2 class="anchored" data-anchor-id="serialization">Serialization</h2> 645 645 <section id="packing-to-bytes" class="level3"> 646 646 <h3 class="anchored" data-anchor-id="packing-to-bytes">Packing to Bytes</h3> 647 - <div id="2465c665" class="cell"> 647 + <div id="63d34389" class="cell"> 648 648 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>sample <span class="op">=</span> ImageSample(</span> 649 649 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> image<span class="op">=</span>np.random.rand(<span class="dv">224</span>, <span class="dv">224</span>, <span class="dv">3</span>).astype(np.float32),</span> 650 650 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> label<span class="op">=</span><span class="st">"cat"</span>,</span> ··· 658 658 </section> 659 659 <section id="unpacking-from-bytes" class="level3"> 660 660 <h3 class="anchored" data-anchor-id="unpacking-from-bytes">Unpacking from Bytes</h3> 661 - <div id="f3666c1c" class="cell"> 661 + <div id="1279f832" class="cell"> 662 662 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Deserialize from bytes</span></span> 663 663 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>restored <span class="op">=</span> ImageSample.from_bytes(packed_bytes)</span> 664 664 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a></span> ··· 670 670 <section id="webdataset-format" class="level3"> 671 671 <h3 class="anchored" data-anchor-id="webdataset-format">WebDataset Format</h3> 672 672 <p>The <code>as_wds</code> property returns a dict ready for WebDataset:</p> 673 - <div id="8e99a94a" class="cell"> 673 + <div id="567c26d3" class="cell"> 674 674 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>wds_dict <span class="op">=</span> sample.as_wds</span> 675 675 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="co"># {'__key__': '1234...', 'msgpack': b'...'}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 676 676 </div> 677 677 <p>Write samples to a tar file:</p> 678 - <div id="cca39d38" class="cell"> 678 + <div id="83d0f77f" class="cell"> 679 679 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 680 680 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span> 681 681 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> wds.writer.TarWriter(<span class="st">"data-000000.tar"</span>) <span class="im">as</span> sink:</span> ··· 688 688 <section id="direct-inheritance-alternative" class="level2"> 689 689 <h2 class="anchored" data-anchor-id="direct-inheritance-alternative">Direct Inheritance (Alternative)</h2> 690 690 <p>You can also inherit directly from <code>PackableSample</code>:</p> 691 - <div id="e77faded" class="cell"> 691 + <div id="4292f29d" class="cell"> 692 692 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> dataclasses <span class="im">import</span> dataclass</span> 693 693 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span> 694 694 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="at">@dataclass</span></span> ··· 726 726 <section id="the-_ensure_good-method" class="level3"> 727 727 <h3 class="anchored" data-anchor-id="the-_ensure_good-method">The <code>_ensure_good()</code> Method</h3> 728 728 <p>This method runs automatically after construction and handles NDArray conversion:</p> 729 - <div id="f96932dd" class="cell"> 729 + <div id="39755703" class="cell"> 730 730 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> _ensure_good(<span class="va">self</span>):</span> 731 731 <span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> field <span class="kw">in</span> dataclasses.fields(<span class="va">self</span>):</span> 732 732 <span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> _is_possibly_ndarray_type(field.<span class="bu">type</span>):</span> ··· 742 742 <ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-2-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-1" role="tab" aria-controls="tabset-2-1" aria-selected="true">Do</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-2-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-2" role="tab" aria-controls="tabset-2-2" aria-selected="false">Don’t</a></li></ul> 743 743 <div class="tab-content"> 744 744 <div id="tabset-2-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-2-1-tab"> 745 - <div id="5013430d" class="cell"> 745 + <div id="97d6bedd" class="cell"> 746 746 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 747 747 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> GoodSample:</span> 748 748 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> features: NDArray <span class="co"># Clear type annotation</span></span> ··· 752 752 </div> 753 753 </div> 754 754 <div id="tabset-2-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-2-2-tab"> 755 - <div id="22f126e2" class="cell"> 755 + <div id="2e153167" class="cell"> 756 756 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 757 757 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> BadSample:</span> 758 758 <span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a> <span class="co"># DON'T: Nested dataclasses not supported</span></span>
+7 -7
docs/reference/promotion.html
··· 571 571 </section> 572 572 <section id="basic-usage" class="level2"> 573 573 <h2 class="anchored" data-anchor-id="basic-usage">Basic Usage</h2> 574 - <div id="159e75b7" class="cell"> 574 + <div id="291321dd" class="cell"> 575 575 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex</span> 576 576 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient</span> 577 577 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.promote <span class="im">import</span> promote_to_atmosphere</span> ··· 591 591 </section> 592 592 <section id="with-metadata" class="level2"> 593 593 <h2 class="anchored" data-anchor-id="with-metadata">With Metadata</h2> 594 - <div id="8e958d05" class="cell"> 594 + <div id="54574176" class="cell"> 595 595 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>at_uri <span class="op">=</span> promote_to_atmosphere(</span> 596 596 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a> entry,</span> 597 597 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> local_index,</span> ··· 606 606 <section id="schema-deduplication" class="level2"> 607 607 <h2 class="anchored" data-anchor-id="schema-deduplication">Schema Deduplication</h2> 608 608 <p>The promotion workflow automatically checks for existing schemas:</p> 609 - <div id="9a02170b" class="cell"> 609 + <div id="bcff1b7d" class="cell"> 610 610 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># First promotion: publishes schema</span></span> 611 611 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>uri1 <span class="op">=</span> promote_to_atmosphere(entry1, local_index, client)</span> 612 612 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span> ··· 626 626 <div class="tab-content"> 627 627 <div id="tabset-1-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-1-1-tab"> 628 628 <p>By default, promotion keeps the original data URLs:</p> 629 - <div id="d77ff7b7" class="cell"> 629 + <div id="9a401da2" class="cell"> 630 630 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Data stays in original S3 location</span></span> 631 631 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>at_uri <span class="op">=</span> promote_to_atmosphere(entry, local_index, client)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 632 632 </div> ··· 639 639 </div> 640 640 <div id="tabset-1-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-1-2-tab"> 641 641 <p>To copy data to a different storage location:</p> 642 - <div id="3482956b" class="cell"> 642 + <div id="8be7529d" class="cell"> 643 643 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> S3DataStore</span> 644 644 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span> 645 645 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Create new data store</span></span> ··· 667 667 </section> 668 668 <section id="complete-workflow-example" class="level2"> 669 669 <h2 class="anchored" data-anchor-id="complete-workflow-example">Complete Workflow Example</h2> 670 - <div id="601f04bf" class="cell"> 670 + <div id="e8f9c9af" class="cell"> 671 671 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 672 672 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 673 673 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> ··· 739 739 </section> 740 740 <section id="error-handling" class="level2"> 741 741 <h2 class="anchored" data-anchor-id="error-handling">Error Handling</h2> 742 - <div id="71ff8cf6" class="cell"> 742 + <div id="df70fed6" class="cell"> 743 743 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="cf">try</span>:</span> 744 744 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> at_uri <span class="op">=</span> promote_to_atmosphere(entry, local_index, client)</span> 745 745 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="cf">except</span> <span class="pp">KeyError</span> <span class="im">as</span> e:</span>
+9 -9
docs/reference/protocols.html
··· 585 585 <section id="indexentry-protocol" class="level2"> 586 586 <h2 class="anchored" data-anchor-id="indexentry-protocol">IndexEntry Protocol</h2> 587 587 <p>Represents a dataset entry in any index:</p> 588 - <div id="0082e78a" class="cell"> 588 + <div id="44601668" class="cell"> 589 589 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata._protocols <span class="im">import</span> IndexEntry</span> 590 590 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a></span> 591 591 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> process_entry(entry: IndexEntry) <span class="op">-&gt;</span> <span class="va">None</span>:</span> ··· 639 639 <section id="abstractindex-protocol" class="level2"> 640 640 <h2 class="anchored" data-anchor-id="abstractindex-protocol">AbstractIndex Protocol</h2> 641 641 <p>Defines operations for managing schemas and datasets:</p> 642 - <div id="0e584ac4" class="cell"> 642 + <div id="34a50018" class="cell"> 643 643 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata._protocols <span class="im">import</span> AbstractIndex</span> 644 644 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span> 645 645 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> list_all_datasets(index: AbstractIndex) <span class="op">-&gt;</span> <span class="va">None</span>:</span> ··· 649 649 </div> 650 650 <section id="dataset-operations" class="level3"> 651 651 <h3 class="anchored" data-anchor-id="dataset-operations">Dataset Operations</h3> 652 - <div id="7a04b9f8" class="cell"> 652 + <div id="b575008c" class="cell"> 653 653 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Insert a dataset</span></span> 654 654 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> index.insert_dataset(</span> 655 655 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> dataset,</span> ··· 667 667 </section> 668 668 <section id="schema-operations" class="level3"> 669 669 <h3 class="anchored" data-anchor-id="schema-operations">Schema Operations</h3> 670 - <div id="9b946101" class="cell"> 670 + <div id="0940d143" class="cell"> 671 671 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish a schema</span></span> 672 672 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>schema_ref <span class="op">=</span> index.publish_schema(</span> 673 673 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> MySample,</span> ··· 698 698 <section id="abstractdatastore-protocol" class="level2"> 699 699 <h2 class="anchored" data-anchor-id="abstractdatastore-protocol">AbstractDataStore Protocol</h2> 700 700 <p>Abstracts over different storage backends:</p> 701 - <div id="e31f5db3" class="cell"> 701 + <div id="0d7d1588" class="cell"> 702 702 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata._protocols <span class="im">import</span> AbstractDataStore</span> 703 703 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span> 704 704 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> write_dataset(store: AbstractDataStore, dataset) <span class="op">-&gt;</span> <span class="bu">list</span>[<span class="bu">str</span>]:</span> ··· 708 708 </div> 709 709 <section id="methods" class="level3"> 710 710 <h3 class="anchored" data-anchor-id="methods">Methods</h3> 711 - <div id="b632f55a" class="cell"> 711 + <div id="70ce1ae2" class="cell"> 712 712 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Write dataset shards</span></span> 713 713 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>urls <span class="op">=</span> store.write_shards(</span> 714 714 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> dataset,</span> ··· 735 735 <section id="using-protocols-for-polymorphism" class="level2"> 736 736 <h2 class="anchored" data-anchor-id="using-protocols-for-polymorphism">Using Protocols for Polymorphism</h2> 737 737 <p>Write code that works with any backend:</p> 738 - <div id="75c51b75" class="cell"> 738 + <div id="b9de70df" class="cell"> 739 739 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata._protocols <span class="im">import</span> AbstractIndex, IndexEntry</span> 740 740 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> Dataset</span> 741 741 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a></span> ··· 793 793 <section id="type-checking" class="level2"> 794 794 <h2 class="anchored" data-anchor-id="type-checking">Type Checking</h2> 795 795 <p>Protocols are runtime-checkable:</p> 796 - <div id="72a77f87" class="cell"> 796 + <div id="0c54ebab" class="cell"> 797 797 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata._protocols <span class="im">import</span> IndexEntry, AbstractIndex</span> 798 798 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span> 799 799 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Check if object implements protocol</span></span> ··· 807 807 </section> 808 808 <section id="complete-example" class="level2"> 809 809 <h2 class="anchored" data-anchor-id="complete-example">Complete Example</h2> 810 - <div id="2e0393f6" class="cell"> 810 + <div id="a4ff8035" class="cell"> 811 811 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> 812 812 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> LocalIndex, S3DataStore</span> 813 813 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereClient, AtmosphereIndex</span>
+12 -12
docs/tutorials/atmosphere.html
··· 590 590 </section> 591 591 <section id="setup" class="level2"> 592 592 <h2 class="anchored" data-anchor-id="setup">Setup</h2> 593 - <div id="79efb574" class="cell"> 593 + <div id="0221f646" class="cell"> 594 594 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 595 595 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 596 596 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> ··· 607 607 </section> 608 608 <section id="define-sample-types" class="level2"> 609 609 <h2 class="anchored" data-anchor-id="define-sample-types">Define Sample Types</h2> 610 - <div id="3c88df23" class="cell"> 610 + <div id="f2056c97" class="cell"> 611 611 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 612 612 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ImageSample:</span> 613 613 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> <span class="co">"""A sample containing image data with metadata."""</span></span> ··· 626 626 <section id="type-introspection" class="level2"> 627 627 <h2 class="anchored" data-anchor-id="type-introspection">Type Introspection</h2> 628 628 <p>See what information is available from a PackableSample type:</p> 629 - <div id="2685cf9c" class="cell"> 629 + <div id="477ec7a7" class="cell"> 630 630 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> dataclasses <span class="im">import</span> fields, is_dataclass</span> 631 631 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span> 632 632 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Sample type: </span><span class="sc">{</span>ImageSample<span class="sc">.</span><span class="va">__name__</span><span class="sc">}</span><span class="ss">"</span>)</span> ··· 654 654 <section id="at-uri-parsing" class="level2"> 655 655 <h2 class="anchored" data-anchor-id="at-uri-parsing">AT URI Parsing</h2> 656 656 <p>ATProto records are identified by AT URIs:</p> 657 - <div id="5dee8ce2" class="cell"> 657 + <div id="83f3b662" class="cell"> 658 658 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>uris <span class="op">=</span> [</span> 659 659 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="st">"at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz789"</span>,</span> 660 660 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="st">"at://alice.bsky.social/ac.foundation.dataset.record/my-dataset"</span>,</span> ··· 671 671 <section id="authentication" class="level2"> 672 672 <h2 class="anchored" data-anchor-id="authentication">Authentication</h2> 673 673 <p>Connect to ATProto:</p> 674 - <div id="673177ef" class="cell"> 674 + <div id="021ff685" class="cell"> 675 675 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> 676 676 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"your.handle.social"</span>, <span class="st">"your-app-password"</span>)</span> 677 677 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span> ··· 681 681 </section> 682 682 <section id="publish-a-schema" class="level2"> 683 683 <h2 class="anchored" data-anchor-id="publish-a-schema">Publish a Schema</h2> 684 - <div id="477fc95a" class="cell"> 684 + <div id="d732fb90" class="cell"> 685 685 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>schema_publisher <span class="op">=</span> SchemaPublisher(client)</span> 686 686 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>schema_uri <span class="op">=</span> schema_publisher.publish(</span> 687 687 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> ImageSample,</span> ··· 694 694 </section> 695 695 <section id="list-your-schemas" class="level2"> 696 696 <h2 class="anchored" data-anchor-id="list-your-schemas">List Your Schemas</h2> 697 - <div id="fd23691c" class="cell"> 697 + <div id="aa61f989" class="cell"> 698 698 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>schema_loader <span class="op">=</span> SchemaLoader(client)</span> 699 699 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>schemas <span class="op">=</span> schema_loader.list_all(limit<span class="op">=</span><span class="dv">10</span>)</span> 700 700 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Found </span><span class="sc">{</span><span class="bu">len</span>(schemas)<span class="sc">}</span><span class="ss"> schema(s)"</span>)</span> ··· 707 707 <h2 class="anchored" data-anchor-id="publish-a-dataset">Publish a Dataset</h2> 708 708 <section id="with-external-urls" class="level3"> 709 709 <h3 class="anchored" data-anchor-id="with-external-urls">With External URLs</h3> 710 - <div id="48a2d850" class="cell"> 710 + <div id="7b9bd2d8" class="cell"> 711 711 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>dataset_publisher <span class="op">=</span> DatasetPublisher(client)</span> 712 712 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>dataset_uri <span class="op">=</span> dataset_publisher.publish_with_urls(</span> 713 713 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> urls<span class="op">=</span>[<span class="st">"s3://example-bucket/demo-data-{000000..000009}.tar"</span>],</span> ··· 723 723 <section id="with-blob-storage" class="level3"> 724 724 <h3 class="anchored" data-anchor-id="with-blob-storage">With Blob Storage</h3> 725 725 <p>For smaller datasets, store data directly in ATProto blobs:</p> 726 - <div id="847dc9a0" class="cell"> 726 + <div id="582dc515" class="cell"> 727 727 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> io</span> 728 728 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span> 729 729 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> ··· 764 764 </section> 765 765 <section id="list-and-load-datasets" class="level2"> 766 766 <h2 class="anchored" data-anchor-id="list-and-load-datasets">List and Load Datasets</h2> 767 - <div id="1232ab56" class="cell"> 767 + <div id="920c10fd" class="cell"> 768 768 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>dataset_loader <span class="op">=</span> DatasetLoader(client)</span> 769 769 <span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a>datasets <span class="op">=</span> dataset_loader.list_all(limit<span class="op">=</span><span class="dv">10</span>)</span> 770 770 <span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Found </span><span class="sc">{</span><span class="bu">len</span>(datasets)<span class="sc">}</span><span class="ss"> dataset(s)"</span>)</span> ··· 779 779 </section> 780 780 <section id="load-a-dataset" class="level2"> 781 781 <h2 class="anchored" data-anchor-id="load-a-dataset">Load a Dataset</h2> 782 - <div id="1302d75a" class="cell"> 782 + <div id="3bd82e88" class="cell"> 783 783 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Check storage type</span></span> 784 784 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>storage_type <span class="op">=</span> dataset_loader.get_storage_type(<span class="bu">str</span>(blob_dataset_uri))</span> 785 785 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Storage type: </span><span class="sc">{</span>storage_type<span class="sc">}</span><span class="ss">"</span>)</span> ··· 796 796 </section> 797 797 <section id="complete-publishing-workflow" class="level2"> 798 798 <h2 class="anchored" data-anchor-id="complete-publishing-workflow">Complete Publishing Workflow</h2> 799 - <div id="fc25d258" class="cell"> 799 + <div id="83e0ad9c" class="cell"> 800 800 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="co"># 1. Define and create samples</span></span> 801 801 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 802 802 <span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> FeatureSample:</span>
+8 -8
docs/tutorials/local-workflow.html
··· 586 586 </section> 587 587 <section id="setup" class="level2"> 588 588 <h2 class="anchored" data-anchor-id="setup">Setup</h2> 589 - <div id="dd0b9482" class="cell"> 589 + <div id="a5ef36ed" class="cell"> 590 590 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 591 591 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 592 592 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> ··· 596 596 </section> 597 597 <section id="define-sample-types" class="level2"> 598 598 <h2 class="anchored" data-anchor-id="define-sample-types">Define Sample Types</h2> 599 - <div id="3f29999a" class="cell"> 599 + <div id="ab2f69ac" class="cell"> 600 600 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 601 601 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> TrainingSample:</span> 602 602 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="co">"""A sample containing features and label for training."""</span></span> ··· 613 613 <section id="localdatasetentry" class="level2"> 614 614 <h2 class="anchored" data-anchor-id="localdatasetentry">LocalDatasetEntry</h2> 615 615 <p>Create entries with content-addressable CIDs:</p> 616 - <div id="5294c5f2" class="cell"> 616 + <div id="7039b362" class="cell"> 617 617 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Create an entry manually</span></span> 618 618 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>entry <span class="op">=</span> LocalDatasetEntry(</span> 619 619 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> _name<span class="op">=</span><span class="st">"my-dataset"</span>,</span> ··· 645 645 <section id="localindex" class="level2"> 646 646 <h2 class="anchored" data-anchor-id="localindex">LocalIndex</h2> 647 647 <p>The index tracks datasets in Redis:</p> 648 - <div id="358c728b" class="cell"> 648 + <div id="09bcb72f" class="cell"> 649 649 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> redis <span class="im">import</span> Redis</span> 650 650 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span> 651 651 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Connect to Redis</span></span> ··· 656 656 </div> 657 657 <section id="schema-management" class="level3"> 658 658 <h3 class="anchored" data-anchor-id="schema-management">Schema Management</h3> 659 - <div id="caa01ec8" class="cell"> 659 + <div id="58787171" class="cell"> 660 660 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Publish a schema</span></span> 661 661 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>schema_ref <span class="op">=</span> index.publish_schema(TrainingSample, version<span class="op">=</span><span class="st">"1.0.0"</span>)</span> 662 662 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Published schema: </span><span class="sc">{</span>schema_ref<span class="sc">}</span><span class="ss">"</span>)</span> ··· 678 678 <section id="s3datastore" class="level2"> 679 679 <h2 class="anchored" data-anchor-id="s3datastore">S3DataStore</h2> 680 680 <p>For direct S3 operations:</p> 681 - <div id="58702d2d" class="cell"> 681 + <div id="660c7258" class="cell"> 682 682 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>creds <span class="op">=</span> {</span> 683 683 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="st">"AWS_ENDPOINT"</span>: <span class="st">"http://localhost:9000"</span>,</span> 684 684 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="st">"AWS_ACCESS_KEY_ID"</span>: <span class="st">"minioadmin"</span>,</span> ··· 694 694 <section id="complete-repo-workflow" class="level2"> 695 695 <h2 class="anchored" data-anchor-id="complete-repo-workflow">Complete Repo Workflow</h2> 696 696 <p>The Repo class combines S3 storage with Redis indexing:</p> 697 - <div id="05705ab0" class="cell"> 697 + <div id="69add154" class="cell"> 698 698 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># 1. Create sample data</span></span> 699 699 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>samples <span class="op">=</span> [</span> 700 700 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> TrainingSample(</span> ··· 740 740 <section id="using-load_dataset-with-index" class="level2"> 741 741 <h2 class="anchored" data-anchor-id="using-load_dataset-with-index">Using load_dataset with Index</h2> 742 742 <p>The <code>load_dataset()</code> function supports index lookup:</p> 743 - <div id="d70ffd7c" class="cell"> 743 + <div id="6803ce77" class="cell"> 744 744 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata <span class="im">import</span> load_dataset</span> 745 745 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span> 746 746 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Load from local index</span></span>
+11 -11
docs/tutorials/promotion.html
··· 580 580 </section> 581 581 <section id="setup" class="level2"> 582 582 <h2 class="anchored" data-anchor-id="setup">Setup</h2> 583 - <div id="315d6801" class="cell"> 583 + <div id="e5ca780d" class="cell"> 584 584 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 585 585 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 586 586 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> ··· 593 593 <section id="prepare-a-local-dataset" class="level2"> 594 594 <h2 class="anchored" data-anchor-id="prepare-a-local-dataset">Prepare a Local Dataset</h2> 595 595 <p>First, set up a dataset in local storage:</p> 596 - <div id="96942ceb" class="cell"> 596 + <div id="ff1e9d10" class="cell"> 597 597 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># 1. Define sample type</span></span> 598 598 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 599 599 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> ExperimentSample:</span> ··· 642 642 <section id="basic-promotion" class="level2"> 643 643 <h2 class="anchored" data-anchor-id="basic-promotion">Basic Promotion</h2> 644 644 <p>Promote the dataset to ATProto:</p> 645 - <div id="8986dcf0" class="cell"> 645 + <div id="ea693841" class="cell"> 646 646 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Connect to atmosphere</span></span> 647 647 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> AtmosphereClient()</span> 648 648 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>client.login(<span class="st">"myhandle.bsky.social"</span>, <span class="st">"app-password"</span>)</span> ··· 655 655 <section id="promotion-with-metadata" class="level2"> 656 656 <h2 class="anchored" data-anchor-id="promotion-with-metadata">Promotion with Metadata</h2> 657 657 <p>Add description, tags, and license:</p> 658 - <div id="d444d286" class="cell"> 658 + <div id="80f3b4d7" class="cell"> 659 659 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>at_uri <span class="op">=</span> promote_to_atmosphere(</span> 660 660 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> local_entry,</span> 661 661 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> local_index,</span> ··· 671 671 <section id="schema-deduplication" class="level2"> 672 672 <h2 class="anchored" data-anchor-id="schema-deduplication">Schema Deduplication</h2> 673 673 <p>The promotion workflow automatically checks for existing schemas:</p> 674 - <div id="1b905a07" class="cell"> 674 + <div id="0d2d042e" class="cell"> 675 675 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.promote <span class="im">import</span> _find_existing_schema</span> 676 676 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a></span> 677 677 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Check if schema already exists</span></span> ··· 683 683 <span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="st">"No existing schema found, will publish new one"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 684 684 </div> 685 685 <p>When you promote multiple datasets with the same sample type:</p> 686 - <div id="ca3f1b25" class="cell"> 686 + <div id="1687332f" class="cell"> 687 687 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># First promotion: publishes schema</span></span> 688 688 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>uri1 <span class="op">=</span> promote_to_atmosphere(entry1, local_index, client)</span> 689 689 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a></span> ··· 698 698 <div class="tab-content"> 699 699 <div id="tabset-1-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-1-1-tab"> 700 700 <p>By default, promotion keeps the original data URLs:</p> 701 - <div id="11ca1702" class="cell"> 701 + <div id="5ecc58cf" class="cell"> 702 702 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Data stays in original S3 location</span></span> 703 703 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>at_uri <span class="op">=</span> promote_to_atmosphere(local_entry, local_index, client)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> 704 704 </div> ··· 711 711 </div> 712 712 <div id="tabset-1-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-1-2-tab"> 713 713 <p>To copy data to a different storage location:</p> 714 - <div id="cd32fb5d" class="cell"> 714 + <div id="e0e2ec76" class="cell"> 715 715 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.local <span class="im">import</span> S3DataStore</span> 716 716 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span> 717 717 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Create new data store</span></span> ··· 741 741 <section id="verify-on-atmosphere" class="level2"> 742 742 <h2 class="anchored" data-anchor-id="verify-on-atmosphere">Verify on Atmosphere</h2> 743 743 <p>After promotion, verify the dataset is accessible:</p> 744 - <div id="3224902e" class="cell"> 744 + <div id="7fc91306" class="cell"> 745 745 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> atdata.atmosphere <span class="im">import</span> AtmosphereIndex</span> 746 746 <span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span> 747 747 <span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>atm_index <span class="op">=</span> AtmosphereIndex(client)</span> ··· 762 762 </section> 763 763 <section id="error-handling" class="level2"> 764 764 <h2 class="anchored" data-anchor-id="error-handling">Error Handling</h2> 765 - <div id="99396631" class="cell"> 765 + <div id="22a884f6" class="cell"> 766 766 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="cf">try</span>:</span> 767 767 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> at_uri <span class="op">=</span> promote_to_atmosphere(local_entry, local_index, client)</span> 768 768 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="cf">except</span> <span class="pp">KeyError</span> <span class="im">as</span> e:</span> ··· 786 786 </section> 787 787 <section id="complete-workflow" class="level2"> 788 788 <h2 class="anchored" data-anchor-id="complete-workflow">Complete Workflow</h2> 789 - <div id="ff0aa5d0" class="cell"> 789 + <div id="0d2e92c1" class="cell"> 790 790 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Complete local-to-atmosphere workflow</span></span> 791 791 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 792 792 <span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span>
+6 -6
docs/tutorials/quickstart.html
··· 569 569 <section id="define-a-sample-type" class="level2"> 570 570 <h2 class="anchored" data-anchor-id="define-a-sample-type">Define a Sample Type</h2> 571 571 <p>Use the <code>@packable</code> decorator to create a typed sample:</p> 572 - <div id="c381b4b0" class="cell"> 572 + <div id="f05ce6d5" class="cell"> 573 573 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span> 574 574 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> numpy.typing <span class="im">import</span> NDArray</span> 575 575 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> atdata</span> ··· 590 590 </section> 591 591 <section id="create-sample-instances" class="level2"> 592 592 <h2 class="anchored" data-anchor-id="create-sample-instances">Create Sample Instances</h2> 593 - <div id="43ef7372" class="cell"> 593 + <div id="4ad5b1d0" class="cell"> 594 594 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a single sample</span></span> 595 595 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>sample <span class="op">=</span> ImageSample(</span> 596 596 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> image<span class="op">=</span>np.random.rand(<span class="dv">224</span>, <span class="dv">224</span>, <span class="dv">3</span>).astype(np.float32),</span> ··· 611 611 <section id="write-a-dataset" class="level2"> 612 612 <h2 class="anchored" data-anchor-id="write-a-dataset">Write a Dataset</h2> 613 613 <p>Use WebDataset’s <code>TarWriter</code> to create dataset files:</p> 614 - <div id="60bdb028" class="cell"> 614 + <div id="bed8df64" class="cell"> 615 615 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> webdataset <span class="im">as</span> wds</span> 616 616 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span> 617 617 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Create 100 samples</span></span> ··· 635 635 <section id="load-and-iterate" class="level2"> 636 636 <h2 class="anchored" data-anchor-id="load-and-iterate">Load and Iterate</h2> 637 637 <p>Create a typed <code>Dataset</code> and iterate with batching:</p> 638 - <div id="4d186c87" class="cell"> 638 + <div id="b46b4847" class="cell"> 639 639 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Load dataset with type</span></span> 640 640 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>dataset <span class="op">=</span> atdata.Dataset[ImageSample](<span class="st">"my-dataset-000000.tar"</span>)</span> 641 641 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span> ··· 656 656 <section id="shuffled-iteration" class="level2"> 657 657 <h2 class="anchored" data-anchor-id="shuffled-iteration">Shuffled Iteration</h2> 658 658 <p>For training, use shuffled iteration:</p> 659 - <div id="383314d6" class="cell"> 659 + <div id="ac1c4e6d" class="cell"> 660 660 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> batch <span class="kw">in</span> dataset.shuffled(batch_size<span class="op">=</span><span class="dv">32</span>):</span> 661 661 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># Samples are shuffled at shard and sample level</span></span> 662 662 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> images <span class="op">=</span> batch.image</span> ··· 670 670 <section id="use-lenses-for-type-transformations" class="level2"> 671 671 <h2 class="anchored" data-anchor-id="use-lenses-for-type-transformations">Use Lenses for Type Transformations</h2> 672 672 <p>View datasets through different schemas:</p> 673 - <div id="404e33c6" class="cell"> 673 + <div id="e72a9506" class="cell"> 674 674 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Define a simplified view type</span></span> 675 675 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="at">@atdata.packable</span></span> 676 676 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> SimplifiedSample:</span>
+4 -121
docs_src/assets/styles.css
··· 4 4 LIGHT THEME STYLES 5 5 =========================================== */ 6 6 7 - /* Code block styling */ 8 - .sourceCode { 9 - background-color: #f8f9fa; 10 - border-radius: 4px; 11 - } 12 - 13 - /* Callout styling */ 14 - .callout { 15 - margin: 1rem 0; 16 - } 7 + /* Note: Quarto handles code block and callout styling. 8 + Only add custom component styles here. */ 17 9 18 10 /* API reference styling */ 19 11 .api-signature { ··· 89 81 90 82 /* =========================================== 91 83 DARK THEME STYLES 92 - Quarto uses [data-bs-theme="dark"] on html element 84 + Only for custom components - let Quarto handle 85 + standard elements (callouts, code blocks, tables) 93 86 =========================================== */ 94 87 95 - /* Code blocks in dark mode */ 96 - [data-bs-theme="dark"] .sourceCode { 97 - background-color: #1e1e1e; 98 - border: 1px solid #444; 99 - } 100 - 101 - [data-bs-theme="dark"] .sourceCode pre { 102 - background-color: #1e1e1e; 103 - color: #d4d4d4; 104 - } 105 - 106 - [data-bs-theme="dark"] pre > code.sourceCode { 107 - background-color: transparent; 108 - } 109 - 110 - [data-bs-theme="dark"] code { 111 - background-color: #2d2d2d; 112 - color: #e0e0e0; 113 - } 114 - 115 - /* Inline code in dark mode */ 116 - [data-bs-theme="dark"] p code, 117 - [data-bs-theme="dark"] li code { 118 - background-color: #3d3d3d; 119 - color: #e0e0e0; 120 - padding: 0.125rem 0.25rem; 121 - border-radius: 3px; 122 - } 123 - 124 88 /* API reference styling in dark mode */ 125 89 [data-bs-theme="dark"] .api-signature { 126 90 background-color: #2d2d2d; ··· 137 101 [data-bs-theme="dark"] .hero .tagline { 138 102 color: #adb5bd; 139 103 } 140 - 141 - /* Callout boxes in dark mode */ 142 - [data-bs-theme="dark"] .callout { 143 - border-left-width: 4px; 144 - } 145 - 146 - [data-bs-theme="dark"] .callout-note { 147 - background-color: rgba(13, 110, 253, 0.15); 148 - border-color: #0d6efd; 149 - } 150 - 151 - [data-bs-theme="dark"] .callout-tip { 152 - background-color: rgba(25, 135, 84, 0.15); 153 - border-color: #198754; 154 - } 155 - 156 - [data-bs-theme="dark"] .callout-warning { 157 - background-color: rgba(255, 193, 7, 0.15); 158 - border-color: #ffc107; 159 - } 160 - 161 - [data-bs-theme="dark"] .callout-caution { 162 - background-color: rgba(253, 126, 20, 0.15); 163 - border-color: #fd7e14; 164 - } 165 - 166 - [data-bs-theme="dark"] .callout-important { 167 - background-color: rgba(220, 53, 69, 0.15); 168 - border-color: #dc3545; 169 - } 170 - 171 - [data-bs-theme="dark"] .callout .callout-title-container { 172 - color: inherit; 173 - } 174 - 175 - [data-bs-theme="dark"] .callout-body-container { 176 - color: #e0e0e0; 177 - } 178 - 179 - /* Table styling in dark mode */ 180 - [data-bs-theme="dark"] table { 181 - color: #e0e0e0; 182 - } 183 - 184 - [data-bs-theme="dark"] table th { 185 - background-color: #2d2d2d; 186 - border-color: #444; 187 - } 188 - 189 - [data-bs-theme="dark"] table td { 190 - border-color: #444; 191 - } 192 - 193 - [data-bs-theme="dark"] table tr:nth-child(even) { 194 - background-color: rgba(255, 255, 255, 0.05); 195 - } 196 - 197 - /* Panel tabset in dark mode */ 198 - [data-bs-theme="dark"] .panel-tabset .nav-link { 199 - color: #adb5bd; 200 - } 201 - 202 - [data-bs-theme="dark"] .panel-tabset .nav-link.active { 203 - color: #fff; 204 - background-color: #375a7f; 205 - border-color: #375a7f; 206 - } 207 - 208 - /* Sidebar in dark mode */ 209 - [data-bs-theme="dark"] .sidebar { 210 - background-color: #222; 211 - } 212 - 213 - /* Links in dark mode for better visibility */ 214 - [data-bs-theme="dark"] a { 215 - color: #6ea8fe; 216 - } 217 - 218 - [data-bs-theme="dark"] a:hover { 219 - color: #9ec5fe; 220 - }