A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

chore: add ruff linter and fix all linting errors

- Add ruff>=0.14.13 to dev dependencies
- Fix unused imports across source and test files
- Use explicit re-exports in __init__.py (F401)
- Replace type() comparisons with isinstance()
- Convert f-strings without placeholders to regular strings
- Add blob storage demo to atmosphere_demo.py example

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+171 -117
+1
CHANGELOG.md
··· 11 11 ### Fixed 12 12 13 13 ### Changed 14 + - Add blob storage demo to atmosphere_demo.py example (#216) 14 15 - Implement full blob storage support for atmosphere datasets (#211) 15 16 - Add E2E blob roundtrip test (#215) 16 17 - Implement get_blobs() and blob URL generation in DatasetLoader (#214)
+109 -10
examples/atmosphere_demo.py
··· 20 20 """ 21 21 22 22 import argparse 23 - import sys 24 - from dataclasses import asdict, fields, is_dataclass 23 + from dataclasses import fields, is_dataclass 25 24 from datetime import datetime 26 25 27 26 import numpy as np ··· 83 82 confidence=0.95, 84 83 ) 85 84 86 - print(f"\nSample instance:") 85 + print("\nSample instance:") 87 86 print(f" image shape: {sample.image.shape}") 88 87 print(f" image dtype: {sample.image.dtype}") 89 88 print(f" label: {sample.label}") ··· 228 227 client = AtmosphereClient() 229 228 client.login(handle, password) 230 229 231 - print(f"Authenticated!") 230 + print("Authenticated!") 232 231 print(f" DID: {client.did}") 233 232 print(f" Handle: {client.handle}") 234 233 ··· 252 251 print(f" - {schema.get('name', 'Unknown')}: v{schema.get('version', '?')}") 253 252 254 253 # Publish a dataset record (pointing to example URLs) 255 - print("\nPublishing dataset record...") 254 + print("\nPublishing dataset record (external URL storage)...") 256 255 dataset_publisher = DatasetPublisher(client) 257 256 dataset_uri = dataset_publisher.publish_with_urls( 258 257 urls=["s3://example-bucket/demo-data-{000000..000009}.tar"], ··· 277 276 print(f" Tags: {', '.join(tags)}") 278 277 279 278 279 + def demo_blob_storage(handle: str, password: str): 280 + """Demonstrate blob storage for smaller datasets. 281 + 282 + ATProto supports blob storage (up to 50MB per blob by default, configurable). 283 + This is useful for smaller datasets that don't need external storage. 284 + 285 + Args: 286 + handle: Bluesky handle (e.g., 'alice.bsky.social') 287 + password: App-specific password 288 + """ 289 + import io 290 + import tarfile 291 + import msgpack 292 + 293 + print("\n" + "=" * 60) 294 + print("Blob Storage Demo") 295 + print("=" * 60) 296 + 297 + # Create client and authenticate 298 + print(f"\nConnecting as {handle}...") 299 + client = AtmosphereClient() 300 + client.login(handle, password) 301 + print(f"Authenticated as {client.handle}") 302 + 303 + # Define a simple sample type for this demo 304 + @atdata.packable 305 + class DemoSample: 306 + id: int 307 + text: str 308 + 309 + # Create a small WebDataset tar in memory 310 + print("\nCreating small dataset in memory...") 311 + samples = [ 312 + {"id": 0, "text": "Hello from blob storage!"}, 313 + {"id": 1, "text": "ATProto is decentralized."}, 314 + {"id": 2, "text": "atdata makes ML data easy."}, 315 + ] 316 + 317 + tar_buffer = io.BytesIO() 318 + with tarfile.open(fileobj=tar_buffer, mode="w") as tar: 319 + for i, sample in enumerate(samples): 320 + packed = msgpack.packb(sample) 321 + info = tarfile.TarInfo(name=f"sample_{i:06d}.msgpack") 322 + info.size = len(packed) 323 + tar.addfile(info, io.BytesIO(packed)) 324 + 325 + tar_data = tar_buffer.getvalue() 326 + print(f" Created tar with {len(samples)} samples ({len(tar_data):,} bytes)") 327 + 328 + # Publish schema 329 + print("\nPublishing schema...") 330 + schema_publisher = SchemaPublisher(client) 331 + schema_uri = schema_publisher.publish(DemoSample, version="1.0.0") 332 + print(f" Schema URI: {schema_uri}") 333 + 334 + # Publish dataset with blob storage 335 + print("\nUploading data as blob and publishing dataset...") 336 + dataset_publisher = DatasetPublisher(client) 337 + dataset_uri = dataset_publisher.publish_with_blobs( 338 + blobs=[tar_data], 339 + schema_uri=str(schema_uri), 340 + name="Blob Storage Demo Dataset", 341 + description="Small dataset stored directly in ATProto blobs", 342 + tags=["demo", "blob-storage"], 343 + ) 344 + print(f" Dataset URI: {dataset_uri}") 345 + 346 + # Verify storage type 347 + print("\nVerifying blob storage...") 348 + dataset_loader = DatasetLoader(client) 349 + storage_type = dataset_loader.get_storage_type(str(dataset_uri)) 350 + print(f" Storage type: {storage_type}") 351 + 352 + # Get blob URLs 353 + blob_urls = dataset_loader.get_blob_urls(str(dataset_uri)) 354 + print(f" Blob URLs: {len(blob_urls)} blob(s)") 355 + for url in blob_urls: 356 + # Truncate URL for display 357 + print(f" {url[:80]}...") 358 + 359 + # Load and iterate over data 360 + print("\nLoading and iterating over blob data...") 361 + ds = dataset_loader.to_dataset(str(dataset_uri), DemoSample) 362 + for batch in ds.ordered(): 363 + print(f" Sample id={batch.id}, text={batch.text}") 364 + 365 + print("\nBlob storage demo complete!") 366 + 367 + 280 368 def demo_dataset_loading(): 281 369 """Demonstrate loading a dataset from an ATProto record.""" 282 370 print("\n" + "=" * 60) ··· 296 384 # Get the dataset record 297 385 record = loader.get("at://did:plc:abc123/ac.foundation.dataset.record/xyz") 298 386 299 - # Get the WebDataset URLs 300 - urls = loader.get_urls("at://did:plc:abc123/ac.foundation.dataset.record/xyz") 301 - print(f"Dataset URLs: {urls}") 387 + # Check storage type (external URLs or ATProto blobs) 388 + storage_type = loader.get_storage_type(uri) 389 + print(f"Storage type: {storage_type}") 302 390 303 - # If you have the sample type class, create a Dataset directly 391 + # For external URL storage: 392 + if storage_type == "external": 393 + urls = loader.get_urls(uri) 394 + print(f"Dataset URLs: {urls}") 395 + 396 + # For blob storage: 397 + elif storage_type == "blobs": 398 + blob_urls = loader.get_blob_urls(uri) 399 + print(f"Blob URLs: {blob_urls}") 400 + 401 + # to_dataset() handles both storage types automatically: 304 402 dataset = loader.to_dataset( 305 403 "at://did:plc:abc123/ac.foundation.dataset.record/xyz", 306 404 sample_type=ImageSample, ··· 348 446 demo_mock_client() 349 447 demo_dataset_loading() 350 448 351 - # Run live demo if credentials provided 449 + # Run live demos if credentials provided 352 450 if args.handle and args.password: 353 451 demo_live_connection(args.handle, args.password) 452 + demo_blob_storage(args.handle, args.password) 354 453 else: 355 454 print("\n" + "=" * 60) 356 455 print("Live Demo Skipped")
+4 -4
examples/local_workflow.py
··· 31 31 from numpy.typing import NDArray 32 32 33 33 import atdata 34 - from atdata.local import LocalIndex, LocalDatasetEntry, Repo, S3DataStore 34 + from atdata.local import LocalIndex, LocalDatasetEntry, S3DataStore 35 35 36 36 37 37 # ============================================================================= ··· 83 83 _data_urls=["s3://bucket/data-000000.tar", "s3://bucket/data-000001.tar"], 84 84 ) 85 85 86 - print(f"\nCID comparison (same content, different name):") 86 + print("\nCID comparison (same content, different name):") 87 87 print(f" Entry 1 CID: {entry.cid}") 88 88 print(f" Entry 2 CID: {entry2.cid}") 89 89 print(f" Match: {entry.cid == entry2.cid}") ··· 96 96 print("=" * 60) 97 97 98 98 # LocalIndex without Redis connection works for read operations 99 - index = LocalIndex() 99 + _index = LocalIndex() # noqa: F841 - demo instantiation 100 100 101 101 print("\nLocalIndex created (no Redis connection)") 102 102 print("Methods available:") ··· 170 170 171 171 store = S3DataStore(creds, bucket="my-bucket") 172 172 173 - print(f"\nS3DataStore created:") 173 + print("\nS3DataStore created:") 174 174 print(f" Bucket: {store.bucket}") 175 175 print(f" Supports streaming: {store.supports_streaming()}") 176 176
+12 -13
examples/promote_workflow.py
··· 21 21 22 22 import argparse 23 23 from datetime import datetime 24 - from unittest.mock import Mock, MagicMock 24 + from unittest.mock import Mock 25 25 26 - import numpy as np 27 26 from numpy.typing import NDArray 28 27 29 28 import atdata ··· 99 98 }, 100 99 ) 101 100 102 - print(f"\nLocal entry to promote:") 101 + print("\nLocal entry to promote:") 103 102 print(f" Name: {local_entry.name}") 104 103 print(f" Schema: {local_entry.schema_ref}") 105 104 print(f" URLs: {len(local_entry.data_urls)} shards") ··· 147 146 license="CC-BY-4.0", 148 147 ) 149 148 150 - print(f"\nPromotion result:") 149 + print("\nPromotion result:") 151 150 print(f" AT URI: {result}") 152 - print(f"\nPublished:") 153 - print(f" Schema: at://did:plc:demo123456789/.../exp001") 154 - print(f" Dataset: at://did:plc:demo123456789/.../exp2024001") 151 + print("\nPublished:") 152 + print(" Schema: at://did:plc:demo123456789/.../exp001") 153 + print(" Dataset: at://did:plc:demo123456789/.../exp2024001") 155 154 156 155 157 156 def demo_schema_deduplication(): ··· 180 179 ] 181 180 182 181 result = _find_existing_schema(mock_client, "mymodule.MySample", "1.0.0") 183 - print(f" Looking for: mymodule.MySample@1.0.0") 182 + print(" Looking for: mymodule.MySample@1.0.0") 184 183 print(f" Found: {result}") 185 - print(f" Action: Reuse existing schema (no republish)") 184 + print(" Action: Reuse existing schema (no republish)") 186 185 187 186 # Scenario 2: Different version 188 187 print("\nScenario 2: Same name but different version") ··· 199 198 ] 200 199 201 200 result = _find_existing_schema(mock_client, "mymodule.MySample", "2.0.0") # Looking for v2.0.0 202 - print(f" Looking for: mymodule.MySample@2.0.0") 201 + print(" Looking for: mymodule.MySample@2.0.0") 203 202 print(f" Found: {result}") 204 - print(f" Action: Publish new schema record") 203 + print(" Action: Publish new schema record") 205 204 206 205 207 206 def demo_data_migration_options(): ··· 286 285 license="MIT", 287 286 ) 288 287 289 - print(f"\nPromotion successful!") 288 + print("\nPromotion successful!") 290 289 print(f" AT URI: {result}") 291 - print(f"\nYou can now discover this dataset via:") 290 + print("\nYou can now discover this dataset via:") 292 291 print(f" atdata.load_dataset('@{handle}/demo-promoted-dataset')") 293 292 294 293
+1
pyproject.toml
··· 50 50 "moto[s3]>=5.0.29", 51 51 "pytest>=8.4.2", 52 52 "pytest-cov>=7.0.0", 53 + "ruff>=0.14.13", 53 54 ]
+17 -17
src/atdata/__init__.py
··· 39 39 # Expose components 40 40 41 41 from .dataset import ( 42 - PackableSample, 43 - SampleBatch, 44 - Dataset, 45 - packable, 42 + PackableSample as PackableSample, 43 + SampleBatch as SampleBatch, 44 + Dataset as Dataset, 45 + packable as packable, 46 46 ) 47 47 48 48 from .lens import ( 49 - Lens, 50 - LensNetwork, 51 - lens, 49 + Lens as Lens, 50 + LensNetwork as LensNetwork, 51 + lens as lens, 52 52 ) 53 53 54 54 from ._hf_api import ( 55 - load_dataset, 56 - DatasetDict, 55 + load_dataset as load_dataset, 56 + DatasetDict as DatasetDict, 57 57 ) 58 58 59 59 from ._protocols import ( 60 - IndexEntry, 61 - AbstractIndex, 62 - AbstractDataStore, 60 + IndexEntry as IndexEntry, 61 + AbstractIndex as AbstractIndex, 62 + AbstractDataStore as AbstractDataStore, 63 63 ) 64 64 65 65 from ._schema_codec import ( 66 - schema_to_type, 66 + schema_to_type as schema_to_type, 67 67 ) 68 68 69 69 from ._cid import ( 70 - generate_cid, 71 - verify_cid, 70 + generate_cid as generate_cid, 71 + verify_cid as verify_cid, 72 72 ) 73 73 74 74 from .promote import ( 75 - promote_to_atmosphere, 75 + promote_to_atmosphere as promote_to_atmosphere, 76 76 ) 77 77 78 78 # ATProto integration (lazy import to avoid requiring atproto package) 79 - from . import atmosphere 79 + from . import atmosphere as atmosphere 80 80 81 81 82 82 #
-3
src/atdata/_hf_api.py
··· 32 32 from pathlib import Path 33 33 from typing import ( 34 34 TYPE_CHECKING, 35 - Any, 36 35 Generic, 37 - Iterator, 38 36 Mapping, 39 37 Optional, 40 38 Type, 41 39 TypeVar, 42 - Union, 43 40 overload, 44 41 ) 45 42
+2 -2
src/atdata/_schema_codec.py
··· 22 22 >>> sample = ImageSample(image=np.zeros((64, 64)), label="cat") 23 23 """ 24 24 25 - from dataclasses import dataclass, field, make_dataclass 26 - from typing import Any, Optional, Type, Union, get_origin 25 + from dataclasses import field, make_dataclass 26 + from typing import Any, Optional, Type 27 27 import hashlib 28 28 29 29 from numpy.typing import NDArray
+1 -1
src/atdata/atmosphere/lens.py
··· 9 9 implementations. 10 10 """ 11 11 12 - from typing import Optional, Callable 12 + from typing import Optional 13 13 14 14 from .client import AtmosphereClient 15 15 from ._types import (
+1 -1
src/atdata/dataset.py
··· 638 638 a lens if ``as_type()`` was called. 639 639 """ 640 640 assert 'msgpack' in sample 641 - assert type( sample['msgpack'] ) == bytes 641 + assert isinstance(sample['msgpack'], bytes) 642 642 643 643 if self._output_lens is None: 644 644 return self.sample_type.from_bytes( sample['msgpack'] )
-3
src/atdata/local.py
··· 23 23 Dataset, 24 24 ) 25 25 from atdata._cid import generate_cid 26 - from atdata._protocols import IndexEntry 27 26 from atdata._type_utils import numpy_dtype_to_string, PRIMITIVE_TYPE_MAP 28 27 29 28 from pathlib import Path ··· 42 41 43 42 from dataclasses import ( 44 43 dataclass, 45 - asdict, 46 44 field, 47 45 ) 48 46 from typing import ( 49 47 Any, 50 - Optional, 51 48 Type, 52 49 TypeVar, 53 50 Generator,
+1 -2
src/atdata/promote.py
··· 23 23 24 24 if TYPE_CHECKING: 25 25 from .local import LocalDatasetEntry, Index as LocalIndex 26 - from .atmosphere import AtmosphereClient, AtUri 27 - from .atmosphere._types import AtUri as AtUriType 26 + from .atmosphere import AtmosphereClient 28 27 from .dataset import PackableSample 29 28 from ._protocols import AbstractDataStore 30 29
+5 -6
tests/test_atmosphere.py
··· 8 8 - Lens publishing/loading (lens.py) 9 9 """ 10 10 11 - from datetime import datetime, timezone 12 11 from typing import Optional 13 12 from unittest.mock import Mock, MagicMock, patch 14 13 import pytest ··· 521 520 mock_class = Mock() 522 521 mock_get.return_value = mock_class 523 522 524 - client = AtmosphereClient(base_url="https://custom.pds.example") 523 + AtmosphereClient(base_url="https://custom.pds.example") 525 524 526 525 mock_class.assert_called_once_with(base_url="https://custom.pds.example") 527 526 ··· 668 667 mock_response.cursor = None 669 668 mock_atproto_client.com.atproto.repo.list_records.return_value = mock_response 670 669 671 - schemas = authenticated_client.list_schemas() 670 + authenticated_client.list_schemas() 672 671 673 672 call_args = mock_atproto_client.com.atproto.repo.list_records.call_args 674 673 assert f"{LEXICON_NAMESPACE}.sampleSchema" in str(call_args) ··· 883 882 mock_dataset.metadata = None 884 883 885 884 publisher = DatasetPublisher(authenticated_client) 886 - uri = publisher.publish( 885 + publisher.publish( 887 886 mock_dataset, 888 887 name="AutoSchemaDataset", 889 888 auto_publish_schema=True, ··· 1098 1097 mock_atproto_client.com.atproto.repo.create_record.return_value = mock_response 1099 1098 1100 1099 publisher = LensPublisher(authenticated_client) 1101 - uri = publisher.publish( 1100 + publisher.publish( 1102 1101 name="MetadataOnlyLens", 1103 1102 source_schema_uri="at://source", 1104 1103 target_schema_uri="at://target", ··· 1124 1123 ) 1125 1124 1126 1125 publisher = LensPublisher(authenticated_client) 1127 - uri = publisher.publish_from_lens( 1126 + publisher.publish_from_lens( 1128 1127 test_lens, 1129 1128 name="FromObjectLens", 1130 1129 source_schema_uri="at://source",
+1 -2
tests/test_dataset.py
··· 21 21 from numpy.typing import NDArray 22 22 from typing import ( 23 23 Type, 24 - Any, 25 24 ) 26 25 27 26 ··· 142 141 for k, v in sample_data.items(): 143 142 cur_assertion: bool 144 143 if isinstance( v, np.ndarray ): 145 - cur_assertion = np.all( getattr( sample, k ) == v ) == True 144 + cur_assertion = np.all( getattr( sample, k ) == v ) 146 145 else: 147 146 cur_assertion = getattr( sample, k ) == v 148 147 assert cur_assertion, \
+1 -3
tests/test_hf_api.py
··· 4 4 # Imports 5 5 6 6 import pytest 7 - from dataclasses import dataclass 8 - from pathlib import Path 9 7 10 8 import numpy as np 11 9 import webdataset as wds ··· 26 24 _is_indexed_path, 27 25 _parse_indexed_path, 28 26 ) 29 - from unittest.mock import Mock, MagicMock 27 + from unittest.mock import Mock 30 28 31 29 from numpy.typing import NDArray 32 30
+3 -9
tests/test_integration.py
··· 4 4 components, using mocks for external services (Redis, ATProto PDS). 5 5 """ 6 6 7 - import pytest 8 - from unittest.mock import Mock, MagicMock, patch 9 - from dataclasses import dataclass 10 - import tempfile 11 - from pathlib import Path 7 + from unittest.mock import Mock, patch 12 8 13 - import numpy as np 14 9 import webdataset as wds 15 10 16 11 import atdata 17 - from atdata.local import LocalIndex, LocalDatasetEntry 18 - from atdata.atmosphere import AtmosphereIndex, AtmosphereIndexEntry 12 + from atdata.local import LocalDatasetEntry 19 13 from atdata.promote import promote_to_atmosphere 20 14 21 15 ··· 62 56 __str__=lambda s: "at://did:plc:test/record/xyz" 63 57 ) 64 58 65 - result = promote_to_atmosphere( 59 + promote_to_atmosphere( 66 60 local_entry, 67 61 mock_local_index, 68 62 mock_client,
+2 -5
tests/test_integration_atmosphere.py
··· 8 8 """ 9 9 10 10 import pytest 11 - from dataclasses import dataclass 12 - from unittest.mock import Mock, MagicMock, patch 11 + from unittest.mock import Mock, MagicMock 13 12 14 - import numpy as np 15 13 from numpy.typing import NDArray 16 14 import msgpack 17 15 ··· 23 21 SchemaPublisher, 24 22 SchemaLoader, 25 23 DatasetPublisher, 26 - DatasetLoader, 27 24 AtUri, 28 25 ) 29 26 from atdata.atmosphere._types import LEXICON_NAMESPACE ··· 346 343 mock_atproto_client.com.atproto.repo.create_record.return_value = mock_response 347 344 348 345 publisher = SchemaPublisher(authenticated_client) 349 - uri = publisher.publish(AtmoNDArraySample, version="1.0.0") 346 + publisher.publish(AtmoNDArraySample, version="1.0.0") 350 347 351 348 call_args = mock_atproto_client.com.atproto.repo.create_record.call_args 352 349 record = call_args.kwargs["data"]["record"]
-2
tests/test_integration_atmosphere_live.py
··· 15 15 """ 16 16 17 17 import os 18 - import time 19 18 import uuid 20 19 import pytest 21 20 from datetime import datetime 22 21 23 - import numpy as np 24 22 from numpy.typing import NDArray 25 23 26 24 import atdata
+1 -4
tests/test_integration_cross_backend.py
··· 7 7 """ 8 8 9 9 import pytest 10 - from dataclasses import dataclass 11 - from typing import Type 12 10 from unittest.mock import Mock, MagicMock 13 11 14 - import numpy as np 15 12 from numpy.typing import NDArray 16 13 17 14 import atdata 18 15 from atdata.local import LocalIndex, LocalDatasetEntry 19 - from atdata._protocols import IndexEntry, AbstractIndex 16 + from atdata._protocols import IndexEntry 20 17 from atdata.atmosphere import ( 21 18 AtmosphereClient, 22 19 AtmosphereIndex,
-1
tests/test_integration_e2e.py
··· 10 10 - Parquet export with transformations 11 11 """ 12 12 13 - import pytest 14 13 from dataclasses import dataclass 15 14 from pathlib import Path 16 15
+1 -4
tests/test_integration_edge_cases.py
··· 9 9 - All primitive type variations 10 10 """ 11 11 12 - import pytest 13 12 from pathlib import Path 14 - from dataclasses import dataclass 15 - from typing import Optional 16 13 17 14 import numpy as np 18 15 from numpy.typing import NDArray ··· 450 447 451 448 def test_multidimensional_arrays(self, tmp_path): 452 449 """Multidimensional arrays should preserve shape.""" 453 - tar_path = tmp_path / "multidim-000000.tar" 450 + tmp_path / "multidim-000000.tar" 454 451 455 452 shapes = [(3, 4), (2, 3, 4), (2, 2, 2, 2)] 456 453
+1 -6
tests/test_integration_error_handling.py
··· 8 8 """ 9 9 10 10 import pytest 11 - from pathlib import Path 12 - from unittest.mock import Mock, MagicMock, patch 11 + from unittest.mock import Mock, MagicMock 13 12 import tarfile 14 - import tempfile 15 13 16 - import numpy as np 17 - from numpy.typing import NDArray 18 14 19 15 import atdata 20 16 from atdata.local import LocalIndex, LocalDatasetEntry 21 17 from atdata.atmosphere import AtmosphereClient, AtUri 22 - from atdata.atmosphere._types import LEXICON_NAMESPACE 23 18 24 19 25 20 ##
-1
tests/test_integration_lens.py
··· 10 10 """ 11 11 12 12 import pytest 13 - from dataclasses import dataclass 14 13 15 14 import numpy as np 16 15 from numpy.typing import NDArray
+1 -4
tests/test_integration_promotion.py
··· 9 9 """ 10 10 11 11 import pytest 12 - from pathlib import Path 13 12 from unittest.mock import Mock, MagicMock, patch 14 - from dataclasses import dataclass 15 13 16 - import numpy as np 17 14 from numpy.typing import NDArray 18 15 import webdataset as wds 19 16 20 17 import atdata 21 18 from atdata.local import LocalIndex, LocalDatasetEntry 22 - from atdata.promote import promote_to_atmosphere, _find_existing_schema 19 + from atdata.promote import promote_to_atmosphere 23 20 from atdata.atmosphere import AtmosphereClient 24 21 from atdata.atmosphere._types import LEXICON_NAMESPACE 25 22
+1 -1
tests/test_lens.py
··· 164 164 assert sample.favorite_pizza == test_view.favorite_pizza, \ 165 165 f'Divergence on auto-mapped dataset: `favorite_pizza` should be {test_view.favorite_pizza}, but is {sample.favorite_pizza}' 166 166 assert np.all( sample.favorite_image == test_view.favorite_image ), \ 167 - f'Divergence on auto-mapped dataset: `favorite_image`' 167 + 'Divergence on auto-mapped dataset: `favorite_image`' 168 168 169 169 170 170 ##
+1 -3
tests/test_local.py
··· 8 8 # System 9 9 from dataclasses import dataclass 10 10 from pathlib import Path 11 - from uuid import UUID 12 11 13 12 # External 14 13 import numpy as np ··· 22 21 23 22 # Typing 24 23 from numpy.typing import NDArray 25 - from typing import Any 26 24 27 25 28 26 ## ··· 937 935 RuntimeError. 938 936 """ 939 937 dataset_path = tmp_path / "empty-dataset-000000.tar" 940 - with wds.writer.TarWriter(str(dataset_path)) as sink: 938 + with wds.writer.TarWriter(str(dataset_path)): 941 939 pass # Write no samples 942 940 943 941 ds = atdata.Dataset[SimpleTestSample](url=str(dataset_path))
+3 -4
tests/test_promote.py
··· 1 1 """Tests for the promote module.""" 2 2 3 3 import pytest 4 - from unittest.mock import Mock, MagicMock, patch 5 - from dataclasses import dataclass 4 + from unittest.mock import Mock, patch 6 5 7 6 import atdata 8 7 from atdata.promote import ( ··· 221 220 mock_publisher = MockPublisher.return_value 222 221 mock_publisher.publish_with_urls.return_value = Mock(__str__=lambda s: "at://result") 223 222 224 - result = promote_to_atmosphere( 223 + promote_to_atmosphere( 225 224 entry, 226 225 mock_index, 227 226 mock_client, ··· 266 265 mock_publisher.publish_with_urls.return_value = Mock(__str__=lambda s: "at://result") 267 266 268 267 with patch("atdata.dataset.Dataset"): 269 - result = promote_to_atmosphere( 268 + promote_to_atmosphere( 270 269 entry, 271 270 mock_index, 272 271 mock_client,
+1 -6
tests/test_protocols.py
··· 4 4 definitions, ensuring interoperability between local and atmosphere backends. 5 5 """ 6 6 7 - import pytest 8 - from unittest.mock import Mock, MagicMock 9 - from dataclasses import dataclass 7 + from unittest.mock import Mock 10 8 11 - import atdata 12 9 from atdata._protocols import ( 13 10 IndexEntry, 14 - AbstractIndex, 15 - AbstractDataStore, 16 11 ) 17 12 from atdata.local import LocalDatasetEntry, Index as LocalIndex, S3DataStore 18 13 from atdata.atmosphere import AtmosphereIndex, AtmosphereIndexEntry