A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix: handle ATProto DotDict responses and add live integration tests

- Convert ATProto model objects (DotDict) to plain dicts in get_record()
and list_records() methods for consistent downstream handling
- Add pytest markers for 'network' and 'slow' tests in pyproject.toml
- Add live atmosphere integration test for real ATProto interactions
- Ignore testing.env credentials file

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+488 -2
+1
.gitignore
··· 219 219 marimo/_static/ 220 220 marimo/_lsp/ 221 221 __marimo__/ 222 + testing.env
+4
pyproject.toml
··· 39 39 40 40 [tool.pytest.ini_options] 41 41 addopts = "--cov=atdata --cov-report=html" 42 + markers = [ 43 + "network: marks tests as requiring network access (deselect with '-m \"not network\"')", 44 + "slow: marks tests as slow running", 45 + ] 42 46 43 47 [dependency-groups] 44 48 dev = [
+27 -2
src/atdata/atmosphere/client.py
··· 254 254 } 255 255 ) 256 256 257 - return response.value 257 + # Convert ATProto model to dict if needed 258 + value = response.value 259 + # DotDict and similar ATProto models have to_dict() 260 + if hasattr(value, "to_dict") and callable(value.to_dict): 261 + return value.to_dict() 262 + elif isinstance(value, dict): 263 + return dict(value) 264 + elif hasattr(value, "model_dump") and callable(value.model_dump): 265 + return value.model_dump() 266 + elif hasattr(value, "__dict__"): 267 + return dict(value.__dict__) 268 + return value 258 269 259 270 def delete_record( 260 271 self, ··· 324 335 } 325 336 ) 326 337 327 - records = [r.value for r in response.records] 338 + # Convert ATProto models to dicts if needed 339 + records = [] 340 + for r in response.records: 341 + value = r.value 342 + # DotDict and similar ATProto models have to_dict() 343 + if hasattr(value, "to_dict") and callable(value.to_dict): 344 + records.append(value.to_dict()) 345 + elif isinstance(value, dict): 346 + records.append(dict(value)) 347 + elif hasattr(value, "model_dump") and callable(value.model_dump): 348 + records.append(value.model_dump()) 349 + elif hasattr(value, "__dict__"): 350 + records.append(dict(value.__dict__)) 351 + else: 352 + records.append(value) 328 353 return records, response.cursor 329 354 330 355 # Convenience methods for atdata collections
+456
tests/test_integration_atmosphere_live.py
··· 1 + """Live ATProto network integration tests. 2 + 3 + These tests connect to the real ATProto network (Bluesky) and verify 4 + that atdata's atmosphere functionality works correctly with actual API calls. 5 + 6 + Requirements: 7 + - testing.env file with ATDATA_TEST_HANDLE and ATDATA_TEST_APP_PASSWORD 8 + - Network connectivity to bsky.social 9 + 10 + Run with: 11 + source testing.env && uv run pytest tests/test_integration_atmosphere_live.py -v 12 + 13 + These tests are marked with @pytest.mark.network and @pytest.mark.slow. 14 + To skip in CI: pytest -m "not network" 15 + """ 16 + 17 + import os 18 + import time 19 + import uuid 20 + import pytest 21 + from datetime import datetime 22 + 23 + import numpy as np 24 + from numpy.typing import NDArray 25 + 26 + import atdata 27 + from atdata.atmosphere import ( 28 + AtmosphereClient, 29 + AtmosphereIndex, 30 + SchemaPublisher, 31 + SchemaLoader, 32 + DatasetPublisher, 33 + DatasetLoader, 34 + ) 35 + from atdata.atmosphere._types import LEXICON_NAMESPACE 36 + 37 + 38 + ## 39 + # Test Configuration 40 + 41 + 42 + # Prefix for all test records - makes cleanup easier 43 + TEST_PREFIX = "atdata-live-test" 44 + 45 + 46 + def get_test_credentials(): 47 + """Get test credentials from environment.""" 48 + handle = os.environ.get("ATDATA_TEST_HANDLE") 49 + password = os.environ.get("ATDATA_TEST_APP_PASSWORD") 50 + return handle, password 51 + 52 + 53 + def skip_if_no_credentials(): 54 + """Skip test if credentials not available.""" 55 + handle, password = get_test_credentials() 56 + if not handle or not password: 57 + pytest.skip("Live test credentials not configured (set ATDATA_TEST_HANDLE and ATDATA_TEST_APP_PASSWORD)") 58 + 59 + 60 + ## 61 + # Test Sample Types 62 + 63 + 64 + @atdata.packable 65 + class LiveTestSample: 66 + """Simple sample for live tests.""" 67 + name: str 68 + value: int 69 + 70 + 71 + @atdata.packable 72 + class LiveTestArraySample: 73 + """Sample with NDArray for live tests.""" 74 + label: str 75 + data: NDArray 76 + 77 + 78 + ## 79 + # Fixtures 80 + 81 + 82 + @pytest.fixture(scope="module") 83 + def live_client(): 84 + """Create authenticated client for live tests. 85 + 86 + This fixture is module-scoped so we reuse the same session 87 + across all tests in this file, reducing API calls. 88 + """ 89 + handle, password = get_test_credentials() 90 + if not handle or not password: 91 + pytest.skip("Live test credentials not configured") 92 + 93 + client = AtmosphereClient() 94 + client.login(handle, password) 95 + 96 + yield client 97 + 98 + # Cleanup: delete test records created during this session 99 + # This runs after all tests in the module complete 100 + 101 + 102 + @pytest.fixture(scope="module") 103 + def live_index(live_client): 104 + """Create AtmosphereIndex for live tests.""" 105 + return AtmosphereIndex(live_client) 106 + 107 + 108 + @pytest.fixture 109 + def unique_name(): 110 + """Generate a unique name for test records.""" 111 + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") 112 + unique_id = uuid.uuid4().hex[:8] 113 + return f"{TEST_PREFIX}-{timestamp}-{unique_id}" 114 + 115 + 116 + ## 117 + # Cleanup Utilities 118 + 119 + 120 + def cleanup_test_schemas(client: AtmosphereClient): 121 + """Delete all test schema records.""" 122 + loader = SchemaLoader(client) 123 + deleted = 0 124 + failed = 0 125 + 126 + for record in loader.list_all(): 127 + name = record.get("value", {}).get("name", "") 128 + if TEST_PREFIX in name: 129 + uri = record.get("uri", "") 130 + if uri: 131 + try: 132 + client.delete_record(uri) 133 + deleted += 1 134 + except Exception: 135 + failed += 1 # Continue cleanup even if individual deletes fail 136 + 137 + return deleted 138 + 139 + 140 + def cleanup_test_datasets(client: AtmosphereClient): 141 + """Delete all test dataset records.""" 142 + loader = DatasetLoader(client) 143 + deleted = 0 144 + failed = 0 145 + 146 + for record in loader.list_all(): 147 + name = record.get("value", {}).get("name", "") 148 + if TEST_PREFIX in name: 149 + uri = record.get("uri", "") 150 + if uri: 151 + try: 152 + client.delete_record(uri) 153 + deleted += 1 154 + except Exception: 155 + failed += 1 # Continue cleanup even if individual deletes fail 156 + 157 + return deleted 158 + 159 + 160 + ## 161 + # Authentication Tests 162 + 163 + 164 + @pytest.mark.network 165 + @pytest.mark.slow 166 + class TestLiveAuthentication: 167 + """Live tests for authentication flow.""" 168 + 169 + def test_login_succeeds(self): 170 + """Should successfully authenticate with valid credentials.""" 171 + handle, password = get_test_credentials() 172 + skip_if_no_credentials() 173 + 174 + client = AtmosphereClient() 175 + client.login(handle, password) 176 + 177 + assert client.is_authenticated 178 + assert client.did is not None 179 + assert client.did.startswith("did:plc:") 180 + 181 + def test_session_export_import(self): 182 + """Should export and restore session.""" 183 + handle, password = get_test_credentials() 184 + skip_if_no_credentials() 185 + 186 + # Create first client and login 187 + client1 = AtmosphereClient() 188 + client1.login(handle, password) 189 + session_string = client1.export_session() 190 + 191 + assert session_string is not None 192 + assert len(session_string) > 0 193 + 194 + # Create second client and restore session 195 + client2 = AtmosphereClient() 196 + client2.login_with_session(session_string) 197 + 198 + assert client2.is_authenticated 199 + assert client2.did == client1.did 200 + 201 + def test_invalid_credentials_raises(self): 202 + """Should raise on invalid credentials.""" 203 + client = AtmosphereClient() 204 + 205 + with pytest.raises(Exception): 206 + client.login("invalid.handle.test", "wrong-password") 207 + 208 + 209 + ## 210 + # Schema Operation Tests 211 + 212 + 213 + @pytest.mark.network 214 + @pytest.mark.slow 215 + class TestLiveSchemaOperations: 216 + """Live tests for schema publishing and retrieval.""" 217 + 218 + def test_publish_schema(self, live_client, unique_name): 219 + """Should publish a schema to ATProto.""" 220 + # Create a unique sample type for this test 221 + @atdata.packable 222 + class UniqueTestSample: 223 + name: str 224 + count: int 225 + 226 + # Monkey-patch the module name to include test prefix 227 + UniqueTestSample.__module__ = f"{TEST_PREFIX}.{unique_name}" 228 + 229 + publisher = SchemaPublisher(live_client) 230 + uri = publisher.publish(UniqueTestSample, version="1.0.0") 231 + 232 + assert uri is not None 233 + assert "at://" in str(uri) 234 + assert LEXICON_NAMESPACE in str(uri) 235 + 236 + def test_list_schemas(self, live_client): 237 + """Should list published schemas.""" 238 + loader = SchemaLoader(live_client) 239 + schemas = loader.list_all() 240 + 241 + # Should return a list (may be empty if no schemas published) 242 + assert isinstance(schemas, list) 243 + 244 + def test_publish_and_retrieve_schema(self, live_client, unique_name): 245 + """Should publish then retrieve a schema by URI.""" 246 + @atdata.packable 247 + class RetrievableTestSample: 248 + field1: str 249 + field2: int 250 + 251 + RetrievableTestSample.__module__ = f"{TEST_PREFIX}.{unique_name}" 252 + 253 + publisher = SchemaPublisher(live_client) 254 + uri = publisher.publish(RetrievableTestSample, version="1.0.0") 255 + 256 + loader = SchemaLoader(live_client) 257 + schema = loader.get(str(uri)) 258 + 259 + assert schema is not None 260 + # Schema name defaults to just the class name (not full module path) 261 + assert schema["name"] == "RetrievableTestSample" 262 + assert schema["version"] == "1.0.0" 263 + 264 + field_names = {f["name"] for f in schema["fields"]} 265 + assert "field1" in field_names 266 + assert "field2" in field_names 267 + 268 + def test_schema_with_ndarray_field(self, live_client, unique_name): 269 + """Should publish schema with NDArray field type.""" 270 + @atdata.packable 271 + class ArrayTestSample: 272 + label: str 273 + embedding: NDArray 274 + 275 + ArrayTestSample.__module__ = f"{TEST_PREFIX}.{unique_name}" 276 + 277 + publisher = SchemaPublisher(live_client) 278 + uri = publisher.publish(ArrayTestSample, version="1.0.0") 279 + 280 + loader = SchemaLoader(live_client) 281 + schema = loader.get(str(uri)) 282 + 283 + # Find the embedding field 284 + embedding_field = next(f for f in schema["fields"] if f["name"] == "embedding") 285 + assert embedding_field is not None 286 + # Field type should indicate ndarray 287 + assert "ndarray" in embedding_field["fieldType"]["$type"].lower() 288 + 289 + 290 + ## 291 + # Dataset Operation Tests 292 + 293 + 294 + @pytest.mark.network 295 + @pytest.mark.slow 296 + class TestLiveDatasetOperations: 297 + """Live tests for dataset publishing and retrieval.""" 298 + 299 + def test_publish_dataset_with_urls(self, live_client, unique_name): 300 + """Should publish a dataset record with external URLs.""" 301 + # First publish a schema 302 + @atdata.packable 303 + class DatasetTestSample: 304 + id: int 305 + 306 + DatasetTestSample.__module__ = f"{TEST_PREFIX}.{unique_name}" 307 + 308 + schema_pub = SchemaPublisher(live_client) 309 + schema_uri = schema_pub.publish(DatasetTestSample, version="1.0.0") 310 + 311 + # Now publish dataset with external URLs 312 + dataset_pub = DatasetPublisher(live_client) 313 + dataset_uri = dataset_pub.publish_with_urls( 314 + urls=["https://example.com/test-shard-000000.tar"], 315 + schema_uri=str(schema_uri), 316 + name=unique_name, 317 + description="Test dataset for live integration tests", 318 + ) 319 + 320 + assert dataset_uri is not None 321 + assert "at://" in str(dataset_uri) 322 + 323 + def test_list_datasets(self, live_client): 324 + """Should list published datasets.""" 325 + loader = DatasetLoader(live_client) 326 + datasets = loader.list_all() 327 + 328 + assert isinstance(datasets, list) 329 + 330 + def test_publish_and_retrieve_dataset(self, live_client, unique_name): 331 + """Should publish then retrieve a dataset.""" 332 + @atdata.packable 333 + class RetrievableDatasetSample: 334 + value: int 335 + 336 + RetrievableDatasetSample.__module__ = f"{TEST_PREFIX}.{unique_name}" 337 + 338 + # Publish schema 339 + schema_pub = SchemaPublisher(live_client) 340 + schema_uri = schema_pub.publish(RetrievableDatasetSample, version="1.0.0") 341 + 342 + # Publish dataset 343 + test_urls = [ 344 + "https://example.com/shard-000000.tar", 345 + "https://example.com/shard-000001.tar", 346 + ] 347 + 348 + dataset_pub = DatasetPublisher(live_client) 349 + dataset_uri = dataset_pub.publish_with_urls( 350 + urls=test_urls, 351 + schema_uri=str(schema_uri), 352 + name=unique_name, 353 + description="Retrievable test dataset", 354 + tags=["test", "integration"], 355 + ) 356 + 357 + # Retrieve dataset 358 + loader = DatasetLoader(live_client) 359 + dataset = loader.get(str(dataset_uri)) 360 + 361 + assert dataset is not None 362 + assert dataset["name"] == unique_name 363 + assert dataset["description"] == "Retrievable test dataset" 364 + 365 + 366 + ## 367 + # AtmosphereIndex Tests 368 + 369 + 370 + @pytest.mark.network 371 + @pytest.mark.slow 372 + class TestLiveAtmosphereIndex: 373 + """Live tests for AtmosphereIndex wrapper.""" 374 + 375 + def test_index_list_datasets(self, live_index): 376 + """Should list datasets via AtmosphereIndex.""" 377 + datasets = list(live_index.list_datasets()) 378 + 379 + # Should return iterable of entries 380 + assert isinstance(datasets, list) 381 + 382 + def test_index_publish_schema(self, live_index, unique_name): 383 + """Should publish schema via AtmosphereIndex.""" 384 + @atdata.packable 385 + class IndexTestSample: 386 + data: str 387 + 388 + IndexTestSample.__module__ = f"{TEST_PREFIX}.{unique_name}" 389 + 390 + schema_ref = live_index.publish_schema(IndexTestSample, version="1.0.0") 391 + 392 + assert schema_ref is not None 393 + assert "at://" in str(schema_ref) 394 + 395 + def test_index_get_schema(self, live_index, unique_name): 396 + """Should retrieve schema via AtmosphereIndex.""" 397 + @atdata.packable 398 + class GetSchemaTestSample: 399 + field: int 400 + 401 + GetSchemaTestSample.__module__ = f"{TEST_PREFIX}.{unique_name}" 402 + 403 + schema_ref = live_index.publish_schema(GetSchemaTestSample, version="1.0.0") 404 + schema = live_index.get_schema(str(schema_ref)) 405 + 406 + # Schema name defaults to just the class name (not full module path) 407 + assert schema["name"] == "GetSchemaTestSample" 408 + 409 + 410 + ## 411 + # Error Handling Tests 412 + 413 + 414 + @pytest.mark.network 415 + @pytest.mark.slow 416 + class TestLiveErrorHandling: 417 + """Live tests for error handling with real API.""" 418 + 419 + def test_get_nonexistent_record(self, live_client): 420 + """Should raise on getting non-existent record.""" 421 + loader = SchemaLoader(live_client) 422 + 423 + fake_uri = f"at://{live_client.did}/{LEXICON_NAMESPACE}.sampleSchema/nonexistent12345" 424 + 425 + with pytest.raises(Exception): 426 + loader.get(fake_uri) 427 + 428 + def test_publish_without_auth_raises(self): 429 + """Should raise when publishing without authentication.""" 430 + client = AtmosphereClient() 431 + # Not logged in 432 + 433 + publisher = SchemaPublisher(client) 434 + 435 + with pytest.raises(ValueError, match="authenticated"): 436 + publisher.publish(LiveTestSample, version="1.0.0") 437 + 438 + 439 + ## 440 + # Cleanup Test (runs last) 441 + 442 + 443 + @pytest.mark.network 444 + @pytest.mark.slow 445 + class TestZZZCleanup: 446 + """Cleanup test records. Named ZZZ to run last.""" 447 + 448 + def test_cleanup_test_records(self, live_client): 449 + """Clean up all test records created during this test run.""" 450 + schemas_deleted = cleanup_test_schemas(live_client) 451 + datasets_deleted = cleanup_test_datasets(live_client) 452 + 453 + print(f"\nCleanup: deleted {schemas_deleted} schemas, {datasets_deleted} datasets") 454 + 455 + # Just verify cleanup ran without error 456 + assert True