A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor(local): improve type annotations and remove obsolete code

- Fix BasicIndexEntry.uuid type (str | None → str, always has default)
- Add a `# type: ignore[arg-type]` marker to the redis.hset call, with an explanatory comment
- Add explicit type casts for redis.hgetall return values
- Handle metadata_url optional field with proper typing
- Remove roughly 40 lines of commented-out s3fs mount code
- Remove obsolete TODO comments about redis-py typing issues, along with the commented-out redis_om `Migrator().run()` call

All type checker warnings resolved while maintaining runtime correctness.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

+10 -49
.chainlink/issues.db

This is a binary file and will not be displayed.

+10 -49
src/atdata/local.py
··· 118 118 metadata_url: str | None 119 119 """S3 URL to the dataset's metadata msgpack file, if any.""" 120 120 121 - uuid: str | None = field( default_factory = lambda: str( uuid4() ) ) 121 + uuid: str = field( default_factory = lambda: str( uuid4() ) ) 122 122 """Unique identifier for this dataset entry. Defaults to a new UUID if not provided.""" 123 123 124 124 def write_to( self, redis: Redis ): ··· 132 132 save_key = f'BasicIndexEntry:{self.uuid}' 133 133 # Filter out None values - Redis doesn't accept None 134 134 data = {k: v for k, v in asdict(self).items() if v is not None} 135 - # TODO figure out how to get linting to work correctly here 136 - redis.hset( save_key, mapping = data ) 135 + # redis-py typing uses untyped dict, so type checker complains about dict[str, Any] 136 + redis.hset( save_key, mapping = data ) # type: ignore[arg-type] 137 137 138 138 def _s3_env( credentials_path: str | Path ) -> dict[str, Any]: 139 139 """Load S3 credentials from a .env file. ··· 373 373 for sample in ds.ordered( batch_size = None ): 374 374 sink.write( sample.as_wds ) 375 375 376 - # with TemporaryDirectory() as tmpdir: 377 - 378 - # # Mount S3 filesystem 379 - # mount_path = Path( tmpdir ) / 'atdata-s3' / self.hive_bucket 380 - # mount_path.mkdir( parents = True, exist_ok = True ) 381 - # s3fs_cmd = shutil.which( 's3fs' ) 382 - # mount_cmd = [ 383 - # s3fs_cmd, 384 - # self.hive_bucket, 385 - # mount_path.as_posix() 386 - # ] 387 - # result = subprocess.run( mount_cmd, env = self.s3_credentials ) 388 - # print( result ) 389 - 390 - # new_uuid = str( uuid4() ) 391 - 392 - # # Write metadata 393 - # metadata_path = ( 394 - # mount_path 395 - # / 'metadata' 396 - # / f'atdata-metadata--{new_uuid}.msgpack' 397 - # ) 398 - # metadata_path.parent.mkdir( parents = True, exist_ok = True ) 399 - # with open( metadata_path, 'wb' ) as f: 400 - # if ds.metadata is not None: 401 - # # TODO Figure out how to make linting work better here 402 - # f.write( msgpack.packb( 
ds.metadata ) ) 403 - 404 - # # Write data 405 - # shard_pattern = (Path( tmpdir ) / 'atdata-cache' / f'atdata--{new_uuid}--%06d.tar').as_posix() 406 - # written_shards = [] 407 - # with wds.writer.ShardWriter( shard_pattern, 408 - # opener = lambda s: 409 - # post = lambda s: written_shards.append( s ), 410 - # **kwargs 411 - # ) as sink: 412 - # for sample in ds.ordered( batch_size = None ): 413 - # sink.write( sample.as_wds ) 414 - 415 376 # Make a new Dataset object for the written dataset copy 416 377 if len( written_shards ) == 0: 417 378 raise RuntimeError( 'Cannot form new dataset entry -- did not write any shards' ) ··· 472 433 if redis is not None: 473 434 self._redis = redis 474 435 else: 475 - self._redis = Redis( **kwargs ) 476 - 477 - # needed before we can do anything with `redis` 478 - # TODO this only works / is necessary for `redis_om`` 479 - # Migrator().run() 436 + self._redis: Redis = Redis( **kwargs ) 480 437 481 438 @property 482 439 def all_entries( self ) -> list[BasicIndexEntry]: ··· 498 455 """ 499 456 ## 500 457 for key in self._redis.scan_iter( match = 'BasicIndexEntry:*' ): 501 - # TODO typing issue for `redis` 502 - cur_entry_data = _decode_bytes_dict( self._redis.hgetall( key ) ) 458 + # hgetall returns dict[bytes, bytes] which we decode to dict[str, str] 459 + cur_entry_data = _decode_bytes_dict( cast(dict[bytes, bytes], self._redis.hgetall( key )) ) 460 + 503 461 # Provide default None for optional fields that may be missing 462 + # Type checker complains about None in dict[str, str], but BasicIndexEntry accepts it 463 + cur_entry_data: dict[str, Any] = dict( **cur_entry_data ) 504 464 cur_entry_data.setdefault('metadata_url', None) 465 + 505 466 cur_entry = BasicIndexEntry( **cur_entry_data ) 506 467 yield cur_entry 507 468