A loose federation of distributed, typed datasets
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

copied over changes from prototyping branch

+19 -6
+19 -6
src/atdata/local.py
···
228 228 print( 'Deleting local cache file ...', end = '' )
229 229 os.remove( local_cache_path )
230 230 print( ' done.' )
231 +
232 + written_shards.append( s )
231 233 writer_post = _writer_post
232 234
233 235 else:
···
285 287 # sink.write( sample.as_wds )
286 288
287 289 # Make a new Dataset object for the written dataset copy
288 - shard_s3_format = (
289 - (
290 + if len( written_shards ) == 0:
291 + raise RuntimeError( 'Cannot form new dataset entry -- did not write any shards' )
292 +
293 + elif len( written_shards ) < 2:
294 + new_dataset_url = (
290 295 self.hive_path
291 - / f'atdata--{new_uuid}'
296 + / ( Path( written_shards[0] ).name )
292 297 ).as_posix()
293 - ) + '--{shard_id}.tar'
294 - shard_id_braced = '{' + f'{0:06d}..{len( written_shards ) - 1:06d}' + '}'
298 +
299 + else:
300 + shard_s3_format = (
301 + (
302 + self.hive_path
303 + / f'atdata--{new_uuid}'
304 + ).as_posix()
305 + ) + '--{shard_id}.tar'
306 + shard_id_braced = '{' + f'{0:06d}..{len( written_shards ) - 1:06d}' + '}'
307 + new_dataset_url = shard_s3_format.format( shard_id = shard_id_braced )
295 308
296 309 new_dataset = Dataset[ds.sample_type](
297 - url = shard_s3_format.format( shard_id = shard_id_braced ),
310 + url = new_dataset_url,
298 311 metadata_url = metadata_path.as_posix(),
299 312 )
300 313