this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add gzip to tarfile store

Paul Frazee 37766c62 997d08e0

+48 -21
+44 -17
cmd/butterfly/store/tarfiles.go
··· 4 4 import ( 5 5 "archive/tar" 6 6 "bytes" 7 + "compress/gzip" 7 8 "context" 8 9 "encoding/json" 9 10 "fmt" ··· 17 18 "github.com/bluesky-social/indigo/cmd/butterfly/remote" 18 19 ) 19 20 20 - // TarfilesStore implements Store by writing repository data to tar files 21 + // TarfilesStore implements Store by writing repository data to gzipped tar files 21 22 type TarfilesStore struct { 22 - // The directory to store the .tar files 23 - // Each repository is stored as a single .tar file 24 - // The contents of the .tar file is a collection of json files 23 + // The directory to store the .tar.gz files 24 + // Each repository is stored as a single .tar.gz file 25 + // The contents of the .tar.gz file is a collection of json files 25 26 // The directory structure is based on the collections 26 27 Dirpath string 27 28 ··· 34 35 // tarWriter manages writing to a single tar file 35 36 type tarWriter struct { 36 37 file *os.File 38 + gzipWriter *gzip.Writer 37 39 writer *tar.Writer 38 40 entries map[string]bool // Track existing entries 39 41 tempFile string ··· 150 152 151 153 // Sanitize DID for filename 152 154 filename := strings.ReplaceAll(did, ":", "_") 153 - finalPath := filepath.Join(t.Dirpath, filename+".tar") 154 - tempPath := filepath.Join(t.tempDir, filename+".tar.tmp") 155 + finalPath := filepath.Join(t.Dirpath, filename+".tar.gz") 156 + tempPath := filepath.Join(t.tempDir, filename+".tar.gz.tmp") 155 157 156 158 // Check if tar file already exists and load entries 157 159 entries := make(map[string]bool) ··· 167 169 return nil, fmt.Errorf("failed to create tar file: %w", err) 168 170 } 169 171 170 - // Create tar writer first 171 - newTarWriter := tar.NewWriter(file) 172 + // Create gzip writer first 173 + gzipWriter := gzip.NewWriter(file) 174 + // Create tar writer on top of gzip writer 175 + newTarWriter := tar.NewWriter(gzipWriter) 172 176 173 177 // If we had existing entries, copy them to the new tar 174 178 if len(entries) > 0 { 175 179 if err := t.copyExistingTarEntries(finalPath, newTarWriter, entries); err != nil { 176 180 newTarWriter.Close() 181 + gzipWriter.Close() 177 182 file.Close() 178 183 os.Remove(tempPath) 179 184 return nil, fmt.Errorf("failed to copy existing tar: %w", err) ··· 181 186 } 182 187 183 188 tw := &tarWriter{ 184 - file: file, 185 - writer: newTarWriter, 186 - entries: entries, 187 - tempFile: tempPath, 188 - finalFile: finalPath, 189 + file: file, 190 + gzipWriter: gzipWriter, 191 + writer: newTarWriter, 192 + entries: entries, 193 + tempFile: tempPath, 194 + finalFile: finalPath, 189 195 } 190 196 191 197 t.writers[did] = tw ··· 200 206 } 201 207 defer file.Close() 202 208 203 - reader := tar.NewReader(file) 209 + gzipReader, err := gzip.NewReader(file) 210 + if err != nil { 211 + return err 212 + } 213 + defer gzipReader.Close() 214 + 215 + reader := tar.NewReader(gzipReader) 204 216 for { 205 217 header, err := reader.Next() 206 218 if err == io.EOF { ··· 222 234 } 223 235 defer src.Close() 224 236 225 - reader := tar.NewReader(src) 237 + gzipReader, err := gzip.NewReader(src) 238 + if err != nil { 239 + return err 240 + } 241 + defer gzipReader.Close() 242 + 243 + reader := tar.NewReader(gzipReader) 226 244 227 245 for { 228 246 header, err := reader.Next() ··· 271 289 if err := tw.writer.Close(); err != nil { 272 290 return fmt.Errorf("failed to close tar writer: %w", err) 273 291 } 292 + if err := tw.gzipWriter.Close(); err != nil { 293 + return fmt.Errorf("failed to close gzip writer: %w", err) 294 + } 274 295 if err := tw.file.Close(); err != nil { 275 296 return fmt.Errorf("failed to close file: %w", err) 276 297 } ··· 283 304 return nil 284 305 } 285 306 286 - // ReadTarFile reads a tar file and returns its contents (for debugging/testing) 307 + // ReadTarFile reads a gzipped tar file and returns its contents (for debugging/testing) 287 308 func ReadTarFile(path string) (map[string][]byte, error) { 288 309 file, err := os.Open(path) 289 310 if err != nil { ··· 291 312 } 292 313 defer file.Close() 293 314 315 + gzipReader, err := gzip.NewReader(file) 316 + if err != nil { 317 + return nil, err 318 + } 319 + defer gzipReader.Close() 320 + 294 321 contents := make(map[string][]byte) 295 - reader := tar.NewReader(file) 322 + reader := tar.NewReader(gzipReader) 296 323 297 324 for { 298 325 header, err := reader.Next()
+4 -4
cmd/butterfly/store/tarfiles_test.go
··· 122 122 require.NoError(t, err) 123 123 124 124 // Verify the tar file was created 125 - expectedFile := filepath.Join(tmpDir, "did_plc_testuser123.tar") 125 + expectedFile := filepath.Join(tmpDir, "did_plc_testuser123.tar.gz") 126 126 assert.FileExists(t, expectedFile) 127 127 128 128 // Read and verify the tar contents ··· 196 196 197 197 // Verify tar files were created for each DID 198 198 for _, did := range testDIDs { 199 - filename := strings.ReplaceAll(did, ":", "_") + ".tar" 199 + filename := strings.ReplaceAll(did, ":", "_") + ".tar.gz" 200 200 expectedFile := filepath.Join(tmpDir, filename) 201 201 assert.FileExists(t, expectedFile) 202 202 } ··· 310 310 } 311 311 312 312 // Verify both records exist in the tar file 313 - expectedFile := filepath.Join(tmpDir, "did_plc_testuser.tar") 313 + expectedFile := filepath.Join(tmpDir, "did_plc_testuser.tar.gz") 314 314 contents, err := ReadTarFile(expectedFile) 315 315 require.NoError(t, err) 316 316 ··· 388 388 require.NoError(t, err) 389 389 390 390 // Verify only valid events were processed 391 - expectedFile := filepath.Join(tmpDir, "did_plc_testuser.tar") 391 + expectedFile := filepath.Join(tmpDir, "did_plc_testuser.tar.gz") 392 392 contents, err := ReadTarFile(expectedFile) 393 393 require.NoError(t, err) 394 394