A file-based task manager
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Length-prefix property value blobs

Property values were stored as one-per-line in a blob, so any value
containing a newline was silently corrupted. Switch the on-disk format
to size-prefixed blocks: `size: N\n<N bytes>\n` per value, mirroring
the patch wire format. Values may now contain any characters, including
newlines. (Values are still UTF-8 strings in memory; only the container
format stopped restricting their content.)

The reader detects the new format via the `size: ` prefix and falls
back to legacy line-split decoding for older blobs, so existing
workspaces keep reading. New writes always use the new format, and
`tsk fix-up` gains a one-shot pass that re-saves every task to migrate
its property tree onto the new encoding.

Per-key property indices (refs/tsk/properties/*) use the same codec
under the hood.

Higher-level value typing (Date / TaskRef / Enum) is intentionally
deferred — values stay as plain strings; any further parsing belongs
in the caller.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+146 -10
+3
src/lib.rs
··· 4 4 mod merge; 5 5 mod object; 6 6 mod patch; 7 + mod propvalue; 7 8 mod properties; 8 9 mod queue; 9 10 mod task; ··· 372 373 let ws = Workspace::from_path(dir)?; 373 374 let n = ws.backfill_status()?; 374 375 println!("backfill-status: set status=open on {n} task(s)"); 376 + let m = ws.migrate_property_encoding()?; 377 + println!("migrate-property-encoding: rewrote {m} task(s)"); 375 378 Ok(()) 376 379 } 377 380 Commands::GitSetup { remote } => {
+4 -4
src/object.rs
··· 9 9 //! <prop-key> → blob: property value (one file per property) 10 10 11 11 use crate::errors::Result; 12 + use crate::propvalue; 12 13 use git2::{Oid, Repository, Signature}; 13 14 use std::collections::BTreeMap; 14 15 use std::fmt::Display; ··· 84 85 if k == CONTENT_FILE || k == TITLE_FILE { 85 86 continue; 86 87 } 87 - let body: String = values.iter().map(|v| format!("{v}\n")).collect(); 88 - let oid = repo.blob(body.as_bytes())?; 88 + let body = propvalue::encode(values); 89 + let oid = repo.blob(&body)?; 89 90 tb.insert(k.as_str(), oid, 0o100644)?; 90 91 } 91 92 Ok(tb.write()?) ··· 156 157 CONTENT_FILE => task.content = val, 157 158 TITLE_FILE => {} // cache only; canonical title is content's first line 158 159 _ => { 159 - let values: Vec<String> = 160 - val.lines().map(str::to_string).collect(); 160 + let values = propvalue::decode(blob.content()); 161 161 task.properties.insert(name, values); 162 162 } 163 163 }
+4 -6
src/properties.rs
··· 9 9 10 10 use crate::errors::Result; 11 11 use crate::object::StableId; 12 + use crate::propvalue; 12 13 use git2::{Oid, Repository, Signature}; 13 14 use std::collections::BTreeMap; 14 15 ··· 37 38 for entry in tree.iter() { 38 39 let Some(name) = entry.name() else { continue }; 39 40 let blob = entry.to_object(repo)?.peel_to_blob()?; 40 - let values: Vec<String> = String::from_utf8_lossy(blob.content()) 41 - .lines() 42 - .map(str::to_string) 43 - .collect(); 41 + let values = propvalue::decode(blob.content()); 44 42 out.insert(StableId(name.to_string()), values); 45 43 } 46 44 Ok(out) ··· 61 59 } 62 60 let mut tb = repo.treebuilder(None)?; 63 61 for (stable, values) in entries { 64 - let body: String = values.iter().map(|v| format!("{v}\n")).collect(); 65 - let oid = repo.blob(body.as_bytes())?; 62 + let body = propvalue::encode(values); 63 + let oid = repo.blob(&body)?; 66 64 tb.insert(stable.0.as_str(), oid, 0o100644)?; 67 65 } 68 66 let tree_oid: Oid = tb.write()?;
+108
src/propvalue.rs
//! Encoding for property value blobs.
//!
//! Each value is prefixed with `size: N\n` and followed by exactly `N`
//! bytes plus a trailing `\n` separator. Length-prefix means values may
//! contain any characters (including newlines) without escaping.
//!
//! Reading is forgiving: blobs that don't start with `size: ` are decoded
//! using the legacy line-split format (one value per non-empty line). New
//! writes always use the size-prefix format, so legacy blobs migrate
//! automatically on the next save. Malformed size-prefixed blobs decode
//! as many leading well-formed blocks as possible, then stop.

/// Serialize a list of values as length-prefixed blocks.
///
/// Returns an empty blob for an empty list. The output round-trips
/// exactly through [`decode`].
pub fn encode(values: &[String]) -> Vec<u8> {
    // Pre-size the buffer: per value we write a small header
    // ("size: " + up-to-20 digits + '\n'), the bytes, and one separator.
    let cap = values.iter().map(|v| v.len() + 28).sum();
    let mut out = Vec::with_capacity(cap);
    for v in values {
        out.extend_from_slice(format!("size: {}\n", v.len()).as_bytes());
        out.extend_from_slice(v.as_bytes());
        out.push(b'\n');
    }
    out
}

/// Parse a property value blob.
///
/// Detects the size-prefixed format via its `size: ` magic; anything else
/// is treated as a legacy line-split blob (blank lines skipped). Never
/// fails: undecodable trailing garbage is silently dropped, matching the
/// forgiving-reader policy documented above.
pub fn decode(bytes: &[u8]) -> Vec<String> {
    if bytes.starts_with(b"size: ") {
        return decode_size_prefixed(bytes);
    }
    // Legacy format: one value per non-empty line. Lossy UTF-8 because
    // legacy blobs were written from `String`s and should already be valid.
    String::from_utf8_lossy(bytes)
        .lines()
        .filter(|l| !l.is_empty())
        .map(str::to_string)
        .collect()
}

/// Decode `size: N\n<N bytes>\n` blocks until the input is exhausted or a
/// block is malformed/truncated (then return what was parsed so far).
fn decode_size_prefixed(bytes: &[u8]) -> Vec<String> {
    let mut out = Vec::new();
    let mut rest = bytes;
    while !rest.is_empty() {
        // Header line: everything up to the first newline.
        let Some(nl) = rest.iter().position(|b| *b == b'\n') else {
            break;
        };
        let header = std::str::from_utf8(&rest[..nl]).unwrap_or("");
        let Some(size_str) = header.strip_prefix("size: ") else {
            break;
        };
        let Ok(size) = size_str.parse::<usize>() else {
            break;
        };
        rest = &rest[nl + 1..];
        // Need `size` value bytes plus the trailing separator. Written as
        // `<= size` rather than `< size + 1` so a corrupt header such as
        // `size: 18446744073709551615` cannot overflow `usize` (which
        // would panic in debug builds and mis-slice in release builds).
        if rest.len() <= size {
            break;
        }
        let value = String::from_utf8_lossy(&rest[..size]).into_owned();
        out.push(value);
        // Safe: the guard above proved rest.len() > size, so size < usize::MAX.
        rest = &rest[size + 1..];
    }
    out
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn round_trip_simple_values() {
        let values = vec!["high".to_string(), "alpha".to_string()];
        let bytes = encode(&values);
        assert_eq!(decode(&bytes), values);
    }

    #[test]
    fn round_trip_with_embedded_newline() {
        let values = vec!["line1\nline2\n\nline4".to_string(), "next".to_string()];
        let bytes = encode(&values);
        assert_eq!(decode(&bytes), values);
    }

    #[test]
    fn round_trip_empty_string_value() {
        let values = vec!["".to_string(), "non-empty".to_string()];
        let bytes = encode(&values);
        assert_eq!(decode(&bytes), values);
    }

    #[test]
    fn empty_list_encodes_to_empty_blob() {
        assert_eq!(encode(&[]), Vec::<u8>::new());
        assert!(decode(&[]).is_empty());
    }

    #[test]
    fn legacy_line_split_format_still_decodes() {
        let legacy = b"high\nalpha\nbeta\n";
        assert_eq!(
            decode(legacy),
            vec!["high".to_string(), "alpha".to_string(), "beta".to_string()]
        );
    }

    #[test]
    fn legacy_format_skips_blank_lines() {
        let legacy = b"high\n\nalpha\n";
        assert_eq!(
            decode(legacy),
            vec!["high".to_string(), "alpha".to_string()]
        );
    }

    #[test]
    fn truncated_or_overflowing_size_header_is_ignored() {
        // Declared size exceeds remaining bytes: nothing decodable.
        assert!(decode(b"size: 10\nabc\n").is_empty());
        // usize::MAX in the header must not panic or wrap.
        assert!(decode(b"size: 18446744073709551615\n").is_empty());
        // Well-formed leading block survives a truncated trailing one.
        assert_eq!(decode(b"size: 2\nhi\nsize: 9\nx"), vec!["hi".to_string()]);
    }
}
+27
src/workspace.rs
··· 523 523 Ok(updated) 524 524 } 525 525 526 + /// Re-save every task in the active namespace whose property blobs are 527 + /// in the legacy line-split encoding, rewriting them as size-prefixed. 528 + /// Returns the number of tasks rewritten. Idempotent — already-migrated 529 + /// tasks have a tree that matches the rewrite, so `object::update` 530 + /// no-ops for them. 531 + pub fn migrate_property_encoding(&self) -> Result<usize> { 532 + let repo = self.repo()?; 533 + let ns = namespace::read(&repo, &self.namespace())?; 534 + let mut rewritten = 0usize; 535 + for (human, _stable) in ns.mapping.iter() { 536 + let task = self.task(TaskIdentifier::Id(Id(*human)))?; 537 + let head_before = repo 538 + .find_reference(&task.stable.refname()) 539 + .ok() 540 + .and_then(|r| r.target()); 541 + self.save_task(&task)?; 542 + let head_after = repo 543 + .find_reference(&task.stable.refname()) 544 + .ok() 545 + .and_then(|r| r.target()); 546 + if head_before != head_after { 547 + rewritten += 1; 548 + } 549 + } 550 + Ok(rewritten) 551 + } 552 + 526 553 /// Drop a task from the active queue and mark it `status=done`. The 527 554 /// namespace binding is kept so the task remains addressable by its 528 555 /// human id (and discoverable via `tsk prop find status done`); the