this repo has no description
13
fork

Configure Feed

Select the types of activity you want to include in your feed.

remove dead file

-349
-349
MIGRATION_ZG_TO_UUCODE.md
··· 1 - # Migration Plan: zg → uucode 2 - 3 - ## Overview 4 - 5 - This document outlines the plan to migrate from the `zg` dependency to `uucode` for grapheme segmentation and display width measurement in libvaxis. 6 - 7 - ## Key Advantage 8 - 9 - **No allocation required** - uucode uses compile-time lookup tables instead of runtime-allocated data structures, eliminating the need to initialize, pass around, and deinitialize Unicode data. 10 - 11 - ## Current zg Usage 12 - 13 - ### Dependencies (from build.zig) 14 - - `code_point` - UTF-8 codepoint iteration 15 - - `Graphemes` - Grapheme cluster segmentation 16 - - `DisplayWidth` - Display width calculation 17 - 18 - ### Files Using zg 19 - - `src/main.zig` - Re-exports `Graphemes` and `DisplayWidth` 20 - - `src/Unicode.zig` - Wrapper around zg data (allocates) 21 - - `src/gwidth.zig` - Width calculation using `DisplayWidth` 22 - - `src/Parser.zig` - Uses `code_point` and `Graphemes` 23 - - `src/Loop.zig` - Uses `Graphemes` 24 - - `src/widgets/TextView.zig` - Uses `Graphemes` and `DisplayWidth` 25 - - `src/widgets/terminal/Terminal.zig` - Uses `code_point` and `DisplayWidth` 26 - 27 - ### Allocation Pattern (zg) 28 - ```zig 29 - // Initialize with allocator 30 - const graphemes = try Graphemes.init(alloc); 31 - defer graphemes.deinit(alloc); 32 - 33 - const width_data = try DisplayWidth.init(alloc); 34 - defer width_data.deinit(alloc); 35 - 36 - // Use 37 - var iter = graphemes.iterator(str); 38 - const width = width_data.codePointWidth(cp); 39 - ``` 40 - 41 - ## uucode API 42 - 43 - ### Available Modules 44 - - `uucode.utf8.Iterator` - UTF-8 codepoint iteration (no allocation) 45 - - `uucode.grapheme.Iterator` - Grapheme cluster iteration (no allocation) 46 - - `uucode.get()` - Compile-time Unicode property lookup (no allocation) 47 - 48 - ### Usage Pattern (uucode) 49 - ```zig 50 - // UTF-8 iteration 51 - var cp_iter = uucode.utf8.Iterator.init(str); 52 - while (cp_iter.next()) |cp| { 53 - // process codepoint 54 
- } 55 - 56 - // Grapheme iteration 57 - var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 58 - while (grapheme_iter.next()) |result| { 59 - // result.cp is the codepoint 60 - // result.is_break indicates grapheme boundary 61 - } 62 - 63 - // Width lookup (requires wcwidth field in build config) 64 - const width = uucode.get(.wcwidth, cp); 65 - 66 - // Grapheme width (from uucode.x extension) 67 - const g_width = uucode.x.grapheme.unverifiedWcwidth(grapheme_iter); 68 - ``` 69 - 70 - ### Iterator Result Structure 71 - ```zig 72 - pub const IteratorResult = struct { 73 - cp: u21, // The codepoint 74 - is_break: bool, // true if this is a grapheme cluster boundary 75 - }; 76 - ``` 77 - 78 - ## Migration Steps 79 - 80 - ### 1. Update build.zig.zon 81 - 82 - Confirm the uucode dependency is declared (the `wcwidth` field itself is requested in build.zig, see step 2): 83 - 84 - ```zig 85 - .uucode = .{ 86 - .url = "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732", 87 - .hash = "uucode-0.1.0-ZZjBPj96QADXyt5sqwBJUnhaDYs_qBeeKijZvlRa0eqM", 88 - }, 89 - ``` 90 - 91 - ### 2. Update build.zig 92 - 93 - In the uucode dependency configuration, update the fields array: 94 - 95 - ```zig 96 - const uucode_dep = b.dependency("uucode", .{ 97 - .target = target, 98 - .optimize = optimize, 99 - .fields = @as([]const []const u8, &.{ 100 - "grapheme_break", 101 - "wcwidth", // ADD THIS 102 - }), 103 - }); 104 - ``` 105 - 106 - Remove zg dependency: 107 - - Delete the `zg_dep` declaration 108 - - Remove all `zg_dep.module()` references 109 - - Remove `.zg` from build.zig.zon 110 - 111 - ### 3. Update Module Imports in build.zig 112 - 113 - Remove: 114 - ```zig 115 - vaxis_mod.addImport("code_point", zg_dep.module("code_point")); 116 - vaxis_mod.addImport("Graphemes", zg_dep.module("Graphemes")); 117 - vaxis_mod.addImport("DisplayWidth", zg_dep.module("DisplayWidth")); 118 - ``` 119 - 120 - No replacement needed - uucode is already imported. 121 - 122 - ### 4. 
Update src/main.zig 123 - 124 - Remove: 125 - ```zig 126 - pub const DisplayWidth = @import("DisplayWidth"); 127 - pub const Graphemes = @import("Graphemes"); 128 - ``` 129 - 130 - These become internal implementation details or are removed entirely. 131 - 132 - ### 5. Update src/Unicode.zig 133 - 134 - **Before:** 135 - ```zig 136 - const Graphemes = @import("Graphemes"); 137 - const DisplayWidth = @import("DisplayWidth"); 138 - 139 - const Unicode = @This(); 140 - 141 - width_data: DisplayWidth, 142 - 143 - pub fn init(alloc: std.mem.Allocator) !Unicode { 144 - return .{ 145 - .width_data = try DisplayWidth.init(alloc), 146 - }; 147 - } 148 - 149 - pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void { 150 - self.width_data.deinit(alloc); 151 - } 152 - 153 - pub fn graphemeIterator(self: *const Unicode, str: []const u8) Graphemes.Iterator { 154 - return self.width_data.graphemes.iterator(str); 155 - } 156 - ``` 157 - 158 - **After:** 159 - ```zig 160 - const uucode = @import("uucode"); 161 - 162 - const Unicode = @This(); 163 - 164 - // No fields needed - all operations are stateless 165 - 166 - pub fn init(alloc: std.mem.Allocator) !Unicode { 167 - _ = alloc; 168 - return .{}; 169 - } 170 - 171 - pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void { 172 - _ = self; 173 - _ = alloc; 174 - } 175 - 176 - pub fn graphemeIterator(self: *const Unicode, str: []const u8) uucode.grapheme.Iterator(uucode.utf8.Iterator) { 177 - _ = self; 178 - return uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 179 - } 180 - ``` 181 - 182 - Or consider removing the `Unicode` wrapper entirely since it no longer serves a purpose. 183 - 184 - ### 6. 
Update src/gwidth.zig 185 - 186 - **Before:** 187 - ```zig 188 - const DisplayWidth = @import("DisplayWidth"); 189 - const code_point = @import("code_point"); 190 - 191 - pub fn gwidth(str: []const u8, method: Method, data: *const DisplayWidth) u16 { 192 - switch (method) { 193 - .unicode => { 194 - return @intCast(data.strWidth(str)); 195 - }, 196 - .wcwidth => { 197 - var total: u16 = 0; 198 - var iter: code_point.Iterator = .{ .bytes = str }; 199 - while (iter.next()) |cp| { 200 - const w: u16 = switch (cp.code) { 201 - 0x1f3fb...0x1f3ff => 2, 202 - else => @max(0, data.codePointWidth(cp.code)), 203 - }; 204 - total += w; 205 - } 206 - return total; 207 - }, 208 - // ... 209 - } 210 - } 211 - ``` 212 - 213 - **After:** 214 - ```zig 215 - const uucode = @import("uucode"); 216 - 217 - pub fn gwidth(str: []const u8, method: Method) u16 { 218 - switch (method) { 219 - .unicode => { 220 - var total: u16 = 0; 221 - var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 222 - while (grapheme_iter.next()) |result| { 223 - if (result.is_break) { 224 - // Calculate width for previous grapheme 225 - // This requires buffering the grapheme - may need different approach 226 - } 227 - } 228 - return total; 229 - }, 230 - .wcwidth => { 231 - var total: u16 = 0; 232 - var iter = uucode.utf8.Iterator.init(str); 233 - while (iter.next()) |cp| { 234 - const w: u16 = switch (cp) { 235 - 0x1f3fb...0x1f3ff => 2, 236 - else => @max(0, uucode.get(.wcwidth, cp)), 237 - }; 238 - total += w; 239 - } 240 - return total; 241 - }, 242 - // ... 243 - } 244 - } 245 - ``` 246 - 247 - Note: Remove the `data` parameter entirely. 248 - 249 - ### 7. 
Update src/Parser.zig 250 - 251 - Replace: 252 - ```zig 253 - const code_point = @import("code_point"); 254 - const Graphemes = @import("Graphemes"); 255 - ``` 256 - 257 - With: 258 - ```zig 259 - const uucode = @import("uucode"); 260 - ``` 261 - 262 - Replace: 263 - ```zig 264 - grapheme_data: *const Graphemes, 265 - ``` 266 - 267 - With: 268 - ```zig 269 - // Remove this field entirely if only used for iteration 270 - ``` 271 - 272 - Replace usage: 273 - ```zig 274 - var iter: code_point.Iterator = .{ .bytes = input }; 275 - ``` 276 - 277 - With: 278 - ```zig 279 - var iter = uucode.utf8.Iterator.init(input); 280 - ``` 281 - 282 - ### 8. Update Other Files 283 - 284 - Apply similar transformations to: 285 - - `src/Loop.zig` 286 - - `src/widgets/TextView.zig` 287 - - `src/widgets/terminal/Terminal.zig` 288 - 289 - Pattern: 290 - 1. Replace imports with `const uucode = @import("uucode");` 291 - 2. Remove allocated data fields 292 - 3. Replace `code_point.Iterator` with `uucode.utf8.Iterator` 293 - 4. Replace `graphemes.iterator()` with `uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str))` 294 - 5. Replace `data.codePointWidth(cp)` with `uucode.get(.wcwidth, cp)` 295 - 296 - ### 9. Update Tests 297 - 298 - All test code that does: 299 - ```zig 300 - const data = try DisplayWidth.init(alloc); 301 - defer data.deinit(alloc); 302 - ``` 303 - 304 - Can be removed entirely. 
Width lookups become: 305 - ```zig 306 - const width = uucode.get(.wcwidth, cp); 307 - ``` 308 - 309 - ## API Mapping Reference 310 - 311 - | zg API | uucode API | 312 - |--------|------------| 313 - | `code_point.Iterator{ .bytes = str }` | `uucode.utf8.Iterator.init(str)` | 314 - | `iter.next().code` | `iter.next()` (returns u21 directly) | 315 - | `Graphemes.init(alloc)` | _(no initialization needed)_ | 316 - | `graphemes.iterator(str)` | `uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str))` | 317 - | `DisplayWidth.init(alloc)` | _(no initialization needed)_ | 318 - | `width_data.codePointWidth(cp)` | `uucode.get(.wcwidth, cp)` | 319 - | `width_data.strWidth(str)` | _(implement using iterator + uucode.get)_ | 320 - 321 - ## Benefits 322 - 323 - 1. **No allocations** - All Unicode data is compile-time generated 324 - 2. **Simpler API** - No init/deinit lifecycle 325 - 3. **Less state to manage** - No data structures to pass around 326 - 4. **Smaller binary** - Only requested fields are included 327 - 5. **Type-safe lookups** - Field names are compile-time checked 328 - 329 - ## Potential Challenges 330 - 331 - 1. **String width calculation** - zg's `strWidth()` is convenient; need to implement equivalent using iterator 332 - 2. **Grapheme-aware width** - May need `uucode.x.grapheme.unverifiedWcwidth()` for proper emoji/ZWJ handling 333 - 3. **Iterator API differences** - zg returns struct with `.code`, uucode returns `u21` directly 334 - 4. **Breaking API changes** - Any public APIs exposing `Graphemes` or `DisplayWidth` types will need updates 335 - 336 - ## Testing Strategy 337 - 338 - 1. Run existing tests with uucode implementation 339 - 2. Pay special attention to: 340 - - Emoji with ZWJ sequences 341 - - Skin tone modifiers 342 - - Variation selectors 343 - - Complex grapheme clusters 344 - 3. Compare width calculations with zg implementation 345 - 4. 
Test memory usage (should be lower without allocations) 346 - 347 - ## Rollback Plan 348 - 349 - If issues arise, the zg dependency can be re-added to build.zig.zon and the imports restored. The changes are isolated to a small number of files.