this repo has no description
13
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix: treat non-ASCII bytes as word characters to avoid splitting UTF-8

isWordChar now treats bytes >= 0x80 as word constituents, preventing
word motion from splitting inside multi-byte UTF-8 sequences like
accented characters. Added regression tests with café/über text.

authored by

Mike Bannister and committed by
Tim Culverhouse
c4eeee81 c09eb895

+61 -6
+29 -3
src/vxfw/TextField.zig
@@ -346,10 +346,11 @@
     self.buf.growGapRight(grapheme.len);
 }
 
-/// Returns true if the byte is a word constituent (alnum or underscore),
-/// matching readline/emacs word character classes.
+/// Returns true if the byte is a word constituent: ASCII alnum, underscore,
+/// or any non-ASCII byte (part of a multi-byte UTF-8 sequence). This ensures
+/// word motion never splits inside non-ASCII characters like accented letters.
 fn isWordChar(c: u8) bool {
-    return std.ascii.isAlphanumeric(c) or c == '_';
+    return std.ascii.isAlphanumeric(c) or c == '_' or c >= 0x80;
 }
 
 /// Moves the cursor backward by one word using character-class boundaries.
@@ -719,6 +720,31 @@
     input.moveBackwardWordwise();
     try std.testing.expectEqualStrings("hello_world-", input.buf.firstHalf());
     input.moveBackwardWordwise();
+    try std.testing.expectEqualStrings("", input.buf.firstHalf());
+}
+
+test "word motion with non-ASCII text" {
+    var input = TextField.init(std.testing.allocator);
+    defer input.deinit();
+    try input.insertSliceAtCursor("café-latte");
+    input.moveBackwardWordwise();
+    try std.testing.expectEqualStrings("café-", input.buf.firstHalf());
+    try std.testing.expectEqualStrings("latte", input.buf.secondHalf());
+    input.moveBackwardWordwise();
+    try std.testing.expectEqualStrings("", input.buf.firstHalf());
+
+    input.moveForwardWordwise();
+    try std.testing.expectEqualStrings("caf\xc3\xa9", input.buf.firstHalf());
+    try std.testing.expectEqualStrings("-latte", input.buf.secondHalf());
+}
+
+test "deleteWordBefore with non-ASCII text" {
+    var input = TextField.init(std.testing.allocator);
+    defer input.deinit();
+    try input.insertSliceAtCursor("über-cool");
+    input.deleteWordBefore();
+    try std.testing.expectEqualStrings("über-", input.buf.firstHalf());
+    input.deleteWordBefore();
     try std.testing.expectEqualStrings("", input.buf.firstHalf());
 }
 
+32 -3
src/widgets/TextInput.zig
@@ -265,10 +265,11 @@
     self.buf.growGapRight(grapheme.len);
 }
 
-/// Returns true if the byte is a word constituent (alnum or underscore),
-/// matching readline/emacs word character classes.
+/// Returns true if the byte is a word constituent: ASCII alnum, underscore,
+/// or any non-ASCII byte (part of a multi-byte UTF-8 sequence). This ensures
+/// word motion never splits inside non-ASCII characters like accented letters.
 fn isWordChar(c: u8) bool {
-    return std.ascii.isAlphanumeric(c) or c == '_';
+    return std.ascii.isAlphanumeric(c) or c == '_' or c >= 0x80;
 }
 
 /// Moves the cursor backward by one word using character-class boundaries.
@@ -573,6 +574,34 @@
     try std.testing.expectEqualStrings("hello_world-", input.buf.firstHalf());
     input.moveBackwardWordwise();
     // "hello_world" is one word (underscore is word char): "|hello_world-test"
+    try std.testing.expectEqualStrings("", input.buf.firstHalf());
+}
+
+test "word motion with non-ASCII text" {
+    var input = TextInput.init(std.testing.allocator);
+    defer input.deinit();
+    // "café-latte" — the é is multi-byte UTF-8, should not split inside it
+    try input.insertSliceAtCursor("café-latte");
+    input.moveBackwardWordwise();
+    try std.testing.expectEqualStrings("café-", input.buf.firstHalf());
+    try std.testing.expectEqualStrings("latte", input.buf.secondHalf());
+    input.moveBackwardWordwise();
+    try std.testing.expectEqualStrings("", input.buf.firstHalf());
+
+    // Forward from start
+    input.moveForwardWordwise();
+    // Should stop at end of "café"
+    try std.testing.expectEqualStrings("caf\xc3\xa9", input.buf.firstHalf());
+    try std.testing.expectEqualStrings("-latte", input.buf.secondHalf());
+}
+
+test "deleteWordBefore with non-ASCII text" {
+    var input = TextInput.init(std.testing.allocator);
+    defer input.deinit();
+    try input.insertSliceAtCursor("über-cool");
+    input.deleteWordBefore();
+    try std.testing.expectEqualStrings("über-", input.buf.firstHalf());
+    input.deleteWordBefore();
     try std.testing.expectEqualStrings("", input.buf.firstHalf());
 }
 