this repo has no description
13
fork

Configure Feed

Select the types of activity you want to include in your feed.

parser: handle graphemes

I'm not positive this is the final approach. We fill in the `text` field
if there was multi-codepoint text generated from the key_press.

Signed-off-by: Tim Culverhouse <tim@timculverhouse.com>

+137 -18
+78
src/GraphemeCache.zig
//! A fixed-capacity look-aside cache which maps multi-byte graphemes to
//! synthetic "codepoints" that lie outside of the valid Unicode range. No
//! allocation is performed: all storage lives inside the struct itself.

const std = @import("std");
const testing = std.testing;

const GraphemeCache = @This();

/// the underlying storage for grapheme bytes
buf: [1024 * 4]u8 = undefined,

/// the start index (into `buf`) of the next grapheme
idx: usize = 0,

/// the cache of graphemes. This allows up to 1024 graphemes, with the shared
/// byte storage sized for an average of 4 bytes per grapheme. Only the first
/// `g_idx` entries are initialized
grapheme_buf: [1024]Grapheme = undefined,

/// index of our next grapheme, which is also the count of stored graphemes
g_idx: u21 = 0,

/// Synthetic codepoints handed out by `put` start at `UNICODE_MAX + 1`.
/// Note that 1_114_112 == 0x110000, which is already one past the highest
/// Unicode scalar value (0x10FFFF).
pub const UNICODE_MAX = 1_114_112;

const Grapheme = struct {
    /// the synthetic codepoint assigned to this grapheme
    /// (its slot in `grapheme_buf` + UNICODE_MAX + 1)
    codepoint: u21,
    /// start of the grapheme's bytes within `buf` (inclusive)
    start: usize,
    /// end of the grapheme's bytes within `buf` (exclusive)
    end: usize,
};

/// Put a slice of bytes in the cache as a grapheme and return the synthetic
/// codepoint which identifies it. The bytes are copied, so the caller's slice
/// does not need to outlive the call. Storing the same bytes twice returns the
/// same codepoint.
///
/// Errors:
///   - error.OutOfGraphemeBufferMemory: the byte storage is full
///   - error.OutOfGraphemeMemory: the grapheme table is full
pub fn put(self: *GraphemeCache, bytes: []const u8) !u21 {
    // See if we already have these bytes. It's a likely case that if we get one
    // grapheme, we'll get it again, so this saves a lot of storage. Only the
    // first `g_idx` entries have been written; anything past that is still
    // `undefined` and must never be read.
    for (self.grapheme_buf[0..self.g_idx]) |grapheme| {
        const g_bytes = self.buf[grapheme.start..grapheme.end];
        if (std.mem.eql(u8, g_bytes, bytes)) return grapheme.codepoint;
    }

    if (self.idx + bytes.len > self.buf.len) return error.OutOfGraphemeBufferMemory;
    if (self.g_idx >= self.grapheme_buf.len) return error.OutOfGraphemeMemory;

    // copy the grapheme to our storage
    const end = self.idx + bytes.len;
    @memcpy(self.buf[self.idx..end], bytes);

    const g: Grapheme = .{
        // assign a codepoint that is always outside of valid unicode
        .codepoint = self.g_idx + UNICODE_MAX + 1,
        .start = self.idx,
        .end = end,
    };
    self.grapheme_buf[self.g_idx] = g;
    self.g_idx += 1;
    self.idx = end;

    return g.codepoint;
}

/// Get the slice of bytes for a given grapheme. `cp` must be a value that was
/// previously returned from `put`. The returned slice points into the cache's
/// internal storage and is valid for as long as the cache is.
///
/// Errors:
///   - error.InvalidGraphemeIndex: `cp` was not handed out by this cache
pub fn get(self: *GraphemeCache, cp: u21) ![]const u8 {
    if (cp <= UNICODE_MAX) return error.InvalidGraphemeIndex;
    const idx: usize = cp - (UNICODE_MAX + 1);
    // `g_idx` is the next *free* slot, so it is itself out of range
    if (idx >= self.g_idx) return error.InvalidGraphemeIndex;
    const g = self.grapheme_buf[idx];
    return self.buf[g.start..g.end];
}

test "GraphemeCache: roundtrip" {
    var cache: GraphemeCache = .{};
    const cp = try cache.put("abc");
    const bytes = try cache.get(cp);
    try testing.expectEqualStrings("abc", bytes);

    const cp_2 = try cache.put("abc");
    try testing.expectEqual(cp, cp_2);

    const cp_3 = try cache.put("def");
    try testing.expectEqual(cp + 1, cp_3);
}

test "GraphemeCache: invalid lookups" {
    var cache: GraphemeCache = .{};
    // nothing stored yet: the first synthetic codepoint is not valid
    try testing.expectError(error.InvalidGraphemeIndex, cache.get(UNICODE_MAX + 1));
    // real unicode codepoints are never valid keys
    try testing.expectError(error.InvalidGraphemeIndex, cache.get('a'));

    const cp = try cache.put("abc");
    // one past the last stored grapheme is not valid either
    try testing.expectError(error.InvalidGraphemeIndex, cache.get(cp + 1));
}
+1 -2
src/Key.zig
··· 11 11 num_lock: bool = false, 12 12 }; 13 13 14 - /// the unicode codepoint of the key event. This can be greater than the maximum 15 - /// allowable unicode codepoint for special keys 14 + /// the unicode codepoint of the key event. 16 15 codepoint: u21, 17 16 18 17 /// the text generated from the key event, if any
+6 -1
src/Tty.zig
··· 143 143 switch (event) { 144 144 .key_press => |key| { 145 145 if (@hasField(EventType, "key_press")) { 146 - vx.postEvent(.{ .key_press = key }); 146 + // HACK: yuck. there has to be a better way 147 + var mut_key = key; 148 + if (key.text) |text| { 149 + mut_key.codepoint = try vx.g_cache.put(text); 150 + } 151 + vx.postEvent(.{ .key_press = mut_key }); 147 152 } 148 153 }, 149 154 .focus_in => {
+1
src/main.zig
··· 13 13 } 14 14 15 15 test { 16 + _ = @import("GraphemeCache.zig"); 16 17 _ = @import("Key.zig"); 17 18 _ = @import("Options.zig"); 18 19 _ = @import("Screen.zig");
+41 -15
src/parser.zig
··· 4 4 const Key = @import("Key.zig"); 5 5 const CodePointIterator = @import("ziglyph").CodePointIterator; 6 6 const graphemeBreak = @import("ziglyph").graphemeBreak; 7 + const UNICODE_MAX = @import("GraphemeCache.zig").UNICODE_MAX; 7 8 8 9 const log = std.log.scoped(.parser); 9 10 ··· 82 83 // 0x20...0x7E => .{ .codepoint = b }, 83 84 0x7F => .{ .codepoint = Key.backspace }, 84 85 else => blk: { 85 - // TODO: iterate codepoints to find a complete grapheme. 86 - // For now we are just taking the first codepoint and 87 - // throwing a warning. I think we'll end up mapping a 88 - // u21 to a look-aside table of graphemes, I just need 89 - // to implement that table somewhere and give access to 90 - // it here. 91 86 var iter: CodePointIterator = .{ .bytes = input[i..] }; 92 87 // return null if we don't have a valid codepoint 93 - const cp = iter.next() orelse return .{ .event = null, .n = 0 }; 94 - if (iter.next()) |next_cp| { 95 - var break_state: u3 = 0; 96 - if (!graphemeBreak(cp.code, next_cp.code, &break_state)) { 97 - log.warn("grapheme support not implemented yet", .{}); 88 + var cp = iter.next() orelse return .{ .event = null, .n = 0 }; 89 + 90 + var code = cp.code; 91 + const g_start = i; 92 + i += cp.len - 1; // subtract one for the loop iter 93 + var g_state: u3 = 0; 94 + while (iter.next()) |next_cp| { 95 + if (graphemeBreak(cp.code, next_cp.code, &g_state)) { 96 + break; 98 97 } 98 + code = UNICODE_MAX + 1; 99 + i += next_cp.len; 100 + cp = next_cp; 99 101 } 100 - i += cp.len - 1; 101 - break :blk .{ .codepoint = cp.code }; 102 + const text: ?[]const u8 = multi: { 103 + if (code > UNICODE_MAX) { 104 + break :multi input[g_start .. 
i + 1]; 105 + } else { 106 + break :multi null; 107 + } 108 + }; 109 + 110 + break :blk .{ .codepoint = code, .text = text }; 102 111 }, 103 112 }; 104 113 return .{ ··· 562 571 const input = "👩‍🚀"; 563 572 const result = try parse(input); 564 573 const expected_key: Key = .{ 565 - .codepoint = 0x1F469, 574 + .codepoint = UNICODE_MAX + 1, 575 + .text = input, 566 576 }; 567 577 const expected_event: Event = .{ .key_press = expected_key }; 568 578 569 - try testing.expectEqual(4, result.n); 579 + try testing.expectEqual(input.len, result.n); 570 580 try testing.expectEqual(expected_event, result.event); 571 581 } 582 + 583 + test "parse: multiple codepoint grapheme with more after" { 584 + // TODO: this test is passing but throws a warning. Not sure how we'll 585 + // handle graphemes yet 586 + const input = "👩‍🚀abc"; 587 + const result = try parse(input); 588 + const expected_key: Key = .{ 589 + .codepoint = UNICODE_MAX + 1, 590 + .text = "👩‍🚀", 591 + }; 592 + 593 + try testing.expectEqual(expected_key.text.?.len, result.n); 594 + const actual = result.event.?.key_press; 595 + try testing.expectEqualStrings(expected_key.text.?, actual.text.?); 596 + try testing.expectEqual(expected_key.codepoint, actual.codepoint); 597 + }
+10
src/vaxis.zig
··· 9 9 const Window = @import("Window.zig"); 10 10 const Options = @import("Options.zig"); 11 11 const Style = @import("cell.zig").Style; 12 + const GraphemeCache = @import("GraphemeCache.zig"); 12 13 13 14 /// Vaxis is the entrypoint for a Vaxis application. The provided type T should 14 15 /// be a tagged union which contains all of the events the application will ··· 46 47 renders: usize = 0, 47 48 render_dur: i128 = 0, 48 49 50 + // grapheme cache 51 + g_cache: GraphemeCache = .{}, 52 + 49 53 /// Initialize Vaxis with runtime options 50 54 pub fn init(_: Options) !Self { 51 55 return Self{ ··· 78 82 const tpr = @divTrunc(self.render_dur, self.renders); 79 83 log.info("total renders = {d}", .{self.renders}); 80 84 log.info("microseconds per render = {d}", .{tpr}); 85 + log.info("cached graphemes n = {d} / {d}, bytes = {d} / {d}", .{ 86 + self.g_cache.g_idx, 87 + self.g_cache.grapheme_buf.len, 88 + self.g_cache.idx, 89 + self.g_cache.buf.len, 90 + }); 81 91 } 82 92 } 83 93