this repo has no description
1const std = @import("std");
2const uucode = @import("uucode");
3
/// Old API-compatible grapheme value: a byte range (`start`, `len`) into a
/// string that the caller keeps separately.
pub const Grapheme = struct {
    /// Byte offset of the first byte of the range.
    start: usize,
    /// Number of bytes in the range.
    len: usize,

    /// Returns the sub-slice of `str` covered by this range.
    /// `str` must be at least `start + len` bytes long; the slice borrows
    /// from `str` and nothing is copied.
    pub fn bytes(self: Grapheme, str: []const u8) []const u8 {
        const end = self.start + self.len;
        return str[self.start..end];
    }
};
13
/// Old API-compatible iterator: walks `str` and yields one `Grapheme`
/// (byte offset + byte length, usable with `Grapheme.bytes`) per cluster.
pub const GraphemeIterator = struct {
    /// The text being segmented; used to bound the final flushed cluster.
    str: []const u8,
    /// Underlying uucode iterator. Each result carries a code point and an
    /// `is_break` flag, which this wrapper treats as "the current cluster
    /// ends after this code point".
    inner: uucode.grapheme.Iterator(uucode.utf8.Iterator),
    /// Byte offset in `str` where the cluster currently being assembled begins.
    start: usize = 0,
    /// True when the most recently consumed code point ended a cluster, or
    /// when nothing has been consumed yet.
    prev_break: bool = true,

    /// Creates an iterator positioned at the beginning of `str`.
    /// `str` is borrowed, not copied.
    pub fn init(str: []const u8) GraphemeIterator {
        return .{
            .str = str,
            .inner = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)),
        };
    }

    /// Returns the next grapheme cluster as a byte range into `str`, or
    /// `null` once the input is exhausted.
    pub fn next(self: *GraphemeIterator) ?Grapheme {
        while (self.inner.nextCodePoint()) |res| {
            // `self.start` already points at the first byte of the current
            // cluster: it is 0 initially and is moved to the end offset of
            // the previous cluster every time one is emitted. It must NOT be
            // re-derived as `inner.i - encodedLen(res.code_point)`: if the
            // decoder hands back a code point whose UTF-8 length differs from
            // the bytes actually consumed (e.g. a substituted replacement
            // character for a malformed byte), that subtraction underflows
            // or rewinds into an earlier cluster.
            if (!res.is_break) {
                self.prev_break = false;
                continue;
            }

            // A break marks the end of the current grapheme.
            // NOTE(review): assumes `inner.i` is the byte offset just past
            // the code point that was returned — confirm against uucode.
            const cluster_start = self.start;
            const cluster_end = self.inner.i;
            self.start = cluster_end;
            self.prev_break = true;
            return .{ .start = cluster_start, .len = cluster_end - cluster_start };
        }

        // Flush the last grapheme if the input ended mid-cluster.
        if (!self.prev_break and self.start < self.str.len) {
            const cluster_start = self.start;
            const remaining = self.str.len - cluster_start;
            self.start = self.str.len;
            self.prev_break = true;
            return .{ .start = cluster_start, .len = remaining };
        }

        return null;
    }
};
61
/// Creates a `GraphemeIterator` over `str`.
/// Convenience wrapper around `GraphemeIterator.init`; `str` is borrowed.
pub fn graphemeIterator(str: []const u8) GraphemeIterator {
    return .init(str);
}
66
// Reference every declaration in this file so that each one is analyzed
// when the tests are built.
test {
    const this_file = @This();
    std.testing.refAllDecls(this_file);
}