this repo has no description
1const std = @import("std");
2const uucode = @import("uucode");
3
4/// A thin wrapper around Unicode data - no longer needs allocation with uucode
5const Unicode = @This();
6
/// Construct the Unicode handle.
///
/// With uucode there is nothing to set up at runtime, so `alloc` is ignored
/// and an empty value is returned. The allocator parameter and the error
/// union in the return type are kept only for API compatibility.
pub fn init(alloc: std.mem.Allocator) !Unicode {
    // Intentionally unused: retained so existing call sites keep compiling.
    _ = alloc;
    const unicode: Unicode = .{};
    return unicode;
}
13
/// Release the Unicode handle.
///
/// With uucode nothing needs to be freed, so this is a no-op. Both
/// parameters are accepted solely for API compatibility.
pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void {
    // Neither argument is needed; discard them explicitly.
    _ = alloc;
    _ = self;
}
20
/// Byte span of one grapheme within a string, kept in the shape of the old
/// API.
pub const Grapheme = struct {
    /// Byte offset at which the span begins.
    start: usize,
    /// Length of the span in bytes.
    len: usize,

    /// Return the sub-slice of `str` covered by this span.
    /// `str` must contain at least `start + len` bytes.
    pub fn bytes(self: Grapheme, str: []const u8) []const u8 {
        const from = self.start;
        // Skip to the span's first byte, then take `len` bytes from there.
        return str[from..][0..self.len];
    }
};
30
/// Old API-compatible iterator that yields `Grapheme` values (with `.len`
/// and `.bytes()`) over a string, built on top of uucode's grapheme iterator.
pub const GraphemeIterator = struct {
    /// The string being segmented.
    str: []const u8,
    /// Underlying uucode iterator. Each step reports one code point and
    /// whether that code point ends the current grapheme (`is_break`).
    inner: uucode.grapheme.Iterator(uucode.utf8.Iterator),
    /// Byte offset in `str` where the grapheme currently being scanned begins.
    start: usize = 0,
    /// True when the last consumed code point ended a grapheme (or nothing
    /// has been consumed yet); false while in the middle of a cluster.
    prev_break: bool = true,

    /// Create an iterator positioned at the beginning of `str`.
    pub fn init(str: []const u8) GraphemeIterator {
        return .{
            .str = str,
            .inner = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)),
        };
    }

    /// Return the next grapheme as a byte span into `str`, or `null` once
    /// the input is exhausted.
    pub fn next(self: *GraphemeIterator) ?Grapheme {
        while (self.inner.next()) |res| {
            if (!res.is_break) {
                // Still inside the current cluster; keep consuming.
                self.prev_break = false;
                continue;
            }

            // A break marks the end of the current grapheme.
            //
            // `start` already points at the first byte of this cluster: it
            // is 0 initially and is moved to the end of the previous cluster
            // every time one is returned. It is deliberately NOT re-derived
            // from the UTF-8 length of the decoded code point: that length
            // need not equal the number of bytes consumed for ill-formed
            // input, which could produce a wrong offset or underflow.
            //
            // NOTE(review): assumes `inner.i` is the byte offset just past
            // the code point last returned by `inner.next()` - confirm
            // against uucode.
            const s = self.start;
            const end = self.inner.i;
            self.start = end;
            self.prev_break = true;
            return .{ .start = s, .len = end - s };
        }

        // Flush the last grapheme if the inner iterator ended mid-cluster.
        if (!self.prev_break and self.start < self.str.len) {
            const s = self.start;
            const len = self.str.len - s;
            self.start = self.str.len;
            self.prev_break = true;
            return .{ .start = s, .len = len };
        }

        return null;
    }
};
77
/// Build a `GraphemeIterator` over `str`.
pub fn graphemeIterator(str: []const u8) GraphemeIterator {
    // The result type is known from the signature, so a decl literal suffices.
    return .init(str);
}