// Forked from stavola.xyz/mlf
// A human-friendly DSL for ATProto Lexicons
1use mlf_lang::ast::*;
2use serde_json::Value as JsonValue;
3use std::fmt;
4use unicode_segmentation::UnicodeSegmentation;
5use regex::Regex;
6use url::Url;
7use time::format_description::well_known::Rfc3339;
8use time::OffsetDateTime;
9use langtag::LangTag;
10
/// A single validation failure: where in the record it happened and why.
#[derive(Debug, Clone)]
pub struct ValidationError {
    /// Location of the offending value (`$` for the record root, dotted field
    /// names and `[index]` suffixes below it).
    pub path: String,
    /// Human-readable description of the problem.
    pub message: String,
}

impl fmt::Display for ValidationError {
    /// Renders the error as `<path>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { path, message } = self;
        write!(f, "{path}: {message}")
    }
}

impl std::error::Error for ValidationError {}
24
25pub struct RecordValidator<'a> {
26 lexicon: &'a Lexicon,
27}
28
29impl<'a> RecordValidator<'a> {
30 pub fn new(lexicon: &'a Lexicon) -> Self {
31 Self { lexicon }
32 }
33
34 pub fn validate_record(&self, record: &JsonValue) -> Result<(), Vec<ValidationError>> {
35 let mut errors = Vec::new();
36
37 // Find the main record definition
38 let main_item = self.find_main_item()?;
39
40 match main_item {
41 Item::Record(record_def) => {
42 // Validate as object with the record's fields
43 self.validate_object(record, &record_def.fields, "$", &mut errors);
44 }
45 Item::Query(_) | Item::Procedure(_) => {
46 errors.push(ValidationError {
47 path: "$".to_string(),
48 message: "Cannot validate records against query/procedure definitions".to_string(),
49 });
50 }
51 _ => {
52 errors.push(ValidationError {
53 path: "$".to_string(),
54 message: "No record definition found in lexicon".to_string(),
55 });
56 }
57 }
58
59 if errors.is_empty() {
60 Ok(())
61 } else {
62 Err(errors)
63 }
64 }
65
66 fn find_main_item(&self) -> Result<&Item, Vec<ValidationError>> {
67 // Look for a record, query, or procedure (main definitions)
68 for item in &self.lexicon.items {
69 match item {
70 Item::Record(_) | Item::Query(_) | Item::Procedure(_) => {
71 return Ok(item);
72 }
73 _ => continue,
74 }
75 }
76 Err(vec![ValidationError {
77 path: "$".to_string(),
78 message: "No main definition found in lexicon".to_string(),
79 }])
80 }
81
82 fn validate_against_type(
83 &self,
84 value: &JsonValue,
85 ty: &Type,
86 path: &str,
87 errors: &mut Vec<ValidationError>,
88 ) {
89 match ty {
90 Type::Primitive { kind, .. } => {
91 self.validate_primitive(value, *kind, path, errors);
92 }
93 Type::Constrained { base, constraints, .. } => {
94 self.validate_against_type(value, base, path, errors);
95 self.validate_constraints(value, constraints, path, errors);
96 }
97 Type::Object { fields, .. } => {
98 self.validate_object(value, fields, path, errors);
99 }
100 Type::Array { inner, .. } => {
101 self.validate_array(value, inner, path, errors);
102 }
103 Type::Union { types, .. } => {
104 self.validate_union(value, types, path, errors);
105 }
106 Type::Parenthesized { inner, .. } => {
107 self.validate_against_type(value, inner, path, errors);
108 }
109 Type::Reference { path: ref_path, .. } => {
110 // Try to resolve reference
111 if let Some(resolved_type) = self.resolve_reference(ref_path) {
112 self.validate_against_type(value, &resolved_type, path, errors);
113 } else {
114 // Can't resolve, skip validation
115 }
116 }
117 Type::Unknown { .. } => {
118 // Unknown type accepts anything
119 }
120 }
121 }
122
123 fn resolve_reference(&self, path: &Path) -> Option<Type> {
124 // Simple resolution: look for inline/def types with matching name
125 if path.segments.len() == 1 {
126 let name = &path.segments[0].name;
127 for item in &self.lexicon.items {
128 match item {
129 Item::InlineType(i) if i.name.name == *name => {
130 return Some(i.ty.clone());
131 }
132 Item::DefType(d) if d.name.name == *name => {
133 return Some(d.ty.clone());
134 }
135 _ => {}
136 }
137 }
138 }
139 None
140 }
141
142 fn validate_primitive(
143 &self,
144 value: &JsonValue,
145 kind: PrimitiveType,
146 path: &str,
147 errors: &mut Vec<ValidationError>,
148 ) {
149 match kind {
150 PrimitiveType::Null => {
151 if !value.is_null() {
152 errors.push(ValidationError {
153 path: path.to_string(),
154 message: "Expected null".to_string(),
155 });
156 }
157 }
158 PrimitiveType::Boolean => {
159 if !value.is_boolean() {
160 errors.push(ValidationError {
161 path: path.to_string(),
162 message: "Expected boolean".to_string(),
163 });
164 }
165 }
166 PrimitiveType::Integer => {
167 if let Some(n) = value.as_i64() {
168 // Check JavaScript-safe integer range (-2^53 to 2^53)
169 if n < -(1i64 << 53) || n > (1i64 << 53) {
170 errors.push(ValidationError {
171 path: path.to_string(),
172 message: "Integer out of JavaScript-safe range".to_string(),
173 });
174 }
175 } else {
176 errors.push(ValidationError {
177 path: path.to_string(),
178 message: "Expected integer".to_string(),
179 });
180 }
181 }
182 PrimitiveType::String => {
183 if !value.is_string() {
184 errors.push(ValidationError {
185 path: path.to_string(),
186 message: "Expected string".to_string(),
187 });
188 }
189 }
190 PrimitiveType::Bytes => {
191 // Bytes should be encoded as {"$bytes": "base64-string"}
192 if let Some(obj) = value.as_object() {
193 if let Some(bytes_val) = obj.get("$bytes") {
194 if !bytes_val.is_string() {
195 errors.push(ValidationError {
196 path: path.to_string(),
197 message: "Expected $bytes to be a base64 string".to_string(),
198 });
199 }
200 } else {
201 errors.push(ValidationError {
202 path: path.to_string(),
203 message: "Expected object with $bytes field".to_string(),
204 });
205 }
206 } else {
207 errors.push(ValidationError {
208 path: path.to_string(),
209 message: "Expected bytes object with $bytes field".to_string(),
210 });
211 }
212 }
213 PrimitiveType::Blob => {
214 // Blob should have $type, ref, mimeType, size
215 if let Some(obj) = value.as_object() {
216 let required = ["$type", "ref", "mimeType", "size"];
217 for field in &required {
218 if !obj.contains_key(*field) {
219 errors.push(ValidationError {
220 path: path.to_string(),
221 message: format!("Blob missing required field: {}", field),
222 });
223 }
224 }
225 } else {
226 errors.push(ValidationError {
227 path: path.to_string(),
228 message: "Expected blob object".to_string(),
229 });
230 }
231 }
232 }
233 }
234
235 fn validate_constraints(
236 &self,
237 value: &JsonValue,
238 constraints: &[Constraint],
239 path: &str,
240 errors: &mut Vec<ValidationError>,
241 ) {
242 for constraint in constraints {
243 match constraint {
244 Constraint::MinLength { value: min, .. } => {
245 if let Some(s) = value.as_str() {
246 if s.len() < *min {
247 errors.push(ValidationError {
248 path: path.to_string(),
249 message: format!("String too short: {} bytes (min: {})", s.len(), min),
250 });
251 }
252 } else if let Some(arr) = value.as_array() {
253 // MinLength can also apply to arrays (element count)
254 if arr.len() < *min {
255 errors.push(ValidationError {
256 path: path.to_string(),
257 message: format!("Array too short: {} elements (min: {})", arr.len(), min),
258 });
259 }
260 }
261 }
262 Constraint::MaxLength { value: max, .. } => {
263 if let Some(s) = value.as_str() {
264 if s.len() > *max {
265 errors.push(ValidationError {
266 path: path.to_string(),
267 message: format!("String too long: {} bytes (max: {})", s.len(), max),
268 });
269 }
270 } else if let Some(arr) = value.as_array() {
271 // MaxLength can also apply to arrays (element count)
272 if arr.len() > *max {
273 errors.push(ValidationError {
274 path: path.to_string(),
275 message: format!("Array too long: {} elements (max: {})", arr.len(), max),
276 });
277 }
278 }
279 }
280 Constraint::MinGraphemes { value: min, .. } => {
281 if let Some(s) = value.as_str() {
282 // Use proper Unicode grapheme cluster counting
283 let count = s.graphemes(true).count();
284 if count < *min {
285 errors.push(ValidationError {
286 path: path.to_string(),
287 message: format!("String has too few graphemes: {} (min: {})", count, min),
288 });
289 }
290 }
291 }
292 Constraint::MaxGraphemes { value: max, .. } => {
293 if let Some(s) = value.as_str() {
294 // Use proper Unicode grapheme cluster counting
295 let count = s.graphemes(true).count();
296 if count > *max {
297 errors.push(ValidationError {
298 path: path.to_string(),
299 message: format!("String has too many graphemes: {} (max: {})", count, max),
300 });
301 }
302 }
303 }
304 Constraint::Minimum { value: min, .. } => {
305 if let Some(n) = value.as_i64() {
306 if n < *min {
307 errors.push(ValidationError {
308 path: path.to_string(),
309 message: format!("Value too small: {} (min: {})", n, min),
310 });
311 }
312 }
313 }
314 Constraint::Maximum { value: max, .. } => {
315 if let Some(n) = value.as_i64() {
316 if n > *max {
317 errors.push(ValidationError {
318 path: path.to_string(),
319 message: format!("Value too large: {} (max: {})", n, max),
320 });
321 }
322 }
323 }
324 Constraint::Enum { values, .. } => {
325 if let Some(s) = value.as_str() {
326 let enum_strings: Vec<String> = values.iter().map(|v| match v {
327 mlf_lang::ast::ValueRef::Literal(lit) => lit.clone(),
328 mlf_lang::ast::ValueRef::Reference(path) => path.to_string(),
329 }).collect();
330 if !enum_strings.contains(&s.to_string()) {
331 errors.push(ValidationError {
332 path: path.to_string(),
333 message: format!("Value '{}' not in enum: {:?}", s, enum_strings),
334 });
335 }
336 }
337 }
338 Constraint::Format { value: format, .. } => {
339 if let Some(s) = value.as_str() {
340 self.validate_format(s, format, path, errors);
341 }
342 }
343 Constraint::Accept { mimes, .. } => {
344 // Validate blob mimeType against accept list
345 if let Some(obj) = value.as_object() {
346 if let Some(mime) = obj.get("mimeType").and_then(|v| v.as_str()) {
347 if !mimes.iter().any(|m| m == mime) {
348 errors.push(ValidationError {
349 path: path.to_string(),
350 message: format!("MIME type '{}' not accepted (allowed: {:?})", mime, mimes),
351 });
352 }
353 }
354 }
355 }
356 Constraint::MaxSize { value: max, .. } => {
357 // Validate blob size
358 if let Some(obj) = value.as_object() {
359 if let Some(size) = obj.get("size").and_then(|v| v.as_u64()) {
360 if size as usize > *max {
361 errors.push(ValidationError {
362 path: path.to_string(),
363 message: format!("Blob size {} exceeds maximum: {}", size, max),
364 });
365 }
366 }
367 }
368 }
369 Constraint::KnownValues { .. } => {
370 // knownValues is a hint, not enforced
371 }
372 Constraint::Default { .. } => {
373 // Default values are used when field is missing, not for validation
374 }
375 Constraint::Const { .. } => {
376 // Const values are enforced at compile time, not runtime validation
377 }
378 }
379 }
380 }
381
382 fn validate_format(
383 &self,
384 value: &str,
385 format: &str,
386 path: &str,
387 errors: &mut Vec<ValidationError>,
388 ) {
389 let is_valid = match format {
390 "datetime" => validate_datetime(value),
391 "uri" => validate_uri(value),
392 "at-uri" => validate_at_uri(value),
393 "did" => validate_did(value),
394 "handle" => validate_handle(value),
395 "nsid" => validate_nsid(value),
396 "cid" => validate_cid(value),
397 "at-identifier" => validate_at_identifier(value),
398 "language" => validate_language(value),
399 "tid" => validate_tid(value),
400 "record-key" => validate_record_key(value),
401 _ => true, // Unknown format, pass validation
402 };
403
404 if !is_valid {
405 errors.push(ValidationError {
406 path: path.to_string(),
407 message: format!("Invalid {} format: '{}'", format, value),
408 });
409 }
410 }
411
412 fn validate_object(
413 &self,
414 value: &JsonValue,
415 fields: &[Field],
416 path: &str,
417 errors: &mut Vec<ValidationError>,
418 ) {
419 if let Some(obj) = value.as_object() {
420 // Check required fields
421 for field in fields {
422 if !field.optional && !obj.contains_key(&field.name.name) {
423 errors.push(ValidationError {
424 path: if path == "$" {
425 field.name.name.clone()
426 } else {
427 format!("{}.{}", path, field.name.name)
428 },
429 message: "Required field missing".to_string(),
430 });
431 } else if let Some(field_value) = obj.get(&field.name.name) {
432 let field_path = if path == "$" {
433 field.name.name.clone()
434 } else {
435 format!("{}.{}", path, field.name.name)
436 };
437 self.validate_against_type(field_value, &field.ty, &field_path, errors);
438 }
439 }
440 } else {
441 errors.push(ValidationError {
442 path: path.to_string(),
443 message: format!("Expected object, got {}", value_type_name(value)),
444 });
445 }
446 }
447
448 fn validate_array(
449 &self,
450 value: &JsonValue,
451 inner: &Type,
452 path: &str,
453 errors: &mut Vec<ValidationError>,
454 ) {
455 if let Some(arr) = value.as_array() {
456 for (i, item) in arr.iter().enumerate() {
457 let item_path = format!("{}[{}]", path, i);
458 self.validate_against_type(item, inner, &item_path, errors);
459 }
460 } else {
461 errors.push(ValidationError {
462 path: path.to_string(),
463 message: format!("Expected array, got {}", value_type_name(value)),
464 });
465 }
466 }
467
468 fn validate_union(
469 &self,
470 value: &JsonValue,
471 types: &[Type],
472 path: &str,
473 errors: &mut Vec<ValidationError>,
474 ) {
475 // Try to validate against each type in the union
476 let mut matched = false;
477
478 for ty in types {
479 let mut type_errors = Vec::new();
480 self.validate_against_type(value, ty, path, &mut type_errors);
481
482 if type_errors.is_empty() {
483 matched = true;
484 break;
485 }
486 }
487
488 if !matched {
489 errors.push(ValidationError {
490 path: path.to_string(),
491 message: format!("Value does not match any type in union ({} variants tried)", types.len()),
492 });
493 }
494 }
495}
496
497fn value_type_name(value: &JsonValue) -> &'static str {
498 match value {
499 JsonValue::Null => "null",
500 JsonValue::Bool(_) => "boolean",
501 JsonValue::Number(_) => "number",
502 JsonValue::String(_) => "string",
503 JsonValue::Array(_) => "array",
504 JsonValue::Object(_) => "object",
505 }
506}
507
508// Format validators
509
510/// Validate datetime format (RFC 3339 / ISO 8601)
511fn validate_datetime(value: &str) -> bool {
512 // Use time crate for proper RFC 3339 parsing
513 OffsetDateTime::parse(value, &Rfc3339).is_ok()
514}
515
516/// Validate URI format (RFC 3986)
517fn validate_uri(value: &str) -> bool {
518 // Use url crate for proper URI parsing
519 Url::parse(value).is_ok()
520}
521
522/// Validate AT-URI format (at://did:plc:xyz/com.example.foo/record-key)
523fn validate_at_uri(value: &str) -> bool {
524 // AT-URI format: at://authority/collection/rkey
525 // authority is a DID or handle
526 // collection is an NSID
527 // rkey is optional
528
529 if !value.starts_with("at://") {
530 return false;
531 }
532
533 // Strip the scheme
534 let without_scheme = &value[5..];
535
536 // Split by first slash to get authority and path
537 let (authority, path) = match without_scheme.split_once('/') {
538 Some((auth, p)) => (auth, Some(p)),
539 None => (without_scheme, None),
540 };
541
542 // Authority must be a DID or handle
543 if !validate_did(authority) && !validate_handle(authority) {
544 return false;
545 }
546
547 // Path validation (if present)
548 if let Some(path_str) = path {
549 if !path_str.is_empty() {
550 // Path should be collection or collection/rkey
551 let parts: Vec<&str> = path_str.split('/').filter(|s| !s.is_empty()).collect();
552 if parts.is_empty() || parts.len() > 2 {
553 return false;
554 }
555 // Collection should be an NSID
556 if !validate_nsid(parts[0]) {
557 return false;
558 }
559 // Record key validation (if present)
560 if parts.len() == 2 && !validate_record_key(parts[1]) {
561 return false;
562 }
563 }
564 }
565
566 true
567}
568
569/// Validate DID format (did:method:identifier)
570fn validate_did(value: &str) -> bool {
571 // DID format: did:method:method-specific-id
572 // method: lowercase letters, numbers
573 // method-specific-id: alphanumeric plus . - _ :
574 let re = Regex::new(
575 r"^did:[a-z0-9]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$"
576 ).unwrap();
577 re.is_match(value)
578}
579
/// Validate handle format (a domain name such as `alice.example.com`).
///
/// Rules enforced:
/// * at most 253 bytes in total and at least two dot-separated labels;
/// * every label is 1-63 ASCII letters, digits or hyphens and neither starts
///   nor ends with a hyphen;
/// * the last label (the top-level domain) does not start with a digit, which
///   keeps IPv4-looking strings such as `127.0.0.1` from passing as handles.
fn validate_handle(value: &str) -> bool {
    if value.len() > 253 {
        return false;
    }

    let labels: Vec<&str> = value.split('.').collect();
    if labels.len() < 2 {
        return false;
    }

    // An empty label (leading, trailing or doubled dot) fails the length test.
    let label_ok = |label: &str| {
        (1..=63).contains(&label.len())
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-')
    };
    if !labels.iter().copied().all(|label| label_ok(label)) {
        return false;
    }

    labels
        .last()
        .map_or(false, |tld| !tld.starts_with(|c: char| c.is_ascii_digit()))
}
605
/// Validate NSID format (namespaced identifier, e.g. `app.bsky.feed.getTimeline`).
///
/// An NSID is a reversed domain authority followed by one name segment:
/// * 3-317 bytes in total, at least three dot-separated segments, each 1-63
///   bytes long and not starting with a digit;
/// * authority segments (all but the last) must already be lowercase and may
///   contain ASCII lowercase letters, digits and interior hyphens;
/// * the name segment (the last one) is case-sensitive: ASCII letters of either
///   case and digits, no hyphens.
fn validate_nsid(value: &str) -> bool {
    if value.len() < 3 || value.len() > 317 {
        return false;
    }

    let segments: Vec<&str> = value.split('.').collect();
    if segments.len() < 3 {
        return false;
    }
    let (name, authority) = match segments.split_last() {
        Some((name, authority)) => (*name, authority),
        None => return false,
    };

    let length_ok = |segment: &str| (1..=63).contains(&segment.len());
    let starts_with_digit = |segment: &str| segment.starts_with(|c: char| c.is_ascii_digit());

    let authority_ok = authority.iter().copied().all(|label| {
        length_ok(label)
            && !starts_with_digit(label)
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label
                .chars()
                .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-')
    });

    // Names are commonly camelCase (`getTimeline`, `createRecord`), so upper
    // case must be accepted here even though the authority is lowercase-only.
    let name_ok = length_ok(name)
        && !starts_with_digit(name)
        && name.chars().all(|c| c.is_ascii_alphanumeric());

    authority_ok && name_ok
}
638
/// Validate CID format (Content Identifier) with a shallow, syntax-only check.
///
/// Accepted shapes:
/// * CIDv0: exactly 46 base58btc characters starting with `Qm`;
/// * CIDv1: more than 10 ASCII alphanumeric characters starting with the
///   multibase prefix `b` or `z`.
///
/// The multibase/multihash payload itself is not decoded.
fn validate_cid(value: &str) -> bool {
    // base58btc omits the easily confused characters 0, O, I and l.
    let is_base58btc =
        |c: char| c.is_ascii_alphanumeric() && !matches!(c, '0' | 'O' | 'I' | 'l');

    match value.chars().next() {
        None => false,
        Some('Q') if value.starts_with("Qm") && value.len() == 46 => {
            value.chars().all(is_base58btc)
        }
        Some('b') | Some('z') if value.len() > 10 => {
            value.chars().all(|c| c.is_ascii_alphanumeric())
        }
        Some(_) => false,
    }
}
664
665/// Validate AT-identifier format (DID or handle)
666fn validate_at_identifier(value: &str) -> bool {
667 validate_did(value) || validate_handle(value)
668}
669
670/// Validate language code format (BCP 47)
671fn validate_language(value: &str) -> bool {
672 // Use langtag crate for proper BCP 47 / RFC 5646 validation
673 LangTag::new(value).is_ok()
674}
675
/// Validate TID format (Timestamp Identifier).
///
/// A TID is exactly 13 characters from the base32-sortable alphabet
/// `2-7` and `a-z` (so no `0`, `1`, `8`, `9` and no upper case).
// NOTE(review): the AT Protocol spec appears to restrict the first character
// further (to `2-7` / `a-j`); that is not enforced here — confirm before
// tightening, since validate_record_key delegates to this function.
fn validate_tid(value: &str) -> bool {
    const ALPHABET: &[u8] = b"234567abcdefghijklmnopqrstuvwxyz";

    let bytes = value.as_bytes();
    bytes.len() == 13 && bytes.iter().all(|byte| ALPHABET.contains(byte))
}
688
/// Validate record-key format.
///
/// A record key is 1-512 characters drawn from ASCII letters, digits and
/// `. - _ : ~`. The two values `.` and `..` are reserved and therefore
/// rejected. TIDs need no special handling: every TID is made of characters
/// that are allowed here.
fn validate_record_key(value: &str) -> bool {
    if value.is_empty() || value.len() > 512 {
        return false;
    }

    // Reserved: these would act as path navigation inside an AT-URI.
    if value == "." || value == ".." {
        return false;
    }

    value
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_' | ':' | '~'))
}
708
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `check` accepts every input in `valid` and rejects every
    /// input in `invalid`, naming the offending input on failure.
    fn assert_cases(check: fn(&str) -> bool, valid: &[&str], invalid: &[&str]) {
        for input in valid {
            assert!(check(input), "expected {:?} to be accepted", input);
        }
        for input in invalid {
            assert!(!check(input), "expected {:?} to be rejected", input);
        }
    }

    #[test]
    fn test_validate_datetime() {
        assert_cases(
            validate_datetime,
            &[
                "2024-01-15T10:30:00Z",
                "2024-01-15T10:30:00.123Z",
                "2024-01-15T10:30:00+05:30",
                "2024-01-15T10:30:00-08:00",
            ],
            &["2024-01-15", "2024-01-15 10:30:00", "not-a-date"],
        );
    }

    #[test]
    fn test_validate_uri() {
        assert_cases(
            validate_uri,
            &[
                "https://example.com",
                "http://example.com/path",
                "ftp://example.com",
                "custom-scheme://something",
            ],
            &["not a uri", "://missing-scheme", ""],
        );
    }

    #[test]
    fn test_validate_at_uri() {
        assert_cases(
            validate_at_uri,
            &[
                "at://did:plc:abc123",
                "at://did:plc:abc123/com.example.foo",
                "at://did:plc:abc123/com.example.foo/abc123",
                "at://alice.example.com/com.example.post/abc",
            ],
            &["https://example.com", "at://", "not-at-uri"],
        );
    }

    #[test]
    fn test_validate_did() {
        assert_cases(
            validate_did,
            &["did:plc:abc123xyz", "did:web:example.com", "did:key:abc123"],
            &[
                "not-a-did",
                "did:",
                "did:UPPERCASE:test", // method must be lowercase
            ],
        );
    }

    #[test]
    fn test_validate_handle() {
        assert_cases(
            validate_handle,
            &["example.com", "alice.example.com", "my-site.example.com"],
            &[
                "nodomainext",
                ".example.com",
                "example.com.",
                "-invalid.com",
                "invalid-.com",
            ],
        );
    }

    #[test]
    fn test_validate_nsid() {
        assert_cases(
            validate_nsid,
            &["com.example.foo", "com.example.foo.bar", "io.github.user.action"],
            &[
                "com.example",        // need at least 3 segments
                "COM.EXAMPLE.FOO",    // must be lowercase
                "com.123invalid.foo", // can't start with digit
                "co",                 // too short
            ],
        );
    }

    #[test]
    fn test_validate_cid() {
        assert_cases(
            validate_cid,
            &[
                "QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG", // CIDv0
                "bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku", // CIDv1
            ],
            &[
                "",
                "not-a-cid",
                "Qm123", // too short
            ],
        );
    }

    #[test]
    fn test_validate_at_identifier() {
        assert_cases(
            validate_at_identifier,
            &[
                "did:plc:abc123",    // DID
                "example.com",       // handle
                "alice.example.com", // handle
            ],
            &["not-valid", ""],
        );
    }

    #[test]
    fn test_validate_language() {
        // BCP 47 / RFC 5646 language tags.
        assert_cases(
            validate_language,
            &["en", "en-US", "zh-Hans-CN", "fr-CA", "en-GB", "de-DE"],
            &[
                "e",      // too short
                "en_US",  // wrong separator (underscore)
                "",       // empty
                "123",    // starts with digit
                "en--US", // double separator
            ],
        );
    }

    #[test]
    fn test_validate_tid() {
        // TIDs are 13 base32-sortable characters.
        assert_cases(
            validate_tid,
            &["3jui7kd54zh2y", "3k2a4dqudbbz2"],
            &[
                "3jui7kd54zh2",   // too short
                "3jui7kd54zh2yy", // too long
                "3jui7kd54zh2Y",  // uppercase not allowed
                "3jui0kd54zh2y",  // 0 not allowed
            ],
        );
    }

    #[test]
    fn test_validate_record_key() {
        let too_long = "a".repeat(513);
        assert_cases(
            validate_record_key,
            &[
                "3jui7kd54zh2y", // TID
                "my-record-key",
                "key.with.dots",
                "key_with_underscores",
                "key~with~tildes",
            ],
            &[
                "",                // empty
                too_long.as_str(), // too long
                "key with spaces", // spaces not allowed
            ],
        );
    }

    #[test]
    fn test_grapheme_counting() {
        // Unicode grapheme cluster counting test
        use unicode_segmentation::UnicodeSegmentation;

        // Family emoji: man, woman, girl, boy joined by ZERO WIDTH JOINERs.
        // Written with escapes because the joiners are invisible and are easily
        // lost when the source is copied or re-encoded, which would silently
        // turn this into four separate clusters.
        let text = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";
        assert_eq!(text.graphemes(true).count(), 1);

        let text = "hello"; // 5 graphemes
        assert_eq!(text.graphemes(true).count(), 5);

        let text = "नमस्ते"; // Devanagari (3 grapheme clusters)
        assert_eq!(text.graphemes(true).count(), 3);
    }
}