A human-friendly DSL for ATProto Lexicons
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 883 lines 32 kB view raw
1use mlf_lang::ast::*; 2use serde_json::Value as JsonValue; 3use std::fmt; 4use unicode_segmentation::UnicodeSegmentation; 5use regex::Regex; 6use url::Url; 7use time::format_description::well_known::Rfc3339; 8use time::OffsetDateTime; 9use langtag::LangTag; 10 11#[derive(Debug, Clone)] 12pub struct ValidationError { 13 pub path: String, 14 pub message: String, 15} 16 17impl fmt::Display for ValidationError { 18 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 19 write!(f, "{}: {}", self.path, self.message) 20 } 21} 22 23impl std::error::Error for ValidationError {} 24 25pub struct RecordValidator<'a> { 26 lexicon: &'a Lexicon, 27} 28 29impl<'a> RecordValidator<'a> { 30 pub fn new(lexicon: &'a Lexicon) -> Self { 31 Self { lexicon } 32 } 33 34 pub fn validate_record(&self, record: &JsonValue) -> Result<(), Vec<ValidationError>> { 35 let mut errors = Vec::new(); 36 37 // Find the main record definition 38 let main_item = self.find_main_item()?; 39 40 match main_item { 41 Item::Record(record_def) => { 42 // Validate as object with the record's fields 43 self.validate_object(record, &record_def.fields, "$", &mut errors); 44 } 45 Item::Query(_) | Item::Procedure(_) => { 46 errors.push(ValidationError { 47 path: "$".to_string(), 48 message: "Cannot validate records against query/procedure definitions".to_string(), 49 }); 50 } 51 _ => { 52 errors.push(ValidationError { 53 path: "$".to_string(), 54 message: "No record definition found in lexicon".to_string(), 55 }); 56 } 57 } 58 59 if errors.is_empty() { 60 Ok(()) 61 } else { 62 Err(errors) 63 } 64 } 65 66 fn find_main_item(&self) -> Result<&Item, Vec<ValidationError>> { 67 // Look for a record, query, or procedure (main definitions) 68 for item in &self.lexicon.items { 69 match item { 70 Item::Record(_) | Item::Query(_) | Item::Procedure(_) => { 71 return Ok(item); 72 } 73 _ => continue, 74 } 75 } 76 Err(vec![ValidationError { 77 path: "$".to_string(), 78 message: "No main definition found in lexicon".to_string(), 79 }]) 80 } 81 82 fn validate_against_type( 83 &self, 84 value: &JsonValue, 85 ty: &Type, 86 path: &str, 87 errors: &mut Vec<ValidationError>, 88 ) { 89 match ty { 90 Type::Primitive { kind, .. } => { 91 self.validate_primitive(value, *kind, path, errors); 92 } 93 Type::Constrained { base, constraints, .. } => { 94 self.validate_against_type(value, base, path, errors); 95 self.validate_constraints(value, constraints, path, errors); 96 } 97 Type::Object { fields, .. } => { 98 self.validate_object(value, fields, path, errors); 99 } 100 Type::Array { inner, .. } => { 101 self.validate_array(value, inner, path, errors); 102 } 103 Type::Union { types, .. } => { 104 self.validate_union(value, types, path, errors); 105 } 106 Type::Parenthesized { inner, .. } => { 107 self.validate_against_type(value, inner, path, errors); 108 } 109 Type::Reference { path: ref_path, .. } => { 110 // Try to resolve reference 111 if let Some(resolved_type) = self.resolve_reference(ref_path) { 112 self.validate_against_type(value, &resolved_type, path, errors); 113 } else { 114 // Can't resolve, skip validation 115 } 116 } 117 Type::Unknown { .. } => { 118 // Unknown type accepts anything 119 } 120 } 121 } 122 123 fn resolve_reference(&self, path: &Path) -> Option<Type> { 124 // Simple resolution: look for inline/def types with matching name 125 if path.segments.len() == 1 { 126 let name = &path.segments[0].name; 127 for item in &self.lexicon.items { 128 match item { 129 Item::InlineType(i) if i.name.name == *name => { 130 return Some(i.ty.clone()); 131 } 132 Item::DefType(d) if d.name.name == *name => { 133 return Some(d.ty.clone()); 134 } 135 _ => {} 136 } 137 } 138 } 139 None 140 } 141 142 fn validate_primitive( 143 &self, 144 value: &JsonValue, 145 kind: PrimitiveType, 146 path: &str, 147 errors: &mut Vec<ValidationError>, 148 ) { 149 match kind { 150 PrimitiveType::Null => { 151 if !value.is_null() { 152 errors.push(ValidationError { 153 path: path.to_string(), 154 message: "Expected null".to_string(), 155 }); 156 } 157 } 158 PrimitiveType::Boolean => { 159 if !value.is_boolean() { 160 errors.push(ValidationError { 161 path: path.to_string(), 162 message: "Expected boolean".to_string(), 163 }); 164 } 165 } 166 PrimitiveType::Integer => { 167 if let Some(n) = value.as_i64() { 168 // Check JavaScript-safe integer range (-2^53 to 2^53) 169 if n < -(1i64 << 53) || n > (1i64 << 53) { 170 errors.push(ValidationError { 171 path: path.to_string(), 172 message: "Integer out of JavaScript-safe range".to_string(), 173 }); 174 } 175 } else { 176 errors.push(ValidationError { 177 path: path.to_string(), 178 message: "Expected integer".to_string(), 179 }); 180 } 181 } 182 PrimitiveType::String => { 183 if !value.is_string() { 184 errors.push(ValidationError { 185 path: path.to_string(), 186 message: "Expected string".to_string(), 187 }); 188 } 189 } 190 PrimitiveType::Bytes => { 191 // Bytes should be encoded as {"$bytes": "base64-string"} 192 if let Some(obj) = value.as_object() { 193 if let Some(bytes_val) = obj.get("$bytes") { 194 if !bytes_val.is_string() { 195 errors.push(ValidationError { 196 path: path.to_string(), 197 message: "Expected $bytes to be a base64 string".to_string(), 198 }); 199 } 200 } else { 201 errors.push(ValidationError { 202 path: path.to_string(), 203 message: "Expected object with $bytes field".to_string(), 204 }); 205 } 206 } else { 207 errors.push(ValidationError { 208 path: path.to_string(), 209 message: "Expected bytes object with $bytes field".to_string(), 210 }); 211 } 212 } 213 PrimitiveType::Blob => { 214 // Blob should have $type, ref, mimeType, size 215 if let Some(obj) = value.as_object() { 216 let required = ["$type", "ref", "mimeType", "size"]; 217 for field in &required { 218 if !obj.contains_key(*field) { 219 errors.push(ValidationError { 220 path: path.to_string(), 221 message: format!("Blob missing required field: {}", field), 222 }); 223 } 224 } 225 } else { 226 errors.push(ValidationError { 227 path: path.to_string(), 228 message: "Expected blob object".to_string(), 229 }); 230 } 231 } 232 } 233 } 234 235 fn validate_constraints( 236 &self, 237 value: &JsonValue, 238 constraints: &[Constraint], 239 path: &str, 240 errors: &mut Vec<ValidationError>, 241 ) { 242 for constraint in constraints { 243 match constraint { 244 Constraint::MinLength { value: min, .. } => { 245 if let Some(s) = value.as_str() { 246 if s.len() < *min { 247 errors.push(ValidationError { 248 path: path.to_string(), 249 message: format!("String too short: {} bytes (min: {})", s.len(), min), 250 }); 251 } 252 } else if let Some(arr) = value.as_array() { 253 // MinLength can also apply to arrays (element count) 254 if arr.len() < *min { 255 errors.push(ValidationError { 256 path: path.to_string(), 257 message: format!("Array too short: {} elements (min: {})", arr.len(), min), 258 }); 259 } 260 } 261 } 262 Constraint::MaxLength { value: max, .. } => { 263 if let Some(s) = value.as_str() { 264 if s.len() > *max { 265 errors.push(ValidationError { 266 path: path.to_string(), 267 message: format!("String too long: {} bytes (max: {})", s.len(), max), 268 }); 269 } 270 } else if let Some(arr) = value.as_array() { 271 // MaxLength can also apply to arrays (element count) 272 if arr.len() > *max { 273 errors.push(ValidationError { 274 path: path.to_string(), 275 message: format!("Array too long: {} elements (max: {})", arr.len(), max), 276 }); 277 } 278 } 279 } 280 Constraint::MinGraphemes { value: min, .. } => { 281 if let Some(s) = value.as_str() { 282 // Use proper Unicode grapheme cluster counting 283 let count = s.graphemes(true).count(); 284 if count < *min { 285 errors.push(ValidationError { 286 path: path.to_string(), 287 message: format!("String has too few graphemes: {} (min: {})", count, min), 288 }); 289 } 290 } 291 } 292 Constraint::MaxGraphemes { value: max, .. } => { 293 if let Some(s) = value.as_str() { 294 // Use proper Unicode grapheme cluster counting 295 let count = s.graphemes(true).count(); 296 if count > *max { 297 errors.push(ValidationError { 298 path: path.to_string(), 299 message: format!("String has too many graphemes: {} (max: {})", count, max), 300 }); 301 } 302 } 303 } 304 Constraint::Minimum { value: min, .. } => { 305 if let Some(n) = value.as_i64() { 306 if n < *min { 307 errors.push(ValidationError { 308 path: path.to_string(), 309 message: format!("Value too small: {} (min: {})", n, min), 310 }); 311 } 312 } 313 } 314 Constraint::Maximum { value: max, .. } => { 315 if let Some(n) = value.as_i64() { 316 if n > *max { 317 errors.push(ValidationError { 318 path: path.to_string(), 319 message: format!("Value too large: {} (max: {})", n, max), 320 }); 321 } 322 } 323 } 324 Constraint::Enum { values, .. } => { 325 if let Some(s) = value.as_str() { 326 let enum_strings: Vec<String> = values.iter().map(|v| match v { 327 mlf_lang::ast::ValueRef::Literal(lit) => lit.clone(), 328 mlf_lang::ast::ValueRef::Reference(path) => path.to_string(), 329 }).collect(); 330 if !enum_strings.contains(&s.to_string()) { 331 errors.push(ValidationError { 332 path: path.to_string(), 333 message: format!("Value '{}' not in enum: {:?}", s, enum_strings), 334 }); 335 } 336 } 337 } 338 Constraint::Format { value: format, .. } => { 339 if let Some(s) = value.as_str() { 340 self.validate_format(s, format, path, errors); 341 } 342 } 343 Constraint::Accept { mimes, .. } => { 344 // Validate blob mimeType against accept list 345 if let Some(obj) = value.as_object() { 346 if let Some(mime) = obj.get("mimeType").and_then(|v| v.as_str()) { 347 if !mimes.iter().any(|m| m == mime) { 348 errors.push(ValidationError { 349 path: path.to_string(), 350 message: format!("MIME type '{}' not accepted (allowed: {:?})", mime, mimes), 351 }); 352 } 353 } 354 } 355 } 356 Constraint::MaxSize { value: max, .. } => { 357 // Validate blob size 358 if let Some(obj) = value.as_object() { 359 if let Some(size) = obj.get("size").and_then(|v| v.as_u64()) { 360 if size as usize > *max { 361 errors.push(ValidationError { 362 path: path.to_string(), 363 message: format!("Blob size {} exceeds maximum: {}", size, max), 364 }); 365 } 366 } 367 } 368 } 369 Constraint::KnownValues { .. } => { 370 // knownValues is a hint, not enforced 371 } 372 Constraint::Default { .. } => { 373 // Default values are used when field is missing, not for validation 374 } 375 Constraint::Const { .. } => { 376 // Const values are enforced at compile time, not runtime validation 377 } 378 } 379 } 380 } 381 382 fn validate_format( 383 &self, 384 value: &str, 385 format: &str, 386 path: &str, 387 errors: &mut Vec<ValidationError>, 388 ) { 389 let is_valid = match format { 390 "datetime" => validate_datetime(value), 391 "uri" => validate_uri(value), 392 "at-uri" => validate_at_uri(value), 393 "did" => validate_did(value), 394 "handle" => validate_handle(value), 395 "nsid" => validate_nsid(value), 396 "cid" => validate_cid(value), 397 "at-identifier" => validate_at_identifier(value), 398 "language" => validate_language(value), 399 "tid" => validate_tid(value), 400 "record-key" => validate_record_key(value), 401 _ => true, // Unknown format, pass validation 402 }; 403 404 if !is_valid { 405 errors.push(ValidationError { 406 path: path.to_string(), 407 message: format!("Invalid {} format: '{}'", format, value), 408 }); 409 } 410 } 411 412 fn validate_object( 413 &self, 414 value: &JsonValue, 415 fields: &[Field], 416 path: &str, 417 errors: &mut Vec<ValidationError>, 418 ) { 419 if let Some(obj) = value.as_object() { 420 // Check required fields 421 for field in fields { 422 if !field.optional && !obj.contains_key(&field.name.name) { 423 errors.push(ValidationError { 424 path: if path == "$" { 425 field.name.name.clone() 426 } else { 427 format!("{}.{}", path, field.name.name) 428 }, 429 message: "Required field missing".to_string(), 430 }); 431 } else if let Some(field_value) = obj.get(&field.name.name) { 432 let field_path = if path == "$" { 433 field.name.name.clone() 434 } else { 435 format!("{}.{}", path, field.name.name) 436 }; 437 self.validate_against_type(field_value, &field.ty, &field_path, errors); 438 } 439 } 440 } else { 441 errors.push(ValidationError { 442 path: path.to_string(), 443 message: format!("Expected object, got {}", value_type_name(value)), 444 }); 445 } 446 } 447 448 fn validate_array( 449 &self, 450 value: &JsonValue, 451 inner: &Type, 452 path: &str, 453 errors: &mut Vec<ValidationError>, 454 ) { 455 if let Some(arr) = value.as_array() { 456 for (i, item) in arr.iter().enumerate() { 457 let item_path = format!("{}[{}]", path, i); 458 self.validate_against_type(item, inner, &item_path, errors); 459 } 460 } else { 461 errors.push(ValidationError { 462 path: path.to_string(), 463 message: format!("Expected array, got {}", value_type_name(value)), 464 }); 465 } 466 } 467 468 fn validate_union( 469 &self, 470 value: &JsonValue, 471 types: &[Type], 472 path: &str, 473 errors: &mut Vec<ValidationError>, 474 ) { 475 // Try to validate against each type in the union 476 let mut matched = false; 477 478 for ty in types { 479 let mut type_errors = Vec::new(); 480 self.validate_against_type(value, ty, path, &mut type_errors); 481 482 if type_errors.is_empty() { 483 matched = true; 484 break; 485 } 486 } 487 488 if !matched { 489 errors.push(ValidationError { 490 path: path.to_string(), 491 message: format!("Value does not match any type in union ({} variants tried)", types.len()), 492 }); 493 } 494 } 495} 496 497fn value_type_name(value: &JsonValue) -> &'static str { 498 match value { 499 JsonValue::Null => "null", 500 JsonValue::Bool(_) => "boolean", 501 JsonValue::Number(_) => "number", 502 JsonValue::String(_) => "string", 503 JsonValue::Array(_) => "array", 504 JsonValue::Object(_) => "object", 505 } 506} 507 508// Format validators 509 510/// Validate datetime format (RFC 3339 / ISO 8601) 511fn validate_datetime(value: &str) -> bool { 512 // Use time crate for proper RFC 3339 parsing 513 OffsetDateTime::parse(value, &Rfc3339).is_ok() 514} 515 516/// Validate URI format (RFC 3986) 517fn validate_uri(value: &str) -> bool { 518 // Use url crate for proper URI parsing 519 Url::parse(value).is_ok() 520} 521 522/// Validate AT-URI format (at://did:plc:xyz/com.example.foo/record-key) 523fn validate_at_uri(value: &str) -> bool { 524 // AT-URI format: at://authority/collection/rkey 525 // authority is a DID or handle 526 // collection is an NSID 527 // rkey is optional 528 529 if !value.starts_with("at://") { 530 return false; 531 } 532 533 // Strip the scheme 534 let without_scheme = &value[5..]; 535 536 // Split by first slash to get authority and path 537 let (authority, path) = match without_scheme.split_once('/') { 538 Some((auth, p)) => (auth, Some(p)), 539 None => (without_scheme, None), 540 }; 541 542 // Authority must be a DID or handle 543 if !validate_did(authority) && !validate_handle(authority) { 544 return false; 545 } 546 547 // Path validation (if present) 548 if let Some(path_str) = path { 549 if !path_str.is_empty() { 550 // Path should be collection or collection/rkey 551 let parts: Vec<&str> = path_str.split('/').filter(|s| !s.is_empty()).collect(); 552 if parts.is_empty() || parts.len() > 2 { 553 return false; 554 } 555 // Collection should be an NSID 556 if !validate_nsid(parts[0]) { 557 return false; 558 } 559 // Record key validation (if present) 560 if parts.len() == 2 && !validate_record_key(parts[1]) { 561 return false; 562 } 563 } 564 } 565 566 true 567} 568 569/// Validate DID format (did:method:identifier) 570fn validate_did(value: &str) -> bool { 571 // DID format: did:method:method-specific-id 572 // method: lowercase letters, numbers 573 // method-specific-id: alphanumeric plus . - _ : 574 let re = Regex::new( 575 r"^did:[a-z0-9]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$" 576 ).unwrap(); 577 re.is_match(value) 578} 579 580/// Validate handle format (domain name) 581fn validate_handle(value: &str) -> bool { 582 // Handle is a domain name: segment.segment.segment 583 // Each segment: alphanumeric and hyphen, can't start or end with hyphen 584 // Must have at least one dot 585 if !value.contains('.') || value.starts_with('.') || value.ends_with('.') { 586 return false; 587 } 588 589 // Check each segment 590 for segment in value.split('.') { 591 if segment.is_empty() 592 || segment.starts_with('-') 593 || segment.ends_with('-') 594 || segment.len() > 63 { 595 return false; 596 } 597 if !segment.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') { 598 return false; 599 } 600 } 601 602 // Total length check 603 value.len() <= 253 604} 605 606/// Validate NSID format (namespaced identifier) 607fn validate_nsid(value: &str) -> bool { 608 // NSID format: authority.name(.name)* 609 // authority: domain name (reversed) 610 // name: lowercase alphanumeric, max 63 chars per segment 611 // Total: 3-317 chars 612 if value.len() < 3 || value.len() > 317 { 613 return false; 614 } 615 616 let parts: Vec<&str> = value.split('.').collect(); 617 if parts.len() < 3 { 618 return false; 619 } 620 621 // Check each segment 622 for part in &parts { 623 if part.is_empty() || part.len() > 63 { 624 return false; 625 } 626 // NSID segments must be lowercase alphanumeric (and hyphen for domain parts) 627 if !part.chars().all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-') { 628 return false; 629 } 630 // Can't start with digit 631 if part.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) { 632 return false; 633 } 634 } 635 636 true 637} 638 639/// Validate CID format (Content Identifier) 640fn validate_cid(value: &str) -> bool { 641 // CID format is complex (multibase encoded multihash) 642 // Basic validation: non-empty, starts with base58btc or base32 prefix 643 // Full validation would require parsing the multibase/multihash 644 if value.is_empty() { 645 return false; 646 } 647 648 // CIDv0: starts with 'Qm' (base58btc) 649 // CIDv1: starts with 'b' (base32) or 'z' (base58btc) followed by version 650 if value.starts_with("Qm") && value.len() == 46 { 651 // CIDv0 - all base58btc chars 652 return value.chars().all(|c| { 653 c.is_ascii_alphanumeric() && c != '0' && c != 'O' && c != 'I' && c != 'l' 654 }); 655 } 656 657 if (value.starts_with('b') || value.starts_with('z')) && value.len() > 10 { 658 // CIDv1 - basic check for valid multibase chars 659 return value.chars().all(|c| c.is_ascii_alphanumeric()); 660 } 661 662 false 663} 664 665/// Validate AT-identifier format (DID or handle) 666fn validate_at_identifier(value: &str) -> bool { 667 validate_did(value) || validate_handle(value) 668} 669 670/// Validate language code format (BCP 47) 671fn validate_language(value: &str) -> bool { 672 // Use langtag crate for proper BCP 47 / RFC 5646 validation 673 LangTag::new(value).is_ok() 674} 675 676/// Validate TID format (Timestamp Identifier) 677fn validate_tid(value: &str) -> bool { 678 // TID: 13 character base32-sortable timestamp 679 // Uses a-z2-7 character set (no 0,1,8,9) 680 if value.len() != 13 { 681 return false; 682 } 683 684 value.chars().all(|c| { 685 matches!(c, 'a'..='z' | '2'..='7') 686 }) 687} 688 689/// Validate record-key format 690fn validate_record_key(value: &str) -> bool { 691 // Record key: alphanumeric, dot, underscore, tilde, hyphen 692 // 1-512 characters 693 // Can be TID or custom key 694 if value.is_empty() || value.len() > 512 { 695 return false; 696 } 697 698 // If it looks like a TID, validate as TID 699 if value.len() == 13 && value.chars().all(|c| matches!(c, 'a'..='z' | '2'..='7')) { 700 return validate_tid(value); 701 } 702 703 // Otherwise, general record key validation 704 value.chars().all(|c| { 705 c.is_ascii_alphanumeric() || c == '.' || c == '_' || c == '~' || c == '-' 706 }) 707} 708 709#[cfg(test)] 710mod tests { 711 use super::*; 712 713 #[test] 714 fn test_validate_datetime() { 715 // Valid datetimes 716 assert!(validate_datetime("2024-01-15T10:30:00Z")); 717 assert!(validate_datetime("2024-01-15T10:30:00.123Z")); 718 assert!(validate_datetime("2024-01-15T10:30:00+05:30")); 719 assert!(validate_datetime("2024-01-15T10:30:00-08:00")); 720 721 // Invalid datetimes 722 assert!(!validate_datetime("2024-01-15")); 723 assert!(!validate_datetime("2024-01-15 10:30:00")); 724 assert!(!validate_datetime("not-a-date")); 725 } 726 727 #[test] 728 fn test_validate_uri() { 729 // Valid URIs 730 assert!(validate_uri("https://example.com")); 731 assert!(validate_uri("http://example.com/path")); 732 assert!(validate_uri("ftp://example.com")); 733 assert!(validate_uri("custom-scheme://something")); 734 735 // Invalid URIs 736 assert!(!validate_uri("not a uri")); 737 assert!(!validate_uri("://missing-scheme")); 738 assert!(!validate_uri("")); 739 } 740 741 #[test] 742 fn test_validate_at_uri() { 743 // Valid AT-URIs 744 assert!(validate_at_uri("at://did:plc:abc123")); 745 assert!(validate_at_uri("at://did:plc:abc123/com.example.foo")); 746 assert!(validate_at_uri("at://did:plc:abc123/com.example.foo/abc123")); 747 assert!(validate_at_uri("at://alice.example.com/com.example.post/abc")); 748 749 // Invalid AT-URIs 750 assert!(!validate_at_uri("https://example.com")); 751 assert!(!validate_at_uri("at://")); 752 assert!(!validate_at_uri("not-at-uri")); 753 } 754 755 #[test] 756 fn test_validate_did() { 757 // Valid DIDs 758 assert!(validate_did("did:plc:abc123xyz")); 759 assert!(validate_did("did:web:example.com")); 760 assert!(validate_did("did:key:abc123")); 761 762 // Invalid DIDs 763 assert!(!validate_did("not-a-did")); 764 assert!(!validate_did("did:")); 765 assert!(!validate_did("did:UPPERCASE:test")); // method must be lowercase 766 } 767 768 #[test] 769 fn test_validate_handle() { 770 // Valid handles 771 assert!(validate_handle("example.com")); 772 assert!(validate_handle("alice.example.com")); 773 assert!(validate_handle("my-site.example.com")); 774 775 // Invalid handles 776 assert!(!validate_handle("nodomainext")); 777 assert!(!validate_handle(".example.com")); 778 assert!(!validate_handle("example.com.")); 779 assert!(!validate_handle("-invalid.com")); 780 assert!(!validate_handle("invalid-.com")); 781 } 782 783 #[test] 784 fn test_validate_nsid() { 785 // Valid NSIDs 786 assert!(validate_nsid("com.example.foo")); 787 assert!(validate_nsid("com.example.foo.bar")); 788 assert!(validate_nsid("io.github.user.action")); 789 790 // Invalid NSIDs 791 assert!(!validate_nsid("com.example")); // need at least 3 segments 792 assert!(!validate_nsid("COM.EXAMPLE.FOO")); // must be lowercase 793 assert!(!validate_nsid("com.123invalid.foo")); // can't start with digit 794 assert!(!validate_nsid("co")); // too short 795 } 796 797 #[test] 798 fn test_validate_cid() { 799 // Valid CIDs (examples) 800 assert!(validate_cid("QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG")); // CIDv0 801 assert!(validate_cid("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku")); // CIDv1 802 803 // Invalid CIDs 804 assert!(!validate_cid("")); 805 assert!(!validate_cid("not-a-cid")); 806 assert!(!validate_cid("Qm123")); // too short 807 } 808 809 #[test] 810 fn test_validate_at_identifier() { 811 // Valid (DIDs) 812 assert!(validate_at_identifier("did:plc:abc123")); 813 814 // Valid (handles) 815 assert!(validate_at_identifier("example.com")); 816 assert!(validate_at_identifier("alice.example.com")); 817 818 // Invalid 819 assert!(!validate_at_identifier("not-valid")); 820 assert!(!validate_at_identifier("")); 821 } 822 823 #[test] 824 fn test_validate_language() { 825 // Valid language codes (BCP 47 / RFC 5646) 826 assert!(validate_language("en")); 827 assert!(validate_language("en-US")); 828 assert!(validate_language("zh-Hans-CN")); 829 assert!(validate_language("fr-CA")); 830 assert!(validate_language("en-GB")); 831 assert!(validate_language("de-DE")); 832 833 // Invalid language codes 834 assert!(!validate_language("e")); // too short 835 assert!(!validate_language("en_US")); // wrong separator (underscore) 836 assert!(!validate_language("")); // empty 837 assert!(!validate_language("123")); // starts with digit 838 assert!(!validate_language("en--US")); // double separator 839 } 840 841 #[test] 842 fn test_validate_tid() { 843 // Valid TIDs (13 chars, base32-sortable) 844 assert!(validate_tid("3jui7kd54zh2y")); 845 assert!(validate_tid("3k2a4dqudbbz2")); 846 847 // Invalid TIDs 848 assert!(!validate_tid("3jui7kd54zh2")); // too short 849 assert!(!validate_tid("3jui7kd54zh2yy")); // too long 850 assert!(!validate_tid("3jui7kd54zh2Y")); // uppercase not allowed 851 assert!(!validate_tid("3jui0kd54zh2y")); // 0 not allowed 852 } 853 854 #[test] 855 fn test_validate_record_key() { 856 // Valid record keys 857 assert!(validate_record_key("3jui7kd54zh2y")); // TID 858 assert!(validate_record_key("my-record-key")); 859 assert!(validate_record_key("key.with.dots")); 860 assert!(validate_record_key("key_with_underscores")); 861 assert!(validate_record_key("key~with~tildes")); 862 863 // Invalid record keys 864 assert!(!validate_record_key("")); // empty 865 assert!(!validate_record_key(&"a".repeat(513))); // too long 866 assert!(!validate_record_key("key with spaces")); // spaces not allowed 867 } 868 869 #[test] 870 fn test_grapheme_counting() { 871 // Unicode grapheme cluster counting test 872 use unicode_segmentation::UnicodeSegmentation; 873 874 let text = "👨‍👩‍👧‍👦"; // Family emoji (1 grapheme cluster) 875 assert_eq!(text.graphemes(true).count(), 1); 876 877 let text = "hello"; // 5 graphemes 878 assert_eq!(text.graphemes(true).count(), 5); 879 880 let text = "नमस्ते"; // Devanagari (3 grapheme clusters) 881 assert_eq!(text.graphemes(true).count(), 3); 882 } 883}