···11+---
22+"@atcute/lexicon-doc": patch
33+---
44+55+faster UTF-8 and grapheme length validation
+9-9
packages/lexicons/lexicon-doc/lib/builder.test.ts
···305305 }),
306306 ],
307307 }),
308308- ).toThrow(/com\.example\.test#main\/const:.*can't be shorter than minimum length/);
308308+ ).toThrow(/com\.example\.test#main\/const:.*length \d+, must be 1000 <= len <= Infinity/);
309309 });
310310311311 test('throws when token default value violates maxLength during build', () => {
···326326 }),
327327 ],
328328 }),
329329- ).toThrow(/com\.example\.test#main\/default:.*can't be longer than maximum length/);
329329+ ).toThrow(/com\.example\.test#main\/default:.*length \d+, must be 0 <= len <= 5/);
330330 });
331331332332 test('throws when token enum value violates minLength during build', () => {
···347347 }),
348348 ],
349349 }),
350350- ).toThrow(/com\.example\.test#main\/enum\/0:.*can't be shorter than minimum length/);
350350+ ).toThrow(/com\.example\.test#main\/enum\/0:.*length \d+, must be 1000 <= len <= Infinity/);
351351 });
352352353353 test('throws when token knownValues violates maxLength during build', () => {
···368368 }),
369369 ],
370370 }),
371371- ).toThrow(/com\.example\.test#main\/knownValues\/0:.*can't be longer than maximum length/);
371371+ ).toThrow(/com\.example\.test#main\/knownValues\/0:.*length \d+, must be 0 <= len <= 5/);
372372 });
373373374374 test('throws when token const value does not match format during build', () => {
···501501502502 test('throws when default is shorter than minLength', () => {
503503 expect(() => string({ minLength: 10, default: 'hi' })).toThrow(
504504- 'string/default: value ("hi") can\'t be shorter than minimum length (10)',
504504+ 'string/default: value ("hi") length 2, must be 10 <= len <= Infinity',
505505 );
506506 });
507507508508 test('throws when default is longer than maxLength', () => {
509509 expect(() => string({ maxLength: 5, default: 'hello world' })).toThrow(
510510- 'string/default: value ("hello world") can\'t be longer than maximum length (5)',
510510+ 'string/default: value ("hello world") length 11, must be 0 <= len <= 5',
511511 );
512512 });
513513···581581582582 test('throws when enum value is shorter than minLength', () => {
583583 expect(() => string({ minLength: 5, enum: ['hi', 'hello', 'world'] })).toThrow(
584584- 'string/enum[0]: value ("hi") can\'t be shorter than minimum length (5)',
584584+ 'string/enum[0]: value ("hi") length 2, must be 5 <= len <= Infinity',
585585 );
586586 });
587587588588 test('throws when enum value is longer than maxLength', () => {
589589 expect(() => string({ maxLength: 5, enum: ['hi', 'hello', 'worlds'] })).toThrow(
590590- 'string/enum[2]: value ("worlds") can\'t be longer than maximum length (5)',
590590+ 'string/enum[2]: value ("worlds") length 6, must be 0 <= len <= 5',
591591 );
592592 });
593593594594 test('throws when knownValues value is shorter than minLength', () => {
595595 expect(() => string({ minLength: 5, knownValues: ['hi', 'hello', 'world'] })).toThrow(
596596- 'string/knownValues[0]: value ("hi") can\'t be shorter than minimum length (5)',
596596+ 'string/knownValues[0]: value ("hi") length 2, must be 5 <= len <= Infinity',
597597 );
598598 });
599599
+82-226
packages/lexicons/lexicon-doc/lib/builder.ts
···11import { type Nsid } from '@atcute/lexicons/syntax';
22-33-import { isWithinGraphemeBounds, isWithinUtf8Bounds } from './internal/utils.js';
22+import { getUtf8Length, isUtf8LengthInRange } from '@atcute/uint8array';
33+import { getGraphemeLength, isGraphemeLengthInRange } from '@atcute/util-text';
44import { DELIMITED_MIME_TYPE_RE, KEY_RE, MIME_TYPE_RE, validateStringFormat } from './internal/validation.js';
55import type * as t from './types.js';
66import { formatLexiconRef, type ParsedLexiconRef } from './utils/refs.js';
···250250 }
251251 }
252252253253- {
254254- const bound = isWithinUtf8Bounds(defaultValue, minLength, maxLength);
255255-256256- if (bound === 'min') {
257257- throw new Error(
258258- `string/default: value (${JSON.stringify(defaultValue)}) can't be shorter than minimum length (${minLength})`,
259259- );
260260- }
261261-262262- if (bound === 'max') {
263263- throw new Error(
264264- `string/default: value (${JSON.stringify(defaultValue)}) can't be longer than maximum length (${maxLength})`,
265265- );
266266- }
253253+ if (!isUtf8LengthInRange(defaultValue, minLength, maxLength)) {
254254+ const len = getUtf8Length(defaultValue);
255255+ throw new Error(
256256+ `string/default: value (${JSON.stringify(defaultValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
257257+ );
267258 }
268259269269- {
270270- const bound = isWithinGraphemeBounds(defaultValue, minGraphemes, maxGraphemes);
271271-272272- if (bound === 'min') {
273273- throw new Error(
274274- `string/default: value (${JSON.stringify(defaultValue)}) can't be shorter than minimum graphemes (${minGraphemes})`,
275275- );
276276- }
277277-278278- if (bound === 'max') {
279279- throw new Error(
280280- `string/default: value (${JSON.stringify(defaultValue)}) can't be longer than maximum graphemes (${maxGraphemes})`,
281281- );
282282- }
260260+ if (!isGraphemeLengthInRange(defaultValue, minGraphemes, maxGraphemes)) {
261261+ const len = getGraphemeLength(defaultValue);
262262+ throw new Error(
263263+ `string/default: value (${JSON.stringify(defaultValue)}) grapheme length ${len}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
264264+ );
283265 }
284266285267 if (format !== undefined && !validateStringFormat(defaultValue, format)) {
···299281 }
300282301283 if (typeof constValue === 'string') {
302302- {
303303- const bound = isWithinUtf8Bounds(constValue, minLength, maxLength);
304304-305305- if (bound === 'min') {
306306- throw new Error(
307307- `string/const: value (${JSON.stringify(constValue)}) can't be shorter than minimum length (${minLength})`,
308308- );
309309- }
310310-311311- if (bound === 'max') {
312312- throw new Error(
313313- `string/const: value (${JSON.stringify(constValue)}) can't be longer than maximum length (${maxLength})`,
314314- );
315315- }
284284+ if (!isUtf8LengthInRange(constValue, minLength, maxLength)) {
285285+ const len = getUtf8Length(constValue);
286286+ throw new Error(
287287+ `string/const: value (${JSON.stringify(constValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
288288+ );
316289 }
317290318318- {
319319- const bound = isWithinGraphemeBounds(constValue, minGraphemes, maxGraphemes);
320320-321321- if (bound === 'min') {
322322- throw new Error(
323323- `string/const: value (${JSON.stringify(constValue)}) can't be shorter than minimum graphemes (${minGraphemes})`,
324324- );
325325- }
326326-327327- if (bound === 'max') {
328328- throw new Error(
329329- `string/const: value (${JSON.stringify(constValue)}) can't be longer than maximum graphemes (${maxGraphemes})`,
330330- );
331331- }
291291+ if (!isGraphemeLengthInRange(constValue, minGraphemes, maxGraphemes)) {
292292+ const len = getGraphemeLength(constValue);
293293+ throw new Error(
294294+ `string/const: value (${JSON.stringify(constValue)}) grapheme length ${len}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
295295+ );
332296 }
333297334298 if (format !== undefined && !validateStringFormat(constValue, format)) {
···348312 const enumValue = enumValues[idx];
349313350314 if (typeof enumValue === 'string') {
351351- {
352352- const bound = isWithinUtf8Bounds(enumValue, minLength, maxLength);
353353-354354- if (bound === 'min') {
355355- throw new Error(
356356- `string/enum[${idx}]: value (${JSON.stringify(enumValue)}) can't be shorter than minimum length (${minLength})`,
357357- );
358358- }
359359-360360- if (bound === 'max') {
361361- throw new Error(
362362- `string/enum[${idx}]: value (${JSON.stringify(enumValue)}) can't be longer than maximum length (${maxLength})`,
363363- );
364364- }
315315+ if (!isUtf8LengthInRange(enumValue, minLength, maxLength)) {
316316+ const len = getUtf8Length(enumValue);
317317+ throw new Error(
318318+ `string/enum[${idx}]: value (${JSON.stringify(enumValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
319319+ );
365320 }
366321367367- {
368368- const bound = isWithinGraphemeBounds(enumValue, minGraphemes, maxGraphemes);
369369-370370- if (bound === 'min') {
371371- throw new Error(
372372- `string/enum[${idx}]: value (${JSON.stringify(enumValue)}) can't have fewer graphemes than minimum graphemes (${minGraphemes})`,
373373- );
374374- }
375375-376376- if (bound === 'max') {
377377- throw new Error(
378378- `string/enum[${idx}]: value (${JSON.stringify(enumValue)}) can't have more graphemes than maximum graphemes (${maxGraphemes})`,
379379- );
380380- }
322322+ if (!isGraphemeLengthInRange(enumValue, minGraphemes, maxGraphemes)) {
323323+ const graphemeLen = getGraphemeLength(enumValue);
324324+ throw new Error(
325325+ `string/enum[${idx}]: value (${JSON.stringify(enumValue)}) grapheme length ${graphemeLen}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
326326+ );
381327 }
382328383329 if (format !== undefined && !validateStringFormat(enumValue, format)) {
···394340 const knownValue = knownValues[idx];
395341396342 if (typeof knownValue === 'string') {
397397- {
398398- const bound = isWithinUtf8Bounds(knownValue, minLength, maxLength);
399399-400400- if (bound === 'min') {
401401- throw new Error(
402402- `string/knownValues[${idx}]: value (${JSON.stringify(knownValue)}) can't be shorter than minimum length (${minLength})`,
403403- );
404404- }
405405-406406- if (bound === 'max') {
407407- throw new Error(
408408- `string/knownValues[${idx}]: value (${JSON.stringify(knownValue)}) can't be longer than maximum length (${maxLength})`,
409409- );
410410- }
343343+ if (!isUtf8LengthInRange(knownValue, minLength, maxLength)) {
344344+ const len = getUtf8Length(knownValue);
345345+ throw new Error(
346346+ `string/knownValues[${idx}]: value (${JSON.stringify(knownValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
347347+ );
411348 }
412349413413- {
414414- const bound = isWithinGraphemeBounds(knownValue, minGraphemes, maxGraphemes);
415415-416416- if (bound === 'min') {
417417- throw new Error(
418418- `string/knownValues[${idx}]: value (${JSON.stringify(knownValue)}) can't have fewer graphemes than minimum graphemes (${minGraphemes})`,
419419- );
420420- }
421421-422422- if (bound === 'max') {
423423- throw new Error(
424424- `string/knownValues[${idx}]: value (${JSON.stringify(knownValue)}) can't have more graphemes than maximum graphemes (${maxGraphemes})`,
425425- );
426426- }
350350+ if (!isGraphemeLengthInRange(knownValue, minGraphemes, maxGraphemes)) {
351351+ const graphemeLen = getGraphemeLength(knownValue);
352352+ throw new Error(
353353+ `string/knownValues[${idx}]: value (${JSON.stringify(knownValue)}) grapheme length ${graphemeLen}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
354354+ );
427355 }
428356429357 if (format !== undefined && !validateStringFormat(knownValue, format)) {
···495423496424 // validate resolved const value
497425 if (builtConstValue !== undefined && typeof constValue !== 'string') {
498498- {
499499- const bound = isWithinUtf8Bounds(builtConstValue, minLength, maxLength);
500500-501501- if (bound === 'min') {
502502- throw new Error(
503503- `${ctx.dotPath}/const: value (${JSON.stringify(builtConstValue)}) can't be shorter than minimum length (${minLength})`,
504504- );
505505- }
506506-507507- if (bound === 'max') {
508508- throw new Error(
509509- `${ctx.dotPath}/const: value (${JSON.stringify(builtConstValue)}) can't be longer than maximum length (${maxLength})`,
510510- );
511511- }
426426+ if (!isUtf8LengthInRange(builtConstValue, minLength, maxLength)) {
427427+ const len = getUtf8Length(builtConstValue);
428428+ throw new Error(
429429+ `${ctx.dotPath}/const: value (${JSON.stringify(builtConstValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
430430+ );
512431 }
513432514514- {
515515- const bound = isWithinGraphemeBounds(builtConstValue, minGraphemes, maxGraphemes);
516516-517517- if (bound === 'min') {
518518- throw new Error(
519519- `${ctx.dotPath}/const: value (${JSON.stringify(builtConstValue)}) can't be shorter than minimum graphemes (${minGraphemes})`,
520520- );
521521- }
522522-523523- if (bound === 'max') {
524524- throw new Error(
525525- `${ctx.dotPath}/const: value (${JSON.stringify(builtConstValue)}) can't be longer than maximum graphemes (${maxGraphemes})`,
526526- );
527527- }
433433+ if (!isGraphemeLengthInRange(builtConstValue, minGraphemes, maxGraphemes)) {
434434+ const len = getGraphemeLength(builtConstValue);
435435+ throw new Error(
436436+ `${ctx.dotPath}/const: value (${JSON.stringify(builtConstValue)}) grapheme length ${len}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
437437+ );
528438 }
529439530440 if (format !== undefined && !validateStringFormat(builtConstValue, format)) {
···544454 throw new Error(`${ctx.dotPath}/default: value must be one of the enum values`);
545455 }
546456547547- {
548548- const bound = isWithinUtf8Bounds(builtDefaultValue, minLength, maxLength);
549549-550550- if (bound === 'min') {
551551- throw new Error(
552552- `${ctx.dotPath}/default: value (${JSON.stringify(builtDefaultValue)}) can't be shorter than minimum length (${minLength})`,
553553- );
554554- }
555555-556556- if (bound === 'max') {
557557- throw new Error(
558558- `${ctx.dotPath}/default: value (${JSON.stringify(builtDefaultValue)}) can't be longer than maximum length (${maxLength})`,
559559- );
560560- }
457457+ if (!isUtf8LengthInRange(builtDefaultValue, minLength, maxLength)) {
458458+ const len = getUtf8Length(builtDefaultValue);
459459+ throw new Error(
460460+ `${ctx.dotPath}/default: value (${JSON.stringify(builtDefaultValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
461461+ );
561462 }
562463563563- {
564564- const bound = isWithinGraphemeBounds(builtDefaultValue, minGraphemes, maxGraphemes);
565565-566566- if (bound === 'min') {
567567- throw new Error(
568568- `${ctx.dotPath}/default: value (${JSON.stringify(builtDefaultValue)}) can't be shorter than minimum graphemes (${minGraphemes})`,
569569- );
570570- }
571571-572572- if (bound === 'max') {
573573- throw new Error(
574574- `${ctx.dotPath}/default: value (${JSON.stringify(builtDefaultValue)}) can't be longer than maximum graphemes (${maxGraphemes})`,
575575- );
576576- }
464464+ if (!isGraphemeLengthInRange(builtDefaultValue, minGraphemes, maxGraphemes)) {
465465+ const len = getGraphemeLength(builtDefaultValue);
466466+ throw new Error(
467467+ `${ctx.dotPath}/default: value (${JSON.stringify(builtDefaultValue)}) grapheme length ${len}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
468468+ );
577469 }
578470579471 if (format !== undefined && !validateStringFormat(builtDefaultValue, format)) {
···590482 const builtEnumValue = builtEnumValues[idx];
591483592484 if (typeof enumValue !== 'string') {
593593- {
594594- const bound = isWithinUtf8Bounds(builtEnumValue, minLength, maxLength);
595595-596596- if (bound === 'min') {
597597- throw new Error(
598598- `${ctx.dotPath}/enum/${idx}: value (${JSON.stringify(builtEnumValue)}) can't be shorter than minimum length (${minLength})`,
599599- );
600600- }
601601-602602- if (bound === 'max') {
603603- throw new Error(
604604- `${ctx.dotPath}/enum/${idx}: value (${JSON.stringify(builtEnumValue)}) can't be longer than maximum length (${maxLength})`,
605605- );
606606- }
485485+ if (!isUtf8LengthInRange(builtEnumValue, minLength, maxLength)) {
486486+ const len = getUtf8Length(builtEnumValue);
487487+ throw new Error(
488488+ `${ctx.dotPath}/enum/${idx}: value (${JSON.stringify(builtEnumValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
489489+ );
607490 }
608491609609- {
610610- const bound = isWithinGraphemeBounds(builtEnumValue, minGraphemes, maxGraphemes);
611611-612612- if (bound === 'min') {
613613- throw new Error(
614614- `${ctx.dotPath}/enum/${idx}: value (${JSON.stringify(builtEnumValue)}) can't have fewer graphemes than minimum graphemes (${minGraphemes})`,
615615- );
616616- }
617617-618618- if (bound === 'max') {
619619- throw new Error(
620620- `${ctx.dotPath}/enum/${idx}: value (${JSON.stringify(builtEnumValue)}) can't have more graphemes than maximum graphemes (${maxGraphemes})`,
621621- );
622622- }
492492+ if (!isGraphemeLengthInRange(builtEnumValue, minGraphemes, maxGraphemes)) {
493493+ const graphemeLen = getGraphemeLength(builtEnumValue);
494494+ throw new Error(
495495+ `${ctx.dotPath}/enum/${idx}: value (${JSON.stringify(builtEnumValue)}) grapheme length ${graphemeLen}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
496496+ );
623497 }
624498625499 if (format !== undefined && !validateStringFormat(builtEnumValue, format)) {
···638512 const builtKnownValue = builtKnownValues[idx];
639513640514 if (typeof knownValue !== 'string') {
641641- {
642642- const bound = isWithinUtf8Bounds(builtKnownValue, minLength, maxLength);
643643-644644- if (bound === 'min') {
645645- throw new Error(
646646- `${ctx.dotPath}/knownValues/${idx}: value (${JSON.stringify(builtKnownValue)}) can't be shorter than minimum length (${minLength})`,
647647- );
648648- }
649649-650650- if (bound === 'max') {
651651- throw new Error(
652652- `${ctx.dotPath}/knownValues/${idx}: value (${JSON.stringify(builtKnownValue)}) can't be longer than maximum length (${maxLength})`,
653653- );
654654- }
515515+ if (!isUtf8LengthInRange(builtKnownValue, minLength, maxLength)) {
516516+ const len = getUtf8Length(builtKnownValue);
517517+ throw new Error(
518518+ `${ctx.dotPath}/knownValues/${idx}: value (${JSON.stringify(builtKnownValue)}) length ${len}, must be ${minLength} <= len <= ${maxLength}`,
519519+ );
655520 }
656521657657- {
658658- const bound = isWithinGraphemeBounds(builtKnownValue, minGraphemes, maxGraphemes);
659659-660660- if (bound === 'min') {
661661- throw new Error(
662662- `${ctx.dotPath}/knownValues/${idx}: value (${JSON.stringify(builtKnownValue)}) can't have fewer graphemes than minimum graphemes (${minGraphemes})`,
663663- );
664664- }
665665-666666- if (bound === 'max') {
667667- throw new Error(
668668- `${ctx.dotPath}/knownValues/${idx}: value (${JSON.stringify(builtKnownValue)}) can't have more graphemes than maximum graphemes (${maxGraphemes})`,
669669- );
670670- }
522522+ if (!isGraphemeLengthInRange(builtKnownValue, minGraphemes, maxGraphemes)) {
523523+ const graphemeLen = getGraphemeLength(builtKnownValue);
524524+ throw new Error(
525525+ `${ctx.dotPath}/knownValues/${idx}: value (${JSON.stringify(builtKnownValue)}) grapheme length ${graphemeLen}, must be ${minGraphemes} <= len <= ${maxGraphemes}`,
526526+ );
671527 }
672528673529 if (format !== undefined && !validateStringFormat(builtKnownValue, format)) {