a collection of lightweight TypeScript packages for AT Protocol, the protocol powering Bluesky
atproto bluesky typescript npm
101
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(lexicons): add strict blob validation and collectBlobs utility

Mary 94065a15 9e6b3f65

+408 -7
+5
.changeset/strict-blob-validation.md
··· 1 + --- 2 + '@atcute/lexicons': minor 3 + --- 4 + 5 + add strict blob validation (blobSize, blobAccept constraints) and collectBlobs utility
+113
packages/lexicons/lexicons/lib/interfaces/blob.test.ts
··· 1 + import { describe, expect, it } from 'vitest'; 2 + 3 + import { collectBlobs } from './blob.ts'; 4 + 5 + describe('collectBlobs', () => { 6 + const modernBlob = { 7 + $type: 'blob', 8 + ref: { $link: 'bafyreidfayvfuwqa7qlnopdjiqrxzs6blmoeu4rujcjtnci5beludirz2a' }, 9 + mimeType: 'image/png', 10 + size: 1024, 11 + }; 12 + 13 + const legacyBlob = { 14 + cid: 'bafkreidjmlrsggn2shrihfyp4iwlmxdp4dso7iqbkhfrpq6ahm22obop34', 15 + mimeType: 'image/jpeg', 16 + }; 17 + 18 + it('finds modern blobs', () => { 19 + const record = { $type: 'com.example.post', text: 'hello', image: modernBlob }; 20 + const blobs = collectBlobs(record); 21 + 22 + expect(blobs).toEqual([ 23 + { 24 + cid: 'bafyreidfayvfuwqa7qlnopdjiqrxzs6blmoeu4rujcjtnci5beludirz2a', 25 + mimeType: 'image/png', 26 + size: 1024, 27 + }, 28 + ]); 29 + }); 30 + 31 + it('excludes legacy blobs by default', () => { 32 + const record = { $type: 'com.example.post', image: legacyBlob }; 33 + const blobs = collectBlobs(record); 34 + 35 + expect(blobs).toEqual([]); 36 + }); 37 + 38 + it('includes legacy blobs with allowLegacy', () => { 39 + const record = { $type: 'com.example.post', image: legacyBlob }; 40 + const blobs = collectBlobs(record, { allowLegacy: true }); 41 + 42 + expect(blobs).toEqual([ 43 + { 44 + cid: 'bafkreidjmlrsggn2shrihfyp4iwlmxdp4dso7iqbkhfrpq6ahm22obop34', 45 + mimeType: 'image/jpeg', 46 + size: -1, 47 + }, 48 + ]); 49 + }); 50 + 51 + it('finds blobs nested in arrays', () => { 52 + const blob2 = { ...modernBlob }; 53 + const record = { $type: 'com.example.post', images: [modernBlob, blob2] }; 54 + const blobs = collectBlobs(record); 55 + 56 + expect(blobs).toHaveLength(2); 57 + expect(blobs[0].cid).toBe('bafyreidfayvfuwqa7qlnopdjiqrxzs6blmoeu4rujcjtnci5beludirz2a'); 58 + expect(blobs[1].cid).toBe('bafyreidfayvfuwqa7qlnopdjiqrxzs6blmoeu4rujcjtnci5beludirz2a'); 59 + }); 60 + 61 + it('finds blobs in deeply nested objects', () => { 62 + const record = { a: { b: { c: { image: modernBlob } } } }; 63 + const blobs = collectBlobs(record); 64 + 65 + expect(blobs).toHaveLength(1); 66 + }); 67 + 68 + it('finds both modern and legacy blobs with allowLegacy', () => { 69 + const record = { modern: modernBlob, legacy: legacyBlob }; 70 + const blobs = collectBlobs(record, { allowLegacy: true }); 71 + 72 + expect(blobs).toHaveLength(2); 73 + }); 74 + 75 + it('returns empty array for records with no blobs', () => { 76 + const record = { $type: 'com.example.post', text: 'hello' }; 77 + expect(collectBlobs(record)).toEqual([]); 78 + }); 79 + 80 + it('handles null and primitive values', () => { 81 + expect(collectBlobs(null)).toEqual([]); 82 + expect(collectBlobs(undefined)).toEqual([]); 83 + expect(collectBlobs('string')).toEqual([]); 84 + expect(collectBlobs(123)).toEqual([]); 85 + }); 86 + 87 + it('handles cyclic structures without infinite loops', () => { 88 + const record: Record<string, unknown> = { image: modernBlob }; 89 + record.self = record; 90 + 91 + const blobs = collectBlobs(record); 92 + expect(blobs).toHaveLength(1); 93 + }); 94 + 95 + it('only walks own properties', () => { 96 + const proto = { inherited: modernBlob }; 97 + const record = Object.create(proto); 98 + record.$type = 'com.example.post'; 99 + 100 + const blobs = collectBlobs(record); 101 + expect(blobs).toEqual([]); 102 + }); 103 + 104 + it('handles deep structures without stack overflow', () => { 105 + let deep: Record<string, unknown> = { image: modernBlob }; 106 + for (let i = 0; i < 10000; i++) { 107 + deep = { nested: deep }; 108 + } 109 + 110 + const blobs = collectBlobs(deep); 111 + expect(blobs).toHaveLength(1); 112 + }); 113 + });
+78
packages/lexicons/lexicons/lib/interfaces/blob.ts
··· 45 45 Object.keys(v).length === 2 46 46 ); 47 47 }; 48 + 49 + /** 50 + * extracted blob reference from a record 51 + */ 52 + export interface BlobRef { 53 + /** CID string */ 54 + cid: string; 55 + mimeType: string; 56 + /** self-reported size. -1 for legacy blobs */ 57 + size: number; 58 + } 59 + 60 + export interface CollectBlobsOptions { 61 + /** include legacy blob references in results (default: false) */ 62 + allowLegacy?: boolean; 63 + } 64 + 65 + /** 66 + * extracts all blob references from a record object, including in undeclared 67 + * properties. by default only finds modern blobs; set `allowLegacy` to also 68 + * include legacy blob formats. 69 + * @param record record object to walk 70 + * @param options collection options 71 + * @returns array of blob references found 72 + */ 73 + export const collectBlobs = (record: unknown, options?: CollectBlobsOptions): BlobRef[] => { 74 + const allowLegacy = options?.allowLegacy === true; 75 + const blobs: BlobRef[] = []; 76 + const stack: unknown[] = [record]; 77 + const visited = new Set<object>(); 78 + 79 + while (stack.length > 0) { 80 + const value = stack.pop(); 81 + 82 + if (typeof value !== 'object' || value === null) { 83 + continue; 84 + } 85 + if (visited.has(value)) { 86 + continue; 87 + } 88 + visited.add(value); 89 + 90 + if (Array.isArray(value)) { 91 + for (let i = value.length - 1; i >= 0; i--) { 92 + stack.push(value[i]); 93 + } 94 + continue; 95 + } 96 + 97 + if (isBlob(value)) { 98 + blobs.push({ 99 + cid: value.ref.$link, 100 + mimeType: value.mimeType, 101 + size: value.size, 102 + }); 103 + continue; 104 + } 105 + 106 + if (allowLegacy && isLegacyBlob(value)) { 107 + blobs.push({ 108 + cid: value.cid, 109 + mimeType: value.mimeType, 110 + size: -1, 111 + }); 112 + continue; 113 + } 114 + 115 + const keys = Object.keys(value); 116 + for (let i = keys.length - 1; i >= 0; i--) { 117 + const v = (value as Record<string, unknown>)[keys[i]]; 118 + if (v != null) { 119 + stack.push(v); 120 + } 121 + } 122 + } 123 + 124 + return blobs; 125 + };
+9 -1
packages/lexicons/lexicons/lib/interfaces/index.ts
··· 1 - export { isBlob, isLegacyBlob, type Blob, type LegacyBlob } from './blob.ts'; 1 + export { 2 + collectBlobs, 3 + isBlob, 4 + isLegacyBlob, 5 + type Blob, 6 + type BlobRef, 7 + type CollectBlobsOptions, 8 + type LegacyBlob, 9 + } from './blob.ts'; 2 10 export { isBytes, type Bytes } from './bytes.ts'; 3 11 export { isCidLink, type CidLink } from './cid-link.ts';
+83
packages/lexicons/lexicons/lib/validations/index.test.ts
··· 270 270 } 271 271 }); 272 272 273 + describe('strict blob validation', () => { 274 + const modernBlob = { 275 + $type: 'blob', 276 + ref: { $link: 'bafyreidfayvfuwqa7qlnopdjiqrxzs6blmoeu4rujcjtnci5beludirz2a' }, 277 + mimeType: 'image/png', 278 + size: 1024, 279 + }; 280 + 281 + const legacyBlob = { 282 + cid: 'bafkreidjmlrsggn2shrihfyp4iwlmxdp4dso7iqbkhfrpq6ahm22obop34', 283 + mimeType: 'image/jpeg', 284 + }; 285 + 286 + it('rejects legacy blobs in strict mode', () => { 287 + const schema = v.blob(); 288 + 289 + expect(v.is(schema, legacyBlob)).toBe(true); 290 + expect(v.is(schema, legacyBlob, { strict: true })).toBe(false); 291 + expect(v.is(schema, modernBlob, { strict: true })).toBe(true); 292 + }); 293 + 294 + it('validates blobSize constraint in strict mode', () => { 295 + const schema = v.constrain(v.blob(), [v.blobSize(2048)]); 296 + 297 + expect(v.is(schema, modernBlob)).toBe(true); 298 + expect(v.is(schema, modernBlob, { strict: true })).toBe(true); 299 + expect(v.is(schema, { ...modernBlob, size: 4096 }, { strict: true })).toBe(false); 300 + 301 + // without strict, size constraint is not enforced 302 + expect(v.is(schema, { ...modernBlob, size: 4096 })).toBe(true); 303 + }); 304 + 305 + it('reports blobSize issue with details', () => { 306 + const schema = v.constrain(v.blob(), [v.blobSize(512)]); 307 + 308 + const result = v.safeParse(schema, modernBlob, { strict: true }); 309 + assert(!result.ok, 'expected validation issue'); 310 + expect(result.issues).toEqual([{ code: 'invalid_blob_size', maxSize: 512, path: [] }]); 311 + }); 312 + 313 + it('validates blobAccept constraint in strict mode', () => { 314 + const schema = v.constrain(v.blob(), [v.blobAccept(['image/png', 'image/jpeg'])]); 315 + 316 + expect(v.is(schema, modernBlob, { strict: true })).toBe(true); 317 + expect(v.is(schema, { ...modernBlob, mimeType: 'video/mp4' }, { strict: true })).toBe(false); 318 + 319 + // without strict, accept constraint is not enforced 320 + expect(v.is(schema, { ...modernBlob, mimeType: 'video/mp4' })).toBe(true); 321 + }); 322 + 323 + it('supports wildcard MIME type patterns', () => { 324 + const schema = v.constrain(v.blob(), [v.blobAccept(['image/*'])]); 325 + 326 + expect(v.is(schema, { ...modernBlob, mimeType: 'image/png' }, { strict: true })).toBe(true); 327 + expect(v.is(schema, { ...modernBlob, mimeType: 'image/jpeg' }, { strict: true })).toBe(true); 328 + expect(v.is(schema, { ...modernBlob, mimeType: 'video/mp4' }, { strict: true })).toBe(false); 329 + }); 330 + 331 + it('handles case-insensitive MIME type matching', () => { 332 + const schema = v.constrain(v.blob(), [v.blobAccept(['image/PNG'])]); 333 + 334 + expect(v.is(schema, { ...modernBlob, mimeType: 'image/png' }, { strict: true })).toBe(true); 335 + expect(v.is(schema, { ...modernBlob, mimeType: 'image/PNG' }, { strict: true })).toBe(true); 336 + }); 337 + 338 + it('reports blobAccept issue with details', () => { 339 + const accept = ['image/png', 'image/jpeg']; 340 + const schema = v.constrain(v.blob(), [v.blobAccept(accept)]); 341 + 342 + const result = v.safeParse(schema, { ...modernBlob, mimeType: 'video/mp4' }, { strict: true }); 343 + assert(!result.ok, 'expected validation issue'); 344 + expect(result.issues).toEqual([{ code: 'invalid_blob_mime_type', accept, path: [] }]); 345 + }); 346 + 347 + it('combines blobSize and blobAccept constraints', () => { 348 + const schema = v.constrain(v.blob(), [v.blobSize(2048), v.blobAccept(['image/*'])]); 349 + 350 + expect(v.is(schema, modernBlob, { strict: true })).toBe(true); 351 + expect(v.is(schema, { ...modernBlob, size: 4096 }, { strict: true })).toBe(false); 352 + expect(v.is(schema, { ...modernBlob, mimeType: 'video/mp4' }, { strict: true })).toBe(false); 353 + }); 354 + }); 355 + 273 356 describe(`IPLD types`, () => { 274 357 it(`validates bytes type`, () => { 275 358 const schema = v.bytes();
+120 -6
packages/lexicons/lexicons/lib/validations/index.ts
··· 68 68 | { code: 'invalid_string_length'; minLength: number; maxLength: number } 69 69 | { code: 'invalid_array_length'; minLength: number; maxLength: number } 70 70 | { code: 'invalid_bytes_size'; minSize: number; maxSize: number } 71 + | { code: 'invalid_blob_size'; maxSize: number } 72 + | { code: 'invalid_blob_mime_type'; accept: readonly string[] } 71 73 ); 72 74 73 75 export type IssueTree = ··· 85 87 | { code: 'invalid_string_graphemes'; path: Key[]; minGraphemes: number; maxGraphemes: number } 86 88 | { code: 'invalid_string_length'; path: Key[]; minLength: number; maxLength: number } 87 89 | { code: 'invalid_array_length'; path: Key[]; minLength: number; maxLength: number } 88 - | { code: 'invalid_bytes_size'; path: Key[]; minSize: number; maxSize: number }; 90 + | { code: 'invalid_bytes_size'; path: Key[]; minSize: number; maxSize: number } 91 + | { code: 'invalid_blob_size'; path: Key[]; maxSize: number } 92 + | { code: 'invalid_blob_mime_type'; path: Key[]; accept: readonly string[] }; 89 93 90 94 // #__NO_SIDE_EFFECTS__ 91 95 const joinIssues = (left: IssueTree | undefined, right: IssueTree): IssueTree => { ··· 127 131 export const FLAG_EMPTY = 0; 128 132 // Don't continue validation if an error is encountered 129 133 export const FLAG_ABORT_EARLY = 1 << 0; 134 + // Enable strict blob validation (size, MIME type constraints, reject legacy blobs) 135 + export const FLAG_STRICT = 1 << 1; 130 136 131 137 type MatcherResult = undefined | Ok<unknown> | IssueTree; 132 138 type Matcher = (input: unknown, flags: number) => MatcherResult; ··· 313 319 } 314 320 } 315 321 322 + export interface ValidationOptions { 323 + /** enable strict blob validation (size, MIME type constraints, reject legacy blobs) */ 324 + strict?: boolean; 325 + } 326 + 316 327 // #__NO_SIDE_EFFECTS__ 317 328 export const is = <const TSchema extends BaseSchema>( 318 329 schema: TSchema, 319 330 input: unknown, 331 + options?: ValidationOptions, 320 332 ): input is InferInput<TSchema> => { 321 - const r = schema['~run'](input, FLAG_ABORT_EARLY); 333 + let flags = FLAG_ABORT_EARLY; 334 + if (options?.strict) { 335 + flags |= FLAG_STRICT; 336 + } 337 + const r = schema['~run'](input, flags); 322 338 return r === undefined || r.ok; 323 339 }; 324 340 ··· 326 342 export const safeParse = <const TSchema extends BaseSchema>( 327 343 schema: TSchema, 328 344 input: unknown, 345 + options?: ValidationOptions, 329 346 ): ValidationResult<InferOutput<TSchema>> => { 330 - const r = schema['~run'](input, FLAG_EMPTY); 347 + let flags = FLAG_EMPTY; 348 + if (options?.strict) { 349 + flags |= FLAG_STRICT; 350 + } 351 + const r = schema['~run'](input, flags); 331 352 332 353 if (r === undefined) { 333 354 return ok(input as InferOutput<TSchema>); ··· 343 364 export const parse = <const TSchema extends BaseSchema>( 344 365 schema: TSchema, 345 366 input: unknown, 367 + options?: ValidationOptions, 346 368 ): InferOutput<TSchema> => { 347 - const r = schema['~run'](input, FLAG_EMPTY); 369 + let flags = FLAG_EMPTY; 370 + if (options?.strict) { 371 + flags |= FLAG_STRICT; 372 + } 373 + const r = schema['~run'](input, flags); 348 374 349 375 if (r === undefined) { 350 376 return input as InferOutput<TSchema>; ··· 886 912 const BLOB_SCHEMA: BlobSchema = { 887 913 kind: 'schema', 888 914 type: 'blob', 889 - '~run'(input, _flags) { 915 + '~run'(input, flags) { 890 916 if (typeof input !== 'object' || input === null) { 891 917 return ISSUE_EXPECTED_BLOB; 892 918 } ··· 895 921 return undefined; 896 922 } 897 923 898 - if (interfaces.isLegacyBlob(input)) { 924 + if (!(flags & FLAG_STRICT) && interfaces.isLegacyBlob(input)) { 899 925 const blob: interfaces.Blob = { 900 926 $type: 'blob', 901 927 mimeType: input.mimeType, ··· 917 943 export const blob = (): BlobSchema => { 918 944 return BLOB_SCHEMA; 919 945 }; 946 + 947 + // #region Blob constraints 948 + 949 + export interface BlobSizeConstraint< 950 + TMaxSize extends number = number, 951 + > extends BaseConstraint<interfaces.Blob> { 952 + readonly type: 'blob_size'; 953 + readonly maxSize: TMaxSize; 954 + } 955 + 956 + // #__NO_SIDE_EFFECTS__ 957 + export const blobSize = <const TMaxSize extends number>(maxSize: TMaxSize): BlobSizeConstraint<TMaxSize> => { 958 + const issue: IssueLeaf = { 959 + ok: false, 960 + code: 'invalid_blob_size', 961 + maxSize: maxSize, 962 + msg() { 963 + return `blob size must not exceed ${maxSize} bytes`; 964 + }, 965 + }; 966 + 967 + return { 968 + kind: 'constraint', 969 + type: 'blob_size', 970 + maxSize: maxSize, 971 + '~run'(input, flags) { 972 + if (!(flags & FLAG_STRICT)) { 973 + return undefined; 974 + } 975 + if ((input as interfaces.Blob).size > maxSize) { 976 + return issue; 977 + } 978 + return undefined; 979 + }, 980 + }; 981 + }; 982 + 983 + export interface BlobAcceptConstraint extends BaseConstraint<interfaces.Blob> { 984 + readonly type: 'blob_accept'; 985 + readonly accept: readonly string[]; 986 + } 987 + 988 + // #__NO_SIDE_EFFECTS__ 989 + export const blobAccept = (accept: readonly string[]): BlobAcceptConstraint => { 990 + const normalized = accept.map((p) => p.toLowerCase()); 991 + 992 + const issue: IssueLeaf = { 993 + ok: false, 994 + code: 'invalid_blob_mime_type', 995 + accept: accept, 996 + msg() { 997 + return `blob MIME type must match: ${accept.join(', ')}`; 998 + }, 999 + }; 1000 + 1001 + return { 1002 + kind: 'constraint', 1003 + type: 'blob_accept', 1004 + accept: accept, 1005 + '~run'(input, flags) { 1006 + if (!(flags & FLAG_STRICT)) { 1007 + return undefined; 1008 + } 1009 + const mimeType = (input as interfaces.Blob).mimeType.toLowerCase(); 1010 + 1011 + for (let idx = 0, len = normalized.length; idx < len; idx++) { 1012 + const pattern = normalized[idx]; 1013 + 1014 + if (pattern === '*/*') { 1015 + return undefined; 1016 + } 1017 + if (pattern.endsWith('/*')) { 1018 + if (mimeType.startsWith(pattern.slice(0, -1))) { 1019 + return undefined; 1020 + } 1021 + } else { 1022 + if (mimeType === pattern) { 1023 + return undefined; 1024 + } 1025 + } 1026 + } 1027 + 1028 + return issue; 1029 + }, 1030 + }; 1031 + }; 1032 + 1033 + // #endregion 920 1034 921 1035 // #region IPLD bytes schema 922 1036