MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1import { test, testDeep, testThrows, summary } from './helpers.js';
2
// Suite banner; the trailing newline separates it from the per-check output.
console.log('TextEncoder/TextDecoder Tests\n');

// Default-configured instances shared by the checks in this file.
const encoder = new TextEncoder();
const decoder = new TextDecoder();

// ASCII: 'hello' encodes to five single-byte code units and decodes back.
{
  const helloBytes = encoder.encode('hello');
  const isByteArray = helloBytes instanceof Uint8Array;
  test('TextEncoder type', isByteArray, true);
  test('TextEncoder length', helloBytes.length, 5);
  test('TextEncoder byte 0', helloBytes[0], 104); // 'h'
  test('TextEncoder byte 1', helloBytes[1], 101); // 'e'

  const helloText = decoder.decode(helloBytes);
  test('TextDecoder', helloText, 'hello');
}
16
// Three CJK characters, three UTF-8 bytes each.
{
  const text = '日本語';
  const bytes = encoder.encode(text);
  test('UTF-8 encode length', bytes.length, 9);
  test('UTF-8 decode', decoder.decode(bytes), text);
}

// A single astral-plane character (U+1F600) takes four UTF-8 bytes.
{
  const text = '😀';
  const bytes = encoder.encode(text);
  test('emoji encode length', bytes.length, 4);
  test('emoji decode', decoder.decode(bytes), text);
}

// The empty string encodes to zero bytes and decodes back to itself.
{
  const bytes = encoder.encode('');
  test('empty encode length', bytes.length, 0);
  test('empty decode', decoder.decode(bytes), '');
}

// Mixed ASCII, CJK and emoji must survive an encode/decode round trip.
{
  const original = 'Hello, 世界! 🎉';
  const restored = decoder.decode(encoder.encode(original));
  test('roundtrip', restored, original);
}
31
// TextEncoder surface: the encoding name, and that the constructor is a
// function that cannot be called without `new`.
{
  const { encoding } = encoder;
  test('TextEncoder.encoding', encoding, 'utf-8');

  const constructorKind = typeof TextEncoder;
  test('TextEncoder requires new', constructorKind, 'function');

  testThrows('TextEncoder without new throws', function () {
    return TextEncoder();
  });
}
35
// Unpaired surrogates are expected to be encoded as U+FFFD (EF BF BD),
// while a well-formed pair becomes a single four-byte sequence.
{
  const REPLACEMENT = [0xef, 0xbf, 0xbd];
  const bytesOf = (text) => Array.from(encoder.encode(text));

  testDeep('encode lone high surrogate', bytesOf('\uD800'), [...REPLACEMENT]);
  testDeep('encode lone low surrogate', bytesOf('\uDC00'), [...REPLACEMENT]);
  testDeep('encode surrogate in string', bytesOf('a\uD800b'), [0x61, ...REPLACEMENT, 0x62]);
  testDeep('encode reversed surrogates', bytesOf('\uDC00\uD800'), [...REPLACEMENT, ...REPLACEMENT]);

  const pairBytes = encoder.encode('\uD834\uDD1E'); // U+1D11E 𝄞
  test('encode valid surrogate pair', pairBytes.length, 4);
}
41
// encodeInto with room to spare: all five code units are read and written.
{
  const roomy = new Uint8Array(10);
  const { read, written } = encoder.encodeInto('hello', roomy);
  test('encodeInto read', read, 5);
  test('encodeInto written', written, 5);

  const filled = roomy.subarray(0, written);
  test('encodeInto data', decoder.decode(filled), 'hello');
}

// encodeInto with a two-byte destination: only the first two code units fit.
{
  const cramped = new Uint8Array(2);
  const outcome = encoder.encodeInto('hello', cramped);
  test('encodeInto partial written', outcome.written, 2);
  test('encodeInto partial read', outcome.read, 2);
}
52
// With no label the decoder reports 'utf-8'.
test('TextDecoder default encoding', new TextDecoder().encoding, 'utf-8');

// Each row: [check title, label passed to the constructor, expected .encoding].
for (const [title, label, canonical] of [
  ['TextDecoder utf8 alias', 'utf8', 'utf-8'],
  ['TextDecoder case insensitive', 'UTF-8', 'utf-8'],
  ['TextDecoder utf-16le label', 'utf-16le', 'utf-16le'],
  ['TextDecoder utf-16be label', 'utf-16be', 'utf-16be'],
  ['TextDecoder utf-16 alias', 'utf-16', 'utf-16le'],
]) {
  test(title, new TextDecoder(label).encoding, canonical);
}

// Unknown labels and calls without `new` are both expected to throw.
testThrows('TextDecoder invalid label', function () {
  return new TextDecoder('bogus');
});
testThrows('TextDecoder without new throws', function () {
  return TextDecoder();
});
61
// Both boolean options default to false and are reflected by a same-named
// getter when set to true in the constructor.
for (const flag of ['fatal', 'ignoreBOM']) {
  test(`${flag} defaults false`, new TextDecoder()[flag], false);
  test(`${flag} option true`, new TextDecoder('utf-8', { [flag]: true })[flag], true);
}
66
// Malformed UTF-8: with { fatal: true } decode() must throw; without it the
// malformed sequence is replaced by U+FFFD.
testThrows('fatal on invalid UTF-8', () => {
  // 0xFF can never appear in UTF-8.
  new TextDecoder('utf-8', { fatal: true }).decode(new Uint8Array([0xff]));
});
testThrows('fatal on truncated sequence', () => {
  // 0xE2 0x82 starts a three-byte sequence (e.g. U+20AC) whose last byte is
  // missing, so the input ends mid-character. The previous input, 0xC0, is an
  // invalid lead byte and never exercised truncation.
  new TextDecoder('utf-8', { fatal: true }).decode(new Uint8Array([0xe2, 0x82]));
});
testThrows('fatal on overlong', () => {
  // 0xC0 0x80 is an overlong encoding of U+0000; 0xC0 is never a valid lead byte.
  new TextDecoder('utf-8', { fatal: true }).decode(new Uint8Array([0xc0, 0x80]));
});
test('non-fatal replacement', new TextDecoder().decode(new Uint8Array([0xff])), '\uFFFD');
// A sequence cut off at end of input yields a single replacement character.
test('non-fatal truncated', new TextDecoder().decode(new Uint8Array([0xe2, 0x82])), '\uFFFD');
78
// A leading UTF-8 BOM (EF BB BF) is dropped unless ignoreBOM is set.
{
  const bomThenA = [0xef, 0xbb, 0xbf, 0x41];

  const stripped = new TextDecoder().decode(new Uint8Array(bomThenA));
  test('UTF-8 BOM stripped by default', stripped, 'A');

  const kept = new TextDecoder('utf-8', { ignoreBOM: true }).decode(new Uint8Array(bomThenA));
  test('UTF-8 BOM kept with ignoreBOM', kept, '\uFEFFA');
}
81
// U+1F4A9 is F0 9F 92 A9 in UTF-8; these cases split it across decode() calls.

// Three bytes in a streaming call, the last byte in the final call.
{
  const sd = new TextDecoder();
  const head = sd.decode(new Uint8Array([0xf0, 0x9f, 0x92]), { stream: true });
  const tail = sd.decode(new Uint8Array([0xa9]));
  test('streaming UTF-8 multi-byte', `${head}${tail}`, '\u{1F4A9}');
}

// One byte per call; only the last call is non-streaming.
{
  const sd = new TextDecoder();
  let text = '';
  for (const byte of [0xf0, 0x9f, 0x92]) {
    text += sd.decode(new Uint8Array([byte]), { stream: true });
  }
  text += sd.decode(new Uint8Array([0xa9]));
  test('streaming UTF-8 byte-at-a-time', text, '\u{1F4A9}');
}

// Ending the stream with half a character pending yields one U+FFFD.
{
  const sd = new TextDecoder();
  const pending = sd.decode(new Uint8Array([0xf0, 0x9f]), { stream: true });
  const flushed = sd.decode();
  test('streaming flush incomplete sequence', `${pending}${flushed}`, '\uFFFD');
}
107
// UTF-16LE decoding: plain text, a surrogate pair, BOM handling, and a
// dangling odd byte in fatal and non-fatal mode.
{
  const freshLE = () => new TextDecoder('utf-16le');
  const bomThenA = () => new Uint8Array([0xff, 0xfe, 0x41, 0x00]); // FF FE is the little-endian BOM
  const loneByte = () => new Uint8Array([0x00]);

  test('UTF-16LE basic', freshLE().decode(new Uint8Array([0x41, 0x00, 0x42, 0x00])), 'AB');
  test('UTF-16LE surrogate pair', freshLE().decode(new Uint8Array([0x34, 0xd8, 0x1e, 0xdd])), '\uD834\uDD1E');
  test('UTF-16LE BOM stripped', freshLE().decode(bomThenA()), 'A');

  const keepBom = new TextDecoder('utf-16le', { ignoreBOM: true });
  test('UTF-16LE BOM kept with ignoreBOM', keepBom.decode(bomThenA()), '\uFEFFA');

  testThrows('UTF-16LE fatal on odd byte', () => {
    new TextDecoder('utf-16le', { fatal: true }).decode(loneByte());
  });

  test('UTF-16LE non-fatal odd byte', freshLE().decode(loneByte()), '\uFFFD');
}
123
// A single UTF-16LE code unit ('A' = 41 00) split across two decode() calls.
{
  const sd = new TextDecoder('utf-16le');
  const lowHalf = sd.decode(new Uint8Array([0x41]), { stream: true });
  const highHalf = sd.decode(new Uint8Array([0x00]));
  test('UTF-16LE streaming split code unit', `${lowHalf}${highHalf}`, 'A');
}

// A surrogate pair split between its high and low code units.
{
  const sd = new TextDecoder('utf-16le');
  const leading = sd.decode(new Uint8Array([0x34, 0xd8]), { stream: true });
  const trailing = sd.decode(new Uint8Array([0x1e, 0xdd]));
  test('UTF-16LE streaming split surrogate pair', `${leading}${trailing}`, '\uD834\uDD1E');
}
139
// UTF-16BE decoding. Each row: [check title, input bytes, expected string].
for (const [title, bytes, expected] of [
  ['UTF-16BE basic', [0x00, 0x41, 0x00, 0x42], 'AB'],
  ['UTF-16BE surrogate pair', [0xd8, 0x34, 0xdd, 0x1e], '\uD834\uDD1E'],
  ['UTF-16BE BOM stripped', [0xfe, 0xff, 0x00, 0x41], 'A'], // FE FF is the big-endian BOM
]) {
  test(title, new TextDecoder('utf-16be').decode(new Uint8Array(bytes)), expected);
}
143
// decode() is given a bare ArrayBuffer rather than a typed-array view.
{
  const { buffer } = new Uint8Array([0x68, 0x69]);
  const text = new TextDecoder().decode(buffer);
  test('decode ArrayBuffer', text, 'hi');
}

// A pending partial sequence from a streaming call is expected to surface as
// U+FFFD when the next, non-streaming call supplies an unrelated byte.
{
  const reused = new TextDecoder();
  reused.decode(new Uint8Array([0xf0, 0x9f]), { stream: true }); // result intentionally discarded
  const followUp = reused.decode(new Uint8Array([0x41]));
  test('decoder reuse resets', followUp, '\uFFFDA');
}
155
// Missing or undefined input is expected to behave like empty input.
{
  const fromUndefined = Array.from(encoder.encode(undefined));
  testDeep('encode undefined', fromUndefined, []);

  const fromNothing = Array.from(encoder.encode());
  testDeep('encode no args', fromNothing, []);

  const decodedNothing = decoder.decode();
  test('decode no args', decodedNothing, '');
}

// Hand control back to the helper module's summary() once every check has run.
summary();
160summary();