examples/spec/textcodec.js at master

themackabu.com / ant
fork
MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
fork
ant / examples / spec / textcodec.js
at master 160 lines 6.8 kB view raw
wrap content
theMackabu bring text codecs up to spec 5w ago
69655248
  1import { test, testDeep, testThrows, summary } from './helpers.js';
  2
  3console.log('TextEncoder/TextDecoder Tests\n');
  4
  5const encoder = new TextEncoder();
  6const decoder = new TextDecoder();
  7
  8const encoded = encoder.encode('hello');
  9test('TextEncoder type', encoded instanceof Uint8Array, true);
 10test('TextEncoder length', encoded.length, 5);
 11test('TextEncoder byte 0', encoded[0], 104);
 12test('TextEncoder byte 1', encoded[1], 101);
 13
 14const decoded = decoder.decode(encoded);
 15test('TextDecoder', decoded, 'hello');
 16
 17const utf8 = encoder.encode('日本語');
 18test('UTF-8 encode length', utf8.length, 9);
 19test('UTF-8 decode', decoder.decode(utf8), '日本語');
 20
 21const emoji = encoder.encode('😀');
 22test('emoji encode length', emoji.length, 4);
 23test('emoji decode', decoder.decode(emoji), '😀');
 24
 25const empty = encoder.encode('');
 26test('empty encode length', empty.length, 0);
 27test('empty decode', decoder.decode(empty), '');
 28
 29const roundtrip = 'Hello, 世界! 🎉';
 30test('roundtrip', decoder.decode(encoder.encode(roundtrip)), roundtrip);
 31
 32test('TextEncoder.encoding', encoder.encoding, 'utf-8');
 33test('TextEncoder requires new', typeof TextEncoder, 'function');
 34testThrows('TextEncoder without new throws', () => TextEncoder());
 35
 36testDeep('encode lone high surrogate', [...encoder.encode('\uD800')], [0xef, 0xbf, 0xbd]);
 37testDeep('encode lone low surrogate', [...encoder.encode('\uDC00')], [0xef, 0xbf, 0xbd]);
 38testDeep('encode surrogate in string', [...encoder.encode('a\uD800b')], [0x61, 0xef, 0xbf, 0xbd, 0x62]);
 39testDeep('encode reversed surrogates', [...encoder.encode('\uDC00\uD800')], [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd]);
 40test('encode valid surrogate pair', encoder.encode('\uD834\uDD1E').length, 4); // U+1D11E 𝄞
 41
 42const dest = new Uint8Array(10);
 43const result = encoder.encodeInto('hello', dest);
 44test('encodeInto read', result.read, 5);
 45test('encodeInto written', result.written, 5);
 46test('encodeInto data', decoder.decode(dest.subarray(0, result.written)), 'hello');
 47
 48const small = new Uint8Array(2);
 49const partial = encoder.encodeInto('hello', small);
 50test('encodeInto partial written', partial.written, 2);
 51test('encodeInto partial read', partial.read, 2);
 52
 53test('TextDecoder default encoding', new TextDecoder().encoding, 'utf-8');
 54test('TextDecoder utf8 alias', new TextDecoder('utf8').encoding, 'utf-8');
 55test('TextDecoder case insensitive', new TextDecoder('UTF-8').encoding, 'utf-8');
 56test('TextDecoder utf-16le label', new TextDecoder('utf-16le').encoding, 'utf-16le');
 57test('TextDecoder utf-16be label', new TextDecoder('utf-16be').encoding, 'utf-16be');
 58test('TextDecoder utf-16 alias', new TextDecoder('utf-16').encoding, 'utf-16le');
 59testThrows('TextDecoder invalid label', () => new TextDecoder('bogus'));
 60testThrows('TextDecoder without new throws', () => TextDecoder());
 61
 62test('fatal defaults false', new TextDecoder().fatal, false);
 63test('fatal option true', new TextDecoder('utf-8', { fatal: true }).fatal, true);
 64test('ignoreBOM defaults false', new TextDecoder().ignoreBOM, false);
 65test('ignoreBOM option true', new TextDecoder('utf-8', { ignoreBOM: true }).ignoreBOM, true);
 66
 67testThrows('fatal on invalid UTF-8', () => {
 68  new TextDecoder('utf-8', { fatal: true }).decode(new Uint8Array([0xff]));
 69});
 70testThrows('fatal on truncated sequence', () => {
 71  new TextDecoder('utf-8', { fatal: true }).decode(new Uint8Array([0xc0]));
 72});
 73testThrows('fatal on overlong', () => {
 74  new TextDecoder('utf-8', { fatal: true }).decode(new Uint8Array([0xc0, 0x80]));
 75});
 76test('non-fatal replacement', new TextDecoder().decode(new Uint8Array([0xff])), '\uFFFD');
 77test('non-fatal truncated', new TextDecoder().decode(new Uint8Array([0xc0])), '\uFFFD');
 78
 79test('UTF-8 BOM stripped by default', new TextDecoder().decode(new Uint8Array([0xef, 0xbb, 0xbf, 0x41])), 'A');
 80test('UTF-8 BOM kept with ignoreBOM', new TextDecoder('utf-8', { ignoreBOM: true }).decode(new Uint8Array([0xef, 0xbb, 0xbf, 0x41])), '\uFEFFA');
 81
 82{
 83  const sd = new TextDecoder();
 84  let out = '';
 85  out += sd.decode(new Uint8Array([0xf0, 0x9f, 0x92]), { stream: true });
 86  out += sd.decode(new Uint8Array([0xa9]));
 87  test('streaming UTF-8 multi-byte', out, '\u{1F4A9}');
 88}
 89
 90{
 91  const sd = new TextDecoder();
 92  let out = '';
 93  out += sd.decode(new Uint8Array([0xf0]), { stream: true });
 94  out += sd.decode(new Uint8Array([0x9f]), { stream: true });
 95  out += sd.decode(new Uint8Array([0x92]), { stream: true });
 96  out += sd.decode(new Uint8Array([0xa9]));
 97  test('streaming UTF-8 byte-at-a-time', out, '\u{1F4A9}');
 98}
 99
100{
101  const sd = new TextDecoder();
102  let out = '';
103  out += sd.decode(new Uint8Array([0xf0, 0x9f]), { stream: true });
104  out += sd.decode();
105  test('streaming flush incomplete sequence', out, '\uFFFD');
106}
107
// UTF-16LE decoding: plain text, surrogate pairs, BOM handling, odd-length
// input, and code units split across streamed chunks.
{
  const LABEL = 'utf-16le';
  // Packs the given octets into a fresh Uint8Array.
  const bytes = (...octets) => new Uint8Array(octets);

  test('UTF-16LE basic', new TextDecoder(LABEL).decode(bytes(0x41, 0x00, 0x42, 0x00)), 'AB');
  test('UTF-16LE surrogate pair', new TextDecoder(LABEL).decode(bytes(0x34, 0xd8, 0x1e, 0xdd)), '\uD834\uDD1E');
  test('UTF-16LE BOM stripped', new TextDecoder(LABEL).decode(bytes(0xff, 0xfe, 0x41, 0x00)), 'A');

  test(
    'UTF-16LE BOM kept with ignoreBOM',
    new TextDecoder(LABEL, { ignoreBOM: true }).decode(bytes(0xff, 0xfe, 0x41, 0x00)),
    '\uFEFFA'
  );

  // A single byte is half a code unit: an error in fatal mode, U+FFFD otherwise.
  testThrows('UTF-16LE fatal on odd byte', () => {
    new TextDecoder(LABEL, { fatal: true }).decode(bytes(0x00));
  });

  test('UTF-16LE non-fatal odd byte', new TextDecoder(LABEL).decode(bytes(0x00)), '\uFFFD');

  // One code unit delivered as two single-byte chunks.
  {
    const streamDecoder = new TextDecoder(LABEL);
    const lowHalf = streamDecoder.decode(bytes(0x41), { stream: true });
    const highHalf = streamDecoder.decode(bytes(0x00));
    test('UTF-16LE streaming split code unit', `${lowHalf}${highHalf}`, 'A');
  }

  // A surrogate pair delivered as two chunks, one surrogate in each.
  {
    const streamDecoder = new TextDecoder(LABEL);
    const leadPart = streamDecoder.decode(bytes(0x34, 0xd8), { stream: true });
    const trailPart = streamDecoder.decode(bytes(0x1e, 0xdd));
    test('UTF-16LE streaming split surrogate pair', `${leadPart}${trailPart}`, '\uD834\uDD1E');
  }
}
139
// UTF-16BE decoding: same cases as little-endian, with each byte pair swapped.
{
  // Decodes `octets` with a fresh big-endian decoder on every call.
  const decodeBE = (octets) => new TextDecoder('utf-16be').decode(new Uint8Array(octets));

  test('UTF-16BE basic', decodeBE([0x00, 0x41, 0x00, 0x42]), 'AB');
  test('UTF-16BE surrogate pair', decodeBE([0xd8, 0x34, 0xdd, 0x1e]), '\uD834\uDD1E');
  test('UTF-16BE BOM stripped', decodeBE([0xfe, 0xff, 0x00, 0x41]), 'A');
}
143
// decode() is handed a raw ArrayBuffer rather than a typed-array view.
{
  const { buffer } = new Uint8Array([0x68, 0x69]);
  test('decode ArrayBuffer', new TextDecoder().decode(buffer), 'hi');
}

// A streamed, incomplete sequence followed by a non-streaming call: the
// expected output is U+FFFD for the abandoned bytes, then the new input.
{
  const reused = new TextDecoder();
  // Return value deliberately ignored; this call only leaves bytes pending.
  reused.decode(new Uint8Array([0xf0, 0x9f]), { stream: true });
  test('decoder reuse resets', reused.decode(new Uint8Array([0x41])), '\uFFFDA');
}
155
// Missing or undefined input is expected to behave like empty input.
{
  const fromUndefined = encoder.encode(undefined);
  testDeep('encode undefined', [...fromUndefined], []);

  const fromNothing = encoder.encode();
  testDeep('encode no args', [...fromNothing], []);

  const decodedNothing = decoder.decode();
  test('decode no args', decodedNothing, '');
}

// Final call into the shared test helpers, made after every test above has run.
summary();
Configure Feed

Configure Feed