don't
5
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf(types): improve tid parsing by 1400%

Adds a SIMD implementation of TID parsing. Requires a nightly compiler and
the `nightly` feature to be enabled.

Can parse ~255 million TIDs/sec on an AMD Ryzen 9 5950X.

Signed-off-by: tjh <x@tjh.dev>

tjh 0910f22e 35c10e2a

+133 -18
+4
crates/gordian-types/Cargo.toml
··· 28 28 [[bench]] 29 29 name = "parse_did" 30 30 harness = false 31 + 32 + [[bench]] 33 + name = "parse_tid" 34 + harness = false
+37
crates/gordian-types/benches/parse_tid.rs
··· 1 + use std::iter; 2 + 3 + use criterion::Criterion; 4 + use criterion::Throughput; 5 + use criterion::criterion_group; 6 + use criterion::criterion_main; 7 + use gordian_types::Tid; 8 + use rand::Rng; 9 + use rand::SeedableRng; 10 + use rand::rngs::StdRng; 11 + 12 + fn parse_tid(c: &mut Criterion) { 13 + let mut group = c.benchmark_group("TID"); 14 + 15 + let mut rng = StdRng::seed_from_u64(8612); 16 + let items: Vec<_> = iter::repeat_with(|| { 17 + let seconds: u64 = rng.random_range(..1_000_000); 18 + Tid::from_secs(seconds, rng.random_range(0..1024)).to_string() 19 + }) 20 + .take(1000) 21 + .collect(); 22 + 23 + group.throughput(Throughput::Elements(items.len() as u64)); 24 + group.bench_with_input("Tid::parse", items.as_slice(), |b, tids: &[String]| { 25 + b.iter(|| { 26 + for tid in tids { 27 + // panic!("{tid}"); 28 + assert!(Tid::parse(&tid).is_ok()); 29 + } 30 + }); 31 + }); 32 + 33 + group.finish(); 34 + } 35 + 36 + criterion_group!(benches, parse_tid); 37 + criterion_main!(benches);
+92 -18
crates/gordian-types/src/tid.rs
··· 300 300 /// 301 301 /// Returns an error if `tid` is not a valid TID. 302 302 pub fn parse(tid: &str) -> Result<Tid, Error> { 303 + #[cfg(feature = "nightly")] 304 + fn parse_simd(bytes: &[u8]) -> Option<u64> { 305 + use std::ops::BitAnd as _; 306 + use std::ops::BitOr as _; 307 + use std::simd::Select as _; 308 + use std::simd::prelude::*; 309 + 310 + const BASE32S_NUM_MIN: u8x16 = u8x16::from_slice(b"2222222222222\0\0\0"); 311 + const BASE32S_NUM_MAX: u8x16 = u8x16::from_slice(b"7777777777777\0\0\0"); 312 + const BASE32S_ALPHA_MIN: u8x16 = u8x16::from_slice(b"aaaaaaaaaaaaa\0\0\0"); 313 + const BASE32S_ALPHA_MAX: u8x16 = u8x16::from_slice(b"jzzzzzzzzzzzz\0\0\0"); 314 + 315 + let data = u8x16::load_select_or_default(bytes, Mask::from_bitmask(0x1fff)); 316 + let mask = data.simd_lt(BASE32S_ALPHA_MIN); 317 + let valid = data 318 + .simd_ge(BASE32S_ALPHA_MIN) 319 + .bitand(data.simd_le(BASE32S_ALPHA_MAX)) 320 + .bitor( 321 + data.simd_ge(BASE32S_NUM_MIN) 322 + .bitand(data.simd_le(BASE32S_NUM_MAX)), 323 + ) 324 + .all(); 325 + 326 + if !valid { 327 + return None; 328 + } 329 + 330 + let data = mask.select( 331 + data.saturating_sub(BASE32S_NUM_MIN), 332 + data.saturating_sub(BASE32S_ALPHA_MIN - u8x16::splat(6)), 333 + ); 334 + 335 + #[rustfmt::skip] 336 + // src 337 + // ╭─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────╮ 338 + // │ ╲ │ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 10 │ 11 │ 12 │ 339 + // d ├─────┼─────┼─────┼─────┼─────┼─────┼─────┼─────┼─────┼─────┼─────┼─────┼─────┼─────┤ 340 + // s │ 0 │ < 4 │ > 1 │ │ │ │ │ │ │ │ │ │ │ │ 341 + // t │ 1 │ │ < 7 │ < 2 │ > 3 │ │ │ │ │ │ │ │ │ │ 342 + // │ 2 │ │ │ │ < 5 │ < 0 │ │ │ │ │ │ │ │ │ 343 + // │ 3 │ │ │ │ │ │ < 3 │ > 2 │ │ │ │ │ │ │ 344 + // │ 4 │ │ │ │ │ │ │ < 6 │ < 1 │ > 4 │ │ │ │ │ 345 + // │ 5 │ │ │ │ │ │ │ │ │ < 4 │ > 1 │ │ │ │ 346 + // │ 6 │ │ │ │ │ │ │ │ │ │ < 7 │ < 2 │ > 3 │ │ 347 + // │ 7 │ │ │ │ │ │ │ │ │ │ │ │ < 5 │ > 0 │ 348 + // 
╰─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────╯ 349 + 350 + // Lanes 0..=7 are the first left shift from each row above. Lanes 8..=11 are 351 + // additional left shifts from rows 1, 2, 4, and 6. Lane 15 is used for `0`. 352 + let mut l1 = simd_swizzle!(data, [0, 1, 3, 5, 6, 8, 9, 11, 2, 4, 7, 10, 15]); 353 + l1 <<= Simd::from_array([4, 7, 5, 3, 6, 4, 7, 5, 2, 0, 1, 2, 0]); 354 + 355 + // Shuffle lanes 8, 9, 10, and 11 from l1 into final lanes. Lane 12 is `0`. 356 + let l2 = simd_swizzle!(l1, [12, 8, 9, 12, 10, 12, 11, 12]); 357 + 358 + let mut r1 = simd_swizzle!(data, [1, 3, 15, 6, 8, 9, 11, 12]); 359 + r1 >>= Simd::from_array([1, 3, 0, 2, 4, 1, 3, 0]); 360 + 361 + Some(u64::from_be_bytes((l1.resize::<8>(0) | l2 | r1).to_array())) 362 + } 363 + 364 + fn parse_scalar(bytes: &[u8]) -> Result<u64, Error> { 365 + let mut value = 0; 366 + for (idx, character) in bytes.iter().enumerate() { 367 + let pos = 0x1f 368 + & LOOKUP 369 + .iter() 370 + .position(|v| v == character) 371 + .ok_or(Error::Encoding { 372 + position: idx, 373 + found: *character as char, 374 + })? as u64; 375 + 376 + if idx == 0 && pos >= 16 { 377 + return Err(Error::Prefix { 378 + found: *character as char, 379 + }); 380 + } 381 + 382 + value = (value << 5) | pos; 383 + } 384 + 385 + Ok(value) 386 + } 387 + 303 388 let bytes = tid.as_bytes(); 304 389 if bytes.len() != 13 { 305 390 return Err(Error::Length); 306 391 } 307 392 308 - let mut value = 0; 309 - for (idx, character) in bytes.iter().enumerate() { 310 - let pos = 0x1f 311 - & LOOKUP 312 - .iter() 313 - .position(|v| v == character) 314 - .ok_or(Error::Encoding { 315 - position: idx, 316 - found: *character as char, 317 - })? 
as u64; 393 + #[cfg(feature = "nightly")] 394 + let value = match parse_simd(bytes) { 395 + Some(value) => value, 396 + None => parse_scalar(bytes)?, 397 + }; 318 398 319 - if idx == 0 && pos >= 16 { 320 - return Err(Error::Prefix { 321 - found: *character as char, 322 - }); 323 - } 324 - 325 - value = (value << 5) | pos; 326 - } 399 + #[cfg(not(feature = "nightly"))] 400 + let value = parse_scalar(bytes)?; 327 401 328 402 if value & (1 << 63) != 0 { 329 403 return Err(Error::HighBitSet);