don't
5
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf(types): accelerate did plc validation

Use SIMD where available to validate PLC DIDs.

- Stable Rust: AVX2 implementation for x86-64 targets, inspired by @metaflame.dev's
ratproto-did crate: <https://tangled.org/metaflame.dev/ratproto-did>.
- Nightly Rust: `std::simd`-based implementation for all targets. Requires the
`nightly` feature to be enabled.

Improves validation throughput from ~68 million DIDs/sec to ~475 million
DIDs/sec on an AMD 5950X.

Signed-off-by: tjh <x@tjh.dev>

tjh 35c10e2a 9db2b2af

+407 -1
+167
Cargo.lock
··· 30 30 ] 31 31 32 32 [[package]] 33 + name = "alloca" 34 + version = "0.4.0" 35 + source = "registry+https://github.com/rust-lang/crates.io-index" 36 + checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" 37 + dependencies = [ 38 + "cc", 39 + ] 40 + 41 + [[package]] 33 42 name = "allocator-api2" 34 43 version = "0.2.21" 35 44 source = "registry+https://github.com/rust-lang/crates.io-index" 36 45 checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" 46 + 47 + [[package]] 48 + name = "anes" 49 + version = "0.1.6" 50 + source = "registry+https://github.com/rust-lang/crates.io-index" 51 + checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 37 52 38 53 [[package]] 39 54 name = "anstream" ··· 364 349 checksum = "6bd91ee7b2422bcb158d90ef4d14f75ef67f340943fc4149891dcce8f8b972a3" 365 350 366 351 [[package]] 352 + name = "cast" 353 + version = "0.3.0" 354 + source = "registry+https://github.com/rust-lang/crates.io-index" 355 + checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 356 + 357 + [[package]] 367 358 name = "cc" 368 359 version = "1.2.55" 369 360 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 398 377 version = "0.2.1" 399 378 source = "registry+https://github.com/rust-lang/crates.io-index" 400 379 checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" 380 + 381 + [[package]] 382 + name = "ciborium" 383 + version = "0.2.2" 384 + source = "registry+https://github.com/rust-lang/crates.io-index" 385 + checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 386 + dependencies = [ 387 + "ciborium-io", 388 + "ciborium-ll", 389 + "serde", 390 + ] 391 + 392 + [[package]] 393 + name = "ciborium-io" 394 + version = "0.2.2" 395 + source = "registry+https://github.com/rust-lang/crates.io-index" 396 + checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 397 + 398 + [[package]] 399 + name 
= "ciborium-ll" 400 + version = "0.2.2" 401 + source = "registry+https://github.com/rust-lang/crates.io-index" 402 + checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 403 + dependencies = [ 404 + "ciborium-io", 405 + "half", 406 + ] 401 407 402 408 [[package]] 403 409 name = "cipher" ··· 614 566 ] 615 567 616 568 [[package]] 569 + name = "criterion" 570 + version = "0.8.2" 571 + source = "registry+https://github.com/rust-lang/crates.io-index" 572 + checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" 573 + dependencies = [ 574 + "alloca", 575 + "anes", 576 + "cast", 577 + "ciborium", 578 + "clap", 579 + "criterion-plot", 580 + "itertools", 581 + "num-traits", 582 + "oorandom", 583 + "page_size", 584 + "plotters", 585 + "rayon", 586 + "regex", 587 + "serde", 588 + "serde_json", 589 + "tinytemplate", 590 + "walkdir", 591 + ] 592 + 593 + [[package]] 594 + name = "criterion-plot" 595 + version = "0.8.2" 596 + source = "registry+https://github.com/rust-lang/crates.io-index" 597 + checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" 598 + dependencies = [ 599 + "cast", 600 + "itertools", 601 + ] 602 + 603 + [[package]] 617 604 name = "critical-section" 618 605 version = "1.2.0" 619 606 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 696 613 version = "0.8.21" 697 614 source = "registry+https://github.com/rust-lang/crates.io-index" 698 615 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 616 + 617 + [[package]] 618 + name = "crunchy" 619 + version = "0.2.4" 620 + source = "registry+https://github.com/rust-lang/crates.io-index" 621 + checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" 699 622 700 623 [[package]] 701 624 name = "crypto-bigint" ··· 2321 2232 name = "gordian-types" 2322 2233 version = "0.0.0" 2323 2234 dependencies = [ 2235 + "criterion", 2236 + "data-encoding", 2237 + "data-encoding-macro", 2238 + "rand 
0.9.2", 2324 2239 "serde", 2325 2240 "serde_json", 2326 2241 "smallstr", ··· 2361 2268 "tokio", 2362 2269 "tokio-util", 2363 2270 "tracing", 2271 + ] 2272 + 2273 + [[package]] 2274 + name = "half" 2275 + version = "2.7.1" 2276 + source = "registry+https://github.com/rust-lang/crates.io-index" 2277 + checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" 2278 + dependencies = [ 2279 + "cfg-if", 2280 + "crunchy", 2281 + "zerocopy", 2364 2282 ] 2365 2283 2366 2284 [[package]] ··· 2860 2756 checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" 2861 2757 2862 2758 [[package]] 2759 + name = "itertools" 2760 + version = "0.13.0" 2761 + source = "registry+https://github.com/rust-lang/crates.io-index" 2762 + checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 2763 + dependencies = [ 2764 + "either", 2765 + ] 2766 + 2767 + [[package]] 2863 2768 name = "itoa" 2864 2769 version = "1.0.17" 2865 2770 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3340 3227 checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" 3341 3228 3342 3229 [[package]] 3230 + name = "oorandom" 3231 + version = "11.1.5" 3232 + source = "registry+https://github.com/rust-lang/crates.io-index" 3233 + checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 3234 + 3235 + [[package]] 3343 3236 name = "openssl-probe" 3344 3237 version = "0.2.1" 3345 3238 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3403 3284 "primeorder", 3404 3285 "rand_core 0.6.4", 3405 3286 "sha2", 3287 + ] 3288 + 3289 + [[package]] 3290 + name = "page_size" 3291 + version = "0.6.0" 3292 + source = "registry+https://github.com/rust-lang/crates.io-index" 3293 + checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" 3294 + dependencies = [ 3295 + "libc", 3296 + "winapi", 3406 3297 ] 3407 3298 3408 3299 [[package]] ··· 3497 3368 version = "0.3.32" 3498 3369 source 
= "registry+https://github.com/rust-lang/crates.io-index" 3499 3370 checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 3371 + 3372 + [[package]] 3373 + name = "plotters" 3374 + version = "0.3.7" 3375 + source = "registry+https://github.com/rust-lang/crates.io-index" 3376 + checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 3377 + dependencies = [ 3378 + "num-traits", 3379 + "plotters-backend", 3380 + "plotters-svg", 3381 + "wasm-bindgen", 3382 + "web-sys", 3383 + ] 3384 + 3385 + [[package]] 3386 + name = "plotters-backend" 3387 + version = "0.3.7" 3388 + source = "registry+https://github.com/rust-lang/crates.io-index" 3389 + checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 3390 + 3391 + [[package]] 3392 + name = "plotters-svg" 3393 + version = "0.3.7" 3394 + source = "registry+https://github.com/rust-lang/crates.io-index" 3395 + checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 3396 + dependencies = [ 3397 + "plotters-backend", 3398 + ] 3500 3399 3501 3400 [[package]] 3502 3401 name = "portable-atomic" ··· 4854 4697 dependencies = [ 4855 4698 "displaydoc", 4856 4699 "zerovec", 4700 + ] 4701 + 4702 + [[package]] 4703 + name = "tinytemplate" 4704 + version = "1.2.1" 4705 + source = "registry+https://github.com/rust-lang/crates.io-index" 4706 + checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 4707 + dependencies = [ 4708 + "serde", 4709 + "serde_json", 4857 4710 ] 4858 4711 4859 4712 [[package]]
+11
crates/gordian-types/Cargo.toml
··· 7 7 edition.workspace = true 8 8 publish.workspace = true 9 9 10 + [features] 11 + nightly = [] 12 + 10 13 [dependencies] 11 14 smallstr = { version = "0.3.1" } 12 15 thiserror.workspace = true ··· 19 16 time = { workspace = true, optional = true } 20 17 21 18 [dev-dependencies] 19 + criterion = "0.8.2" 20 + data-encoding.workspace = true 21 + data-encoding-macro = "0.1.19" 22 + rand.workspace = true 22 23 serde_json.workspace = true 24 + 25 + [[bench]] 26 + name = "parse_did" 27 + harness = false
+56
crates/gordian-types/benches/parse_did.rs
··· 1 + use std::iter; 2 + 3 + use criterion::BatchSize; 4 + use criterion::Criterion; 5 + use criterion::Throughput; 6 + use criterion::criterion_group; 7 + use criterion::criterion_main; 8 + use data_encoding::Encoding; 9 + use gordian_types::Did; 10 + use gordian_types::DidBuf; 11 + use rand::Rng; 12 + use rand::SeedableRng; 13 + use rand::rngs::StdRng; 14 + 15 + const BASE32: Encoding = data_encoding_macro::new_encoding! { 16 + symbols: "234567abcdefghijklmnopqrstuvwxyz", 17 + padding: None, 18 + }; 19 + 20 + fn parse_did(c: &mut Criterion) { 21 + let mut group = c.benchmark_group("DID:PLC"); 22 + 23 + let mut rng = StdRng::seed_from_u64(8612); 24 + let items: Vec<_> = iter::repeat_with(|| { 25 + let bytes: [u8; 15] = rng.random(); 26 + format!("did:plc:{}", BASE32.encode(&bytes)) 27 + }) 28 + .take(1000) 29 + .collect(); 30 + 31 + group.throughput(Throughput::Elements(items.len() as u64)); 32 + group.bench_with_input("Did::parse", items.as_slice(), |b, dids: &[String]| { 33 + b.iter(|| { 34 + for did in dids { 35 + assert!(Did::parse(&did).is_ok()); 36 + } 37 + }); 38 + }); 39 + 40 + group.throughput(Throughput::Elements(items.len() as u64)); 41 + group.bench_with_input("String->DidBuf", items.as_slice(), |b, dids: &[String]| { 42 + b.iter_batched( 43 + || dids.to_vec(), 44 + |data| { 45 + let dids: Vec<DidBuf> = data.into_iter().map(|s| s.try_into().unwrap()).collect(); 46 + dids 47 + }, 48 + BatchSize::SmallInput, 49 + ); 50 + }); 51 + 52 + group.finish(); 53 + } 54 + 55 + criterion_group!(benches, parse_did); 56 + criterion_main!(benches);
+170
crates/gordian-types/src/did.rs
··· 136 136 } 137 137 138 138 fn validate_did(did: &str) -> Result<(), Error> { 139 + // Optimized validation path for PLC DIDs. 140 + if validate_did_plc(did) { 141 + return Ok(()); 142 + } 143 + 139 144 let mut parts = did.splitn(3, ':'); 140 145 match (parts.next(), parts.next(), parts.next()) { 141 146 (Some("did"), Some(""), _) => Err(Error::EmptyMethod), ··· 176 171 } 177 172 178 173 // @TODO Validate percent encoding in ident. 174 + } 175 + 176 + fn validate_did_plc(did: &str) -> bool { 177 + if did.len() == 32 { 178 + // x86_64 specific impl for stable rust. 179 + #[cfg(target_arch = "x86_64")] 180 + #[cfg(not(feature = "nightly"))] 181 + if is_x86_feature_detected!("avx2") { 182 + // SAFETY: AVX2 support has been detected and the DID candidate is 32 bytes 183 + // long. 184 + return unsafe { validate_x86_64::validate_did_plc(did) }; 185 + } 186 + 187 + #[cfg(feature = "nightly")] 188 + return validate_simd::validate_did_plc(did); 189 + } 190 + 191 + let Some(ident) = did.strip_prefix("did:plc:") else { 192 + return false; 193 + }; 194 + 195 + ident 196 + .as_bytes() 197 + .iter() 198 + .all(|val: &u8| (b'2'..=b'7').contains(val) || val.is_ascii_lowercase()) 199 + } 200 + 201 + #[cfg(any(feature = "nightly", target_arch = "x86_64"))] 202 + #[repr(align(32))] 203 + #[derive(Clone, Copy)] 204 + struct Aligned32([u8; 32]); 205 + 206 + #[cfg(any(feature = "nightly", target_arch = "x86_64"))] 207 + impl Aligned32 { 208 + const fn new(bytes: &'static [u8; 32]) -> Self { 209 + Self(*bytes) 210 + } 211 + } 212 + 213 + #[cfg(any(feature = "nightly", target_arch = "x86_64"))] 214 + const PLC_ALPHA_MIN: Aligned32 = Aligned32::new(b"did:plc:aaaaaaaaaaaaaaaaaaaaaaaa"); 215 + #[cfg(any(feature = "nightly", target_arch = "x86_64"))] 216 + const PLC_ALPHA_MAX: Aligned32 = Aligned32::new(b"did:plc:zzzzzzzzzzzzzzzzzzzzzzzz"); 217 + #[cfg(any(feature = "nightly", target_arch = "x86_64"))] 218 + const PLC_NUM_MIN: Aligned32 = 
Aligned32::new(b"did:plc:222222222222222222222222"); 219 + #[cfg(any(feature = "nightly", target_arch = "x86_64"))] 220 + const PLC_NUM_MAX: Aligned32 = Aligned32::new(b"did:plc:777777777777777777777777"); 221 + 222 + #[cfg(feature = "nightly")] 223 + mod validate_simd { 224 + //! Portable simd impl. Requires a nightly compiler. 225 + 226 + use std::ops::BitAnd; 227 + use std::ops::BitOr; 228 + use std::simd::prelude::*; 229 + 230 + use super::PLC_ALPHA_MAX; 231 + use super::PLC_ALPHA_MIN; 232 + use super::PLC_NUM_MAX; 233 + use super::PLC_NUM_MIN; 234 + 235 + impl super::Aligned32 { 236 + #[inline] 237 + const fn as_u8x32(self) -> u8x32 { 238 + u8x32::from_array(self.0) 239 + } 240 + } 241 + 242 + /// SIMD accelerated DID PLC validation. 243 + pub fn validate_did_plc(did: &str) -> bool { 244 + debug_assert_eq!(did.len(), 32); 245 + 246 + // Use the faster avx2 impl when available. 247 + #[cfg(target_arch = "x86_64")] 248 + if is_x86_feature_detected!("avx2") { 249 + // SAFETY: AVX2 support has been detected. 250 + return unsafe { validate_did_plc_avx2(did) }; 251 + } 252 + 253 + let data = u8x32::from_slice(did.as_bytes()); 254 + data.simd_ge(const { PLC_ALPHA_MIN.as_u8x32() }) 255 + .bitand(data.simd_le(const { PLC_ALPHA_MAX.as_u8x32() })) 256 + .bitor( 257 + data.simd_ge(const { PLC_NUM_MIN.as_u8x32() }) 258 + .bitand(data.simd_le(const { PLC_NUM_MAX.as_u8x32() })), 259 + ) 260 + .all() 261 + } 262 + 263 + // AVX2 extensions are not enabled by the default x86_64 profile. Setting the 264 + // avx2 target feature forces the compiler to generate avx2 instructions 265 + // for this function only. 
266 + #[cfg(target_arch = "x86_64")] 267 + #[target_feature(enable = "avx2")] 268 + fn validate_did_plc_avx2(did: &str) -> bool { 269 + let data = u8x32::from_slice(did.as_bytes()); 270 + data.simd_ge(const { PLC_ALPHA_MIN.as_u8x32() }) 271 + .bitand(data.simd_le(const { PLC_ALPHA_MAX.as_u8x32() })) 272 + .bitor( 273 + data.simd_ge(const { PLC_NUM_MIN.as_u8x32() }) 274 + .bitand(data.simd_le(const { PLC_NUM_MAX.as_u8x32() })), 275 + ) 276 + .all() 277 + } 278 + } 279 + 280 + #[cfg(not(feature = "nightly"))] 281 + #[cfg(target_arch = "x86_64")] 282 + mod validate_x86_64 { 283 + //! x86_64-specific impl. This is usable with stable rust. Remove when 284 + //! `std::simd` is stabilized. 285 + 286 + use std::arch::x86_64::__m256i; 287 + 288 + use super::PLC_ALPHA_MAX; 289 + use super::PLC_ALPHA_MIN; 290 + use super::PLC_NUM_MAX; 291 + use super::PLC_NUM_MIN; 292 + 293 + impl super::Aligned32 { 294 + #[inline] 295 + pub fn as_mm256i(self) -> __m256i { 296 + unsafe { 297 + // SAFETY: self.0 is aligned to 32 bytes. 298 + std::arch::x86_64::_mm256_load_si256(self.0.as_ptr().cast()) 299 + } 300 + } 301 + } 302 + 303 + #[target_feature(enable = "avx2")] 304 + pub fn validate_did_plc(did: &str) -> bool { 305 + use std::arch::x86_64; 306 + 307 + debug_assert_eq!(did.len(), 32); 308 + 309 + let data = unsafe { 310 + // SAFETY: `did` is 32 bytes (256 bits) long. 
311 + x86_64::_mm256_loadu_si256(did.as_ptr().cast()) 312 + }; 313 + 314 + let mask = x86_64::_mm256_movemask_epi8(x86_64::_mm256_or_si256( 315 + x86_64::_mm256_and_si256( 316 + x86_64::_mm256_cmpeq_epi8( 317 + x86_64::_mm256_min_epu8(data, PLC_ALPHA_MIN.as_mm256i()), 318 + PLC_ALPHA_MIN.as_mm256i(), 319 + ), 320 + x86_64::_mm256_cmpeq_epi8( 321 + x86_64::_mm256_max_epu8(data, PLC_ALPHA_MAX.as_mm256i()), 322 + PLC_ALPHA_MAX.as_mm256i(), 323 + ), 324 + ), 325 + x86_64::_mm256_and_si256( 326 + x86_64::_mm256_cmpeq_epi8( 327 + x86_64::_mm256_min_epu8(data, PLC_NUM_MIN.as_mm256i()), 328 + PLC_NUM_MIN.as_mm256i(), 329 + ), 330 + x86_64::_mm256_cmpeq_epi8( 331 + x86_64::_mm256_max_epu8(data, PLC_NUM_MAX.as_mm256i()), 332 + PLC_NUM_MAX.as_mm256i(), 333 + ), 334 + ), 335 + )); 336 + 337 + mask == !0 338 + } 179 339 } 180 340 181 341 /// Length in bytes an [`DidBuf`] can be before being allocated on the heap.
+3 -1
crates/gordian-types/src/lib.rs
··· 1 - //! 1 + #![cfg_attr(feature = "nightly", feature(portable_simd))] 2 + 2 3 //! Primitive types in the atmosphere. 4 + 3 5 #[macro_use] 4 6 mod macros; 5 7