A public mirror for the whole atmosphere hubble.microcosm.blue
27
fork

Configure Feed

Select the types of activity you want to include in your feed.

more dictionary size stuff

phil 92687e0c 2b78afd9

+201 -15
+18
space-efficiency-check/examples/sweep.rs
··· 220 220 }); 221 221 } 222 222 223 + // ── Large-block dictionary sweep ─────────────────────────── 224 + // Test larger dictionaries at larger block sizes, with absolute training targets 225 + for &block_kb in &[16, 32, 64] { 226 + for &dict_kb in &[16, 64, 256] { 227 + let dict = dict_kb * 1024; 228 + for &train_kb in &[256, 1024] { 229 + let mult = (train_kb / dict_kb) as i32; 230 + if mult < 1 { continue; } // training must be >= dict size 231 + configs.push(SweepConfig { 232 + block_size: block_kb * 1024, 233 + dict_bytes: dict as i32, 234 + train_mult: Some(mult), 235 + ..base 236 + }); 237 + } 238 + } 239 + } 240 + 223 241 // ── Subcompactions ───────────────────────────────────────── 224 242 for &sub in &[1, 4, 10] { 225 243 configs.push(SweepConfig {
+168 -15
space-efficiency-check/index.html
··· 58 58 59 59 <div class="chart-row"> 60 60 <div class="chart-container"> 61 - <h2>dictionary training vs 4k, 8k blocks <label class="zero-toggle"><input type="checkbox" data-chart="chart-dict"> y from 0</label></h2> 61 + <h2>dictionary training by block size <label class="zero-toggle"><input type="checkbox" data-chart="chart-dict"> y from 0</label></h2> 62 62 <div id="chart-dict"></div> 63 + </div> 64 + <div class="chart-container"> 65 + <h2>dictionary benefit vs block size <label class="zero-toggle"><input type="checkbox" data-chart="chart-dict-benefit"> y from 0</label></h2> 66 + <div id="chart-dict-benefit"></div> 67 + </div> 68 + </div> 69 + 70 + <div class="chart-row"> 71 + <div class="chart-container"> 72 + <h2>dictionary: size reduction vs compaction time <label class="zero-toggle"><input type="checkbox" data-chart="chart-dict-cost"> y from 0</label></h2> 73 + <div id="chart-dict-cost"></div> 63 74 </div> 64 75 <div class="chart-container"> 65 76 <h2>key restart interval <label class="zero-toggle"><input type="checkbox" data-chart="chart-restart"> y from 0</label></h2> ··· 242 253 131072,6,8,16384,16,true,10,17165045658,16369,56.9 243 254 131072,9,8,0,,true,10,16940090482,16155,76.5 244 255 131072,9,8,16384,16,true,10,16940006516,16155,76.7 256 + 16384,3,8,16384,64,true,10,18575595180,17715,46.8 257 + 16384,3,8,65536,4,true,10,18720555347,17853,49.6 258 + 16384,3,8,65536,16,true,10,18636700580,17773,52.1 259 + 16384,3,8,262144,1,true,10,20116140943,19184,62.1 260 + 16384,3,8,262144,4,true,10,18718065045,17850,62.1 261 + 32768,3,8,16384,64,true,10,18037187627,17201,55.5 262 + 32768,3,8,65536,4,true,10,18160440220,17319,61.1 263 + 32768,3,8,65536,16,true,10,18108588895,17269,56.7 264 + 32768,3,8,262144,1,true,10,18702653174,17836,59.6 265 + 32768,3,8,262144,4,true,10,18174889606,17332,68.0 266 + 65536,3,8,16384,64,true,10,17666766140,16848,48.3 267 + 65536,3,8,65536,4,true,10,17676764811,16857,27.2 268 + 65536,3,8,65536,16,true,10,17718370930,16897,51.0 269 + 65536,3,8,262144,1,true,10,17676607265,16857,27.5 270 + 65536,3,8,262144,4,true,10,17806456807,16981,65.2 245 271 `; 246 272 247 273 // ── parse ───────────────────────────────────────────────────────── ··· 477 503 // ── chart 3: dictionary training sweep (with block size slider) ─── 478 504 function chartDict() { 479 505 const traces = []; 480 - const traceBlock = []; // which block size each trace belongs to 481 - const colors = { 'None': '#45b7d1', '4': '#4ecdc4', '16': '#f7b731', '64': '#fc5c65' }; 482 - const blockSizes = [4, 8]; 506 + const traceBlock = []; 507 + const trainColors = { 'None': '#45b7d1', '1': '#888', '4': '#4ecdc4', '8': '#26de81', 508 + '16': '#f7b731', '32': '#fd9644', '64': '#fc5c65' }; 509 + 510 + // find all block sizes that have dict data 511 + const blockSizes = [...new Set( 512 + data.filter(d => d.dict_bytes > 0 && d.zstd_level === 3 && d.restart_interval === 8) 513 + .map(d => d.block_size / 1024) 514 + )].sort((a, b) => a - b); 515 + 516 + // collect all dict sizes across all block sizes for consistent x-axis 517 + const allDictSizes = [...new Set( 518 + data.filter(d => d.dict_bytes > 0 && d.zstd_level === 3 && d.restart_interval === 8) 519 + .map(d => d.dict_bytes) 520 + )].sort((a, b) => a - b); 521 + const xLabels = allDictSizes.map(d => d >= 1024 ? (d / 1024) + 'k' : d + ''); 483 522 484 523 for (const blockKb of blockSizes) { 485 524 const bs = blockKb * 1024; ··· 495 534 rows.sort((a, b) => a.dict_bytes - b.dict_bytes); 496 535 const label = tm === 'None' ? 'default' : tm + 'x'; 497 536 traces.push({ 498 - x: rows.map(d => d.dict_bytes / 1024 + 'k'), 537 + x: rows.map(d => d.dict_bytes >= 1024 ? (d.dict_bytes / 1024) + 'k' : d.dict_bytes + ''), 499 538 y: rows.map(gib), 500 - name: `${blockKb}k block, train=${label}`, 539 + name: `${blockKb}k, train=${label}`, 501 540 mode: 'lines+markers', 502 - line: { color: colors[tm] || '#888', dash: blockKb === 4 ? 'solid' : 'dot', width: 2.5 }, 541 + line: { color: trainColors[tm] || '#888', width: 2.5 }, 503 542 marker: { size: 7 }, 504 543 hovertemplate: `${blockKb}k block, dict=%{x}, train=${label}<br>%{y:.2f} GiB<extra></extra>`, 505 544 }); 506 545 traceBlock.push(blockKb); 507 546 } 508 547 509 - // add no-dict baseline 548 + // no-dict baseline 510 549 const baseline = data.find(d => 511 550 d.block_size === bs && d.dict_bytes === 0 && d.zstd_level === 3 && 512 551 d.restart_interval === 8 && d.opt_filters && d.subcompactions === 10 513 552 ); 514 553 if (baseline) { 515 554 traces.push({ 516 - x: ['1k', '4k', '16k', '64k'], 517 - y: Array(4).fill(gib(baseline)), 518 - name: `${blockKb}k block, no dict`, 555 + x: xLabels, 556 + y: Array(xLabels.length).fill(gib(baseline)), 557 + name: `${blockKb}k, no dict`, 519 558 mode: 'lines', 520 - line: { color: '#ccc', dash: blockKb === 4 ? 'solid' : 'dot', width: 1 }, 559 + line: { color: '#ccc', width: 1 }, 521 560 hoverinfo: 'skip', 522 561 }); 523 562 traceBlock.push(blockKb); 524 563 } 525 564 } 526 565 527 - // slider steps 528 - const fadedColors = { 'None': '#45b7d130', '4': '#4ecdc430', '16': '#f7b73130', '64': '#fc5c6530' }; 566 + // slider 529 567 const steps = [{ 530 - label: 'both', 568 + label: 'all', 531 569 method: 'restyle', 532 570 args: [{ 533 571 'line.width': traceBlock.map(() => 2.5), ··· 562 600 })); 563 601 } 564 602 603 + // ── chart 3b: dictionary benefit by block size ──────────────────── 604 + function chartDictBenefit() { 605 + // for each block size, find no-dict baseline and best dict config 606 + const blocks = [...new Set(data.map(d => d.block_size))].sort((a, b) => a - b); 607 + const dictSizes = [16384, 65536, 262144]; 608 + const colors = { 16384: '#45b7d1', 65536: '#f7b731', 262144: '#fc5c65' }; 609 + 610 + const traces = []; 611 + 612 + for (const dictBytes of dictSizes) { 613 + const xs = []; 614 + const ys = []; 615 + const texts = []; 616 + 617 + for (const bs of blocks) { 618 + const baseline = data.find(d => 619 + d.block_size === bs && d.dict_bytes === 0 && d.zstd_level === 3 && 620 + d.restart_interval === 8 && d.opt_filters && d.subcompactions === 10 621 + ); 622 + if (!baseline) continue; 623 + 624 + // find best (smallest) result with this dict size at this block size 625 + const dictRows = data.filter(d => 626 + d.block_size === bs && d.dict_bytes === dictBytes && d.zstd_level === 3 && 627 + d.restart_interval === 8 && d.opt_filters && d.subcompactions === 10 628 + ); 629 + if (dictRows.length === 0) continue; 630 + 631 + const best = dictRows.reduce((a, b) => a.size_bytes < b.size_bytes ? a : b); 632 + const pct = ((baseline.size_bytes - best.size_bytes) / baseline.size_bytes * 100); 633 + 634 + xs.push(bs / 1024 + 'k'); 635 + ys.push(+pct.toFixed(2)); 636 + const trainLabel = best.train_mult === null ? 'default' : best.train_mult + 'x'; 637 + texts.push(`train=${trainLabel}`); 638 + } 639 + 640 + if (xs.length > 0) { 641 + const dictLabel = dictBytes >= 1024 ? (dictBytes / 1024) + 'k' : dictBytes + ''; 642 + traces.push({ 643 + x: xs, 644 + y: ys, 645 + name: `dict=${dictLabel}`, 646 + mode: 'lines+markers', 647 + line: { color: colors[dictBytes] || '#888' }, 648 + marker: { size: 8 }, 649 + text: texts, 650 + hovertemplate: `%{x} block, dict=${dictLabel}<br>%{y:.1f}% smaller<br>%{text}<extra></extra>`, 651 + }); 652 + } 653 + } 654 + 655 + Plotly.newPlot('chart-dict-benefit', traces, plotLayout({ 656 + xaxis: { title: 'Block Size', type: 'category' }, 657 + yaxis: { title: 'Size Reduction (%)' }, 658 + })); 659 + } 660 + 661 + // ── chart 3c: dictionary cost (size reduction vs compaction time) ─ 662 + function chartDictCost() { 663 + const blockColors = { 664 + 4096: '#fc5c65', 8192: '#f7b731', 16384: '#45b7d1', 665 + 32768: '#4ecdc4', 65536: '#a55eea', 131072: '#888', 666 + }; 667 + const blocks = [...new Set(data.map(d => d.block_size))].sort((a, b) => a - b); 668 + const traces = []; 669 + 670 + for (const bs of blocks) { 671 + const baseline = data.find(d => 672 + d.block_size === bs && d.dict_bytes === 0 && d.zstd_level === 3 && 673 + d.restart_interval === 8 && d.opt_filters && d.subcompactions === 10 674 + ); 675 + if (!baseline) continue; 676 + 677 + const dictRows = data.filter(d => 678 + d.block_size === bs && d.dict_bytes > 0 && d.zstd_level === 3 && 679 + d.restart_interval === 8 && d.opt_filters && d.subcompactions === 10 680 + ); 681 + if (dictRows.length === 0) continue; 682 + 683 + const bsLabel = (bs / 1024) + 'k'; 684 + traces.push({ 685 + x: dictRows.map(d => (d.compact_secs - baseline.compact_secs) / baseline.compact_secs * 100), 686 + y: dictRows.map(d => (baseline.size_bytes - d.size_bytes) / baseline.size_bytes * 100), 687 + name: bsLabel, 688 + mode: 'markers', 689 + marker: { size: 7, color: blockColors[bs] || '#888' }, 690 + text: dictRows.map(d => { 691 + const dictLabel = d.dict_bytes >= 1024 ? (d.dict_bytes / 1024) + 'k' : d.dict_bytes; 692 + const trainLabel = d.train_mult === null ? 'default' : d.train_mult + 'x'; 693 + return `dict=${dictLabel}, train=${trainLabel}`; 694 + }), 695 + hovertemplate: `${bsLabel} block<br>%{text}<br>%{y:.1f}% smaller, +%{x:.0f}% time<extra></extra>`, 696 + }); 697 + 698 + // add the no-dict origin point 699 + traces.push({ 700 + x: [0], y: [0], 701 + name: bsLabel + ' (no dict)', 702 + mode: 'markers', 703 + marker: { size: 9, color: blockColors[bs], symbol: 'x' }, 704 + showlegend: false, 705 + hovertemplate: `${bsLabel} block, no dict<extra></extra>`, 706 + }); 707 + } 708 + 709 + Plotly.newPlot('chart-dict-cost', traces, plotLayout({ 710 + xaxis: { title: 'Extra Compaction Time (%)' }, 711 + yaxis: { title: 'Size Reduction (%)' }, 712 + legend: { title: { text: 'Block Size' } }, 713 + })); 714 + } 715 + 565 716 // ── chart 4: restart interval ───────────────────────────────────── 566 717 function chartRestart() { 567 718 const traces = []; ··· 761 912 chartBlockSize(); 762 913 chartPareto(); 763 914 chartDict(); 915 + chartDictBenefit(); 916 + chartDictCost(); 764 917 chartRestart(); 765 918 chartSubcomp(); 766 919 setupFilters();
+15
space-efficiency-check/sweep-results.csv
··· 135 135 131072,6,8,16384,16,true,10,17165045658,16369,56.9 136 136 131072,9,8,0,,true,10,16940090482,16155,76.5 137 137 131072,9,8,16384,16,true,10,16940006516,16155,76.7 138 + 16384,3,8,16384,64,true,10,18575595180,17715,46.8 139 + 16384,3,8,65536,4,true,10,18720555347,17853,49.6 140 + 16384,3,8,65536,16,true,10,18636700580,17773,52.1 141 + 16384,3,8,262144,1,true,10,20116140943,19184,62.1 142 + 16384,3,8,262144,4,true,10,18718065045,17850,62.1 143 + 32768,3,8,16384,64,true,10,18037187627,17201,55.5 144 + 32768,3,8,65536,4,true,10,18160440220,17319,61.1 145 + 32768,3,8,65536,16,true,10,18108588895,17269,56.7 146 + 32768,3,8,262144,1,true,10,18702653174,17836,59.6 147 + 32768,3,8,262144,4,true,10,18174889606,17332,68.0 148 + 65536,3,8,16384,64,true,10,17666766140,16848,48.3 149 + 65536,3,8,65536,4,true,10,17676764811,16857,27.2 150 + 65536,3,8,65536,16,true,10,17718370930,16897,51.0 151 + 65536,3,8,262144,1,true,10,17676607265,16857,27.5 152 + 65536,3,8,262144,4,true,10,17806456807,16981,65.2