prep smol-epub for crates.io · pds.dad/pumpkin-os@54972a8

+6 -2

smol-epub/Cargo.toml

··· 1 1 [package] 2 2 name = "smol-epub" 3 + version = "0.1.0" 3 4 edition = "2024" 4 5 rust-version = "1.88" 5 - version = "0.1.0" 6 - description = "Minimal no_std EPUB parser with streaming image decoders" 6 + description = "Minimal no_std EPUB parser with streaming decompression, HTML stripping, CSS resolution, and optional 1-bit image decoders" 7 7 license = "MIT OR Apache-2.0" 8 + repository = "https://github.com/pluots/smol-epub" 9 + keywords = ["epub", "no_std", "embedded", "e-ink", "ebook"] 10 + categories = ["no-std", "parser-implementations", "embedded"] 11 + readme = "README.md" 8 12 9 13 [dependencies] 10 14 miniz_oxide = { version = "0.8", default-features = false, features = ["with-alloc"] }

+119

smol-epub/README.md

··· 1 + # smol-epub 2 + 3 + Minimal `no_std` EPUB parser with streaming decompression, HTML stripping, 4 + CSS resolution, and optional 1-bit image decoders. 5 + 6 + Designed for memory-constrained embedded targets (≥ 140 KB heap), but works 7 + anywhere `alloc` is available. 8 + 9 + ## Features 10 + 11 + | Module | Purpose | 12 + |--------|---------| 13 + | `zip` | ZIP central-directory parser, streaming DEFLATE extraction | 14 + | `xml` | Minimal XML tag / attribute scanner (EPUB metadata) | 15 + | `css` | CSS property parser for EPUB stylesheets | 16 + | `epub` | EPUB structure: `container.xml` → OPF → spine / metadata / TOC | 17 + | `html_strip` | Single-pass, streaming HTML-to-styled-text converter | 18 + | `cache` | Chapter decompress-and-strip pipeline with cache metadata | 19 + | `png` | PNG decoder → 1-bit Floyd–Steinberg dithered bitmap *(feature `images`)* | 20 + | `jpeg` | JPEG decoder → 1-bit Floyd–Steinberg dithered bitmap *(feature `images`)* | 21 + 22 + ## Feature flags 23 + 24 + | Flag | Default | Description | 25 + |------|---------|-------------| 26 + | `images` | ✓ | Enable `png` and `jpeg` image decoders | 27 + 28 + ## Quick start 29 + 30 + ```rust 31 + use smol_epub::zip::{self, ZipIndex}; 32 + use smol_epub::epub::{self, EpubMeta, EpubSpine, EpubToc}; 33 + 34 + // 1. Build ZIP index from the EPUB file's central directory 35 + let mut zip = ZipIndex::new(); 36 + let (cd_offset, cd_size) = ZipIndex::parse_eocd(&tail_buf, file_size)?; 37 + // ... read the central directory bytes into `cd_buf` ... 38 + zip.parse_central_directory(&cd_buf)?; 39 + 40 + // 2. Parse EPUB structure 41 + let container = zip::extract_entry( 42 + zip.entry(zip.find("META-INF/container.xml").unwrap()), 43 + zip.entry(zip.find("META-INF/container.xml").unwrap()).local_offset, 44 + |off, buf| read_fn(off, buf), 45 + )?; 46 + let mut opf_path = [0u8; epub::OPF_PATH_CAP]; 47 + let opf_len = epub::parse_container(&container, &mut opf_path)?; 48 + 49 + // 3. 
Extract metadata and reading-order spine 50 + let mut meta = EpubMeta::new(); 51 + let mut spine = EpubSpine::new(); 52 + epub::parse_opf(&opf_data, opf_dir, &zip, &mut meta, &mut spine)?; 53 + println!("{} by {}", meta.title_str(), meta.author_str()); 54 + 55 + // 4. Optionally parse the table of contents 56 + let mut toc = EpubToc::new(); 57 + if let Some(src) = epub::find_toc_source(&opf_data, opf_dir, &zip) { 58 + epub::parse_toc(src, &toc_data, toc_dir, &spine, &zip, &mut toc); 59 + } 60 + 61 + // 5. Stream-decompress + HTML-strip a chapter 62 + let bytes_written = smol_epub::cache::stream_strip_entry( 63 + &entry, local_offset, 64 + |off, buf| read_fn(off, buf), // read closure 65 + |chunk| { output.extend(chunk); Ok(()) }, // output closure 66 + )?; 67 + ``` 68 + 69 + ## Streaming I/O model 70 + 71 + All functions that read from an external byte source accept a generic 72 + closure: 73 + 74 + ```text 75 + FnMut(offset: u32, buf: &mut [u8]) -> Result<usize, E> 76 + ``` 77 + 78 + This works with SD cards, flash memory, `std::fs::File`, in-memory buffers, 79 + or any other random-access byte store — the crate never assumes a specific 80 + storage backend. `E` is the caller's error type; some entry points pin it (the JPEG `decode_jpeg_sd` / `decode_jpeg_deflate_sd` wrappers require `&'static str`). 81 + 82 + ## Image decoders 83 + 84 + The `png` and `jpeg` modules decode images to 1-bit monochrome bitmaps 85 + using Floyd–Steinberg dithering, ideal for e-ink displays. Three decoder 86 + variants are provided for each format: 87 + 88 + | Function | Input | 89 + |----------|-------| 90 + | `decode_{png,jpeg}_fit` | In-memory `&[u8]` buffer | 91 + | `decode_{png,jpeg}_streaming` | Stored (uncompressed) ZIP entry via read closure | 92 + | `decode_{png,jpeg}_deflate_streaming` | DEFLATE-compressed ZIP entry via read closure | 93 + 94 + All variants accept `max_w` / `max_h` parameters and integer-downscale 95 + the image to fit. 
96 + 97 + ## Memory budget 98 + 99 + Typical peak heap usage on an embedded target: 100 + 101 + | Operation | Peak heap | 102 + |-----------|-----------| 103 + | ZIP index parse | ~5 KB | 104 + | Chapter stream-strip (DEFLATE) | ~51 KB | 105 + | PNG streaming decode | ~90 KB | 106 + | JPEG streaming decode | ~30 KB | 107 + | JPEG DEFLATE streaming decode | ~79 KB | 108 + 109 + Stack usage is kept low throughout; large structs like `DecompressorOxide` 110 + (~11 KB) are always heap-allocated via `Box`. 111 + 112 + ## License 113 + 114 + Licensed under either of 115 + 116 + - [MIT license](https://opensource.org/licenses/MIT) 117 + - [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0) 118 + 119 + at your option.

+33 -9

smol-epub/src/cache.rs

··· 1 - // EPUB chapter cache: streaming decompress + HTML strip to SD. 2 - // No persistent heap; ~51KB temp per chapter. Cache dir: _XXXXXXX/ 3 - // with META.BIN + CHnnn.TXT files. 1 + //! EPUB chapter cache: streaming decompress + HTML strip pipeline. 2 + //! 3 + //! No persistent heap; ≈ 51 KB temporary per chapter. 4 + //! Cache directory layout uses 8.3-safe names: `_XXXXXXX/` with 5 + //! `META.BIN` + `CHnnn.TXT` files. 4 6 5 7 use alloc::boxed::Box; 6 8 use alloc::vec::Vec; ··· 12 14 const CACHE_VERSION: u8 = 1; 13 15 const META_HEADER: usize = 16; 14 16 17 + /// Maximum number of chapters that can be tracked in a single cache. 15 18 pub const MAX_CACHE_CHAPTERS: usize = 256; 19 + /// Maximum byte size of a `META.BIN` file (header + one `u32` per chapter). 16 20 pub const META_MAX_SIZE: usize = META_HEADER + 4 * MAX_CACHE_CHAPTERS; 17 21 18 22 const WINDOW_SIZE: usize = 32768; // DEFLATE sliding window ··· 20 24 const STRIP_BUF_SIZE: usize = 4096; // strip output accumulator 21 25 const FLUSH_THRESHOLD: usize = STRIP_BUF_SIZE - 128; 22 26 27 + /// Compute the FNV-1a hash of `data`. 23 28 #[inline] 24 29 pub fn fnv1a(data: &[u8]) -> u32 { 25 30 let mut h: u32 = 0x811c_9dc5; ··· 30 35 h 31 36 } 32 37 33 - // 8.3 cache dir name: '_' + 7 hex digits of lower 28 bits of hash 38 + /// Generate an 8.3-safe cache directory name from a hash. 39 + /// 40 + /// Format: `_` followed by 7 uppercase hex digits of the lower 28 bits. 34 41 pub fn dir_name_for_hash(name_hash: u32) -> [u8; 8] { 35 42 let h = name_hash & 0x0FFF_FFFF; 36 43 let mut buf = [0u8; 8]; ··· 46 53 buf 47 54 } 48 55 56 + /// Interpret an 8-byte directory name buffer as a UTF-8 `&str`. 49 57 #[inline] 50 58 pub fn dir_name_str(buf: &[u8; 8]) -> &str { 51 59 core::str::from_utf8(buf).unwrap_or("_0000000") 52 60 } 53 61 54 - // 8.3 chapter filename: CH000.TXT to CH255.TXT 62 + /// Generate an 8.3-safe chapter filename: `CH000.TXT` through `CH255.TXT`. 
55 63 pub fn chapter_file_name(idx: u16) -> [u8; 9] { 56 64 debug_assert!(idx < 1000, "chapter index out of 3-digit range"); 57 65 let mut n = *b"CH000.TXT"; ··· 61 69 n 62 70 } 63 71 72 + /// Interpret a 9-byte chapter filename buffer as a UTF-8 `&str`. 64 73 #[inline] 65 74 pub fn chapter_file_str(buf: &[u8; 9]) -> &str { 66 75 core::str::from_utf8(buf).unwrap_or("CH000.TXT") 67 76 } 68 77 78 + /// Filename used for the cache metadata file. 69 79 pub const META_FILE: &str = "META.BIN"; 70 80 71 - // encode cache metadata into buf; return bytes written 81 + /// Encode cache metadata into `buf`; returns the number of bytes written. 82 + /// 83 + /// The metadata header stores a magic value, version, the EPUB file size, 84 + /// a name hash, and a `u32` size for each cached chapter. 72 85 pub fn encode_cache_meta( 73 86 epub_size: u32, 74 87 name_hash: u32, ··· 100 113 total 101 114 } 102 115 103 - // parse and validate META.BIN; write chapter sizes into out slice 116 + /// Parse and validate a `META.BIN` blob. 117 + /// 118 + /// On success, writes individual chapter sizes into `chapter_sizes_out` 119 + /// and returns the number of chapters. Returns an error if the magic, 120 + /// version, EPUB size, name hash, or chapter count do not match. 104 121 pub fn parse_cache_meta( 105 122 data: &[u8], 106 123 epub_size: u32, ··· 154 171 Ok(count) 155 172 } 156 173 157 - // stream-decompress ZIP entry, strip HTML, emit plain-text chunks; ~47KB temp 174 + /// Stream-decompress a ZIP entry, strip HTML, and emit plain-text chunks. 175 + /// 176 + /// `read_fn(offset, buf)` reads raw bytes from the underlying store. 177 + /// `output_fn(chunk)` receives stripped plain-text output incrementally. 178 + /// 179 + /// Returns the total number of bytes written through `output_fn`. 180 + /// Peak temporary memory ≈ 47 KB (decompressor + sliding window + strip 181 + /// buffers). 
158 182 pub fn stream_strip_entry<E>( 159 183 entry: &ZipEntry, 160 184 local_offset: u32, ··· 289 313 Ok(_) => { 290 314 comp_left = 0; 291 315 } 292 - Err(_) => return Err("cache: SD read failed during deflate"), 316 + Err(_) => return Err("cache: read failed during deflate"), 293 317 } 294 318 } 295 319

+65 -14

smol-epub/src/css.rs

··· 1 - // Minimal CSS parser for EPUB stylesheets. 2 - // Selectors: tag, .class, tag.class, grouped. Combinators reduced to 3 - // rightmost simple selector. @-rules and pseudo-classes skipped. 4 - // Rule table stack-allocated: MAX_CSS_RULES x ~16B = 2KB. 1 + //! Minimal CSS parser for EPUB stylesheets. 2 + //! 3 + //! Selectors: tag, `.class`, `tag.class`, grouped. Combinators are 4 + //! reduced to the rightmost simple selector. `@`-rules and 5 + //! pseudo-classes are skipped. 6 + //! 7 + //! Rule table is stack-allocated: `MAX_CSS_RULES` × ~16 B = 2 KB. 5 8 9 + /// Maximum number of CSS rules the parser will store. 6 10 pub const MAX_CSS_RULES: usize = 128; 7 11 8 - // property flag bits (which fields in StyleProps are explicitly set) 12 + // ── property flag bits (which fields in StyleProps are explicitly set) ── 9 13 14 + /// Flag: `font-weight` is explicitly set. 10 15 pub const PROP_FONT_WEIGHT: u16 = 1 << 0; 16 + /// Flag: `font-style` is explicitly set. 11 17 pub const PROP_FONT_STYLE: u16 = 1 << 1; 18 + /// Flag: `text-align` is explicitly set. 12 19 pub const PROP_TEXT_ALIGN: u16 = 1 << 2; 20 + /// Flag: `text-indent` is explicitly set. 13 21 pub const PROP_TEXT_INDENT: u16 = 1 << 3; 22 + /// Flag: `margin-left` is explicitly set. 14 23 pub const PROP_MARGIN_LEFT: u16 = 1 << 4; 24 + /// Flag: `margin-right` is explicitly set. 15 25 pub const PROP_MARGIN_RIGHT: u16 = 1 << 5; 26 + /// Flag: `margin-top` is explicitly set. 16 27 pub const PROP_MARGIN_TOP: u16 = 1 << 6; 28 + /// Flag: `margin-bottom` is explicitly set. 17 29 pub const PROP_MARGIN_BOTTOM: u16 = 1 << 7; 30 + /// Flag: `display` is explicitly set. 18 31 pub const PROP_DISPLAY: u16 = 1 << 8; 32 + /// Flag: `text-decoration` is explicitly set. 19 33 pub const PROP_TEXT_DECORATION: u16 = 1 << 9; 20 34 21 - // property value constants 35 + // ── property value constants ──────────────────────────────────────── 22 36 23 - // font-weight 37 + /// `font-weight: normal`. 
24 38 pub const FW_NORMAL: u8 = 0; 39 + /// `font-weight: bold`. 25 40 pub const FW_BOLD: u8 = 1; 26 41 27 - // font-style 42 + /// `font-style: normal`. 28 43 pub const FS_NORMAL: u8 = 0; 44 + /// `font-style: italic`. 29 45 pub const FS_ITALIC: u8 = 1; 30 46 31 - // text-align 47 + /// `text-align: left`. 32 48 pub const TA_LEFT: u8 = 0; 49 + /// `text-align: center`. 33 50 pub const TA_CENTER: u8 = 1; 51 + /// `text-align: right`. 34 52 pub const TA_RIGHT: u8 = 2; 53 + /// `text-align: justify`. 35 54 pub const TA_JUSTIFY: u8 = 3; 36 55 37 - // display 56 + /// `display` not explicitly set (inherit / default). 38 57 pub const DISP_DEFAULT: u8 = 0; 58 + /// `display: none`. 39 59 pub const DISP_NONE: u8 = 1; 60 + /// `display: block`. 40 61 pub const DISP_BLOCK: u8 = 2; 62 + /// `display: inline`. 41 63 pub const DISP_INLINE: u8 = 3; 42 64 43 - // text-decoration (bitmask) 65 + /// `text-decoration: none`. 44 66 pub const TD_NONE: u8 = 0; 67 + /// `text-decoration: underline`. 45 68 pub const TD_UNDERLINE: u8 = 1; 69 + /// `text-decoration: line-through`. 46 70 pub const TD_LINE_THROUGH: u8 = 2; 47 71 48 - // resolved CSS properties; `set` tracks which are explicitly specified. 49 - // Lengths in quarter-em units (i8): 1em = 4, 0.5em = 2, 2em = 8. 50 - 72 + /// Resolved CSS properties for a single element. 73 + /// 74 + /// The `set` bitmask tracks which fields have been explicitly specified 75 + /// by a stylesheet rule. Lengths are stored in **quarter-em** units 76 + /// (`i8`): 1 em = 4, 0.5 em = 2, 2 em = 8. 51 77 #[derive(Clone, Copy)] 52 78 pub struct StyleProps { 79 + /// Bitmask of `PROP_*` flags indicating which fields are set. 53 80 pub set: u16, 81 + /// `font-weight` — see [`FW_NORMAL`], [`FW_BOLD`]. 54 82 pub font_weight: u8, 83 + /// `font-style` — see [`FS_NORMAL`], [`FS_ITALIC`]. 55 84 pub font_style: u8, 85 + /// `text-align` — see [`TA_LEFT`], [`TA_CENTER`], etc. 56 86 pub text_align: u8, 87 + /// `text-indent` in quarter-em units. 
57 88 pub text_indent: i8, 89 + /// `margin-left` in quarter-em units. 58 90 pub margin_left: i8, 91 + /// `margin-right` in quarter-em units. 59 92 pub margin_right: i8, 93 + /// `margin-top` in quarter-em units. 60 94 pub margin_top: i8, 95 + /// `margin-bottom` in quarter-em units. 61 96 pub margin_bottom: i8, 97 + /// `display` — see [`DISP_DEFAULT`], [`DISP_NONE`], etc. 62 98 pub display: u8, 99 + /// `text-decoration` bitmask — see [`TD_NONE`], [`TD_UNDERLINE`], etc. 63 100 pub text_decoration: u8, 64 101 } 65 102 66 103 impl StyleProps { 104 + /// A `StyleProps` with no fields set and all values at their defaults. 67 105 pub const EMPTY: Self = Self { 68 106 set: 0, 69 107 font_weight: FW_NORMAL, ··· 102 140 } 103 141 104 142 #[inline] 143 + /// Returns `true` if `font-weight` is set to bold. 105 144 pub fn is_bold(&self) -> bool { 106 145 self.set & PROP_FONT_WEIGHT != 0 && self.font_weight == FW_BOLD 107 146 } 108 147 109 148 #[inline] 149 + /// Returns `true` if `font-style` is set to italic. 110 150 pub fn is_italic(&self) -> bool { 111 151 self.set & PROP_FONT_STYLE != 0 && self.font_style == FS_ITALIC 112 152 } 113 153 114 154 #[inline] 155 + /// Returns `true` if `display` is set to `none`. 115 156 pub fn is_hidden(&self) -> bool { 116 157 self.set & PROP_DISPLAY != 0 && self.display == DISP_NONE 117 158 } ··· 156 197 } 157 198 158 199 // parsed CSS rule table, stack-allocated (~2KB) 200 + /// Parsed CSS rule table (stack-allocated, up to [`MAX_CSS_RULES`] entries). 159 201 pub struct CssRules { 160 202 rules: [CssRule; MAX_CSS_RULES], 161 203 count: usize, ··· 168 210 } 169 211 170 212 impl CssRules { 213 + /// Create an empty rule table. 171 214 pub const fn new() -> Self { 172 215 Self { 173 216 rules: [CssRule::EMPTY; MAX_CSS_RULES], ··· 175 218 } 176 219 } 177 220 221 + /// Remove all parsed rules. 178 222 pub fn clear(&mut self) { 179 223 self.count = 0; 180 224 } 181 225 182 226 #[inline] 227 + /// Number of rules currently stored. 
183 228 pub fn len(&self) -> usize { 184 229 self.count 185 230 } 186 231 187 232 #[inline] 233 + /// Returns `true` if no rules have been parsed. 188 234 pub fn is_empty(&self) -> bool { 189 235 self.count == 0 190 236 } 191 237 192 238 // parse stylesheet; may be called multiple times to accumulate rules 239 + /// Parse a CSS stylesheet and append rules to the table. 193 240 pub fn parse(&mut self, css: &[u8]) { 194 241 let mut pos: usize = 0; 195 242 ··· 241 288 } 242 289 243 290 // resolve effective style for tag + class; merged by specificity 291 + /// Resolve the effective style for an element given its tag and class names. 244 292 pub fn resolve(&self, tag_name: &[u8], class_name: &[u8]) -> StyleProps { 245 293 let tid = tag_id(tag_name); 246 294 let chash = if class_name.is_empty() { ··· 262 310 } 263 311 264 312 // resolve by pre-computed tag ID and class hash 313 + /// Resolve the effective style using precomputed tag-id and class-hash. 265 314 pub fn resolve_by_id(&self, tid: u8, chash: u16) -> StyleProps { 266 315 let mut result = StyleProps::EMPTY; 267 316 let mut best = [0u8; 16]; ··· 540 589 // tag ID mapping: lowercase tag name -> compact u8 for selector matching. 541 590 // 0 = unknown/any; known tags get stable IDs. 542 591 592 + /// Map an HTML tag name to a compact numeric id used by [`CssRules::resolve_by_id`]. 543 593 pub fn tag_id(name: &[u8]) -> u8 { 544 594 match name { 545 595 b"p" => 1, ··· 590 640 // class hash: FNV-1a folded to 16 bits. 591 641 // 0 reserved for "no class constraint"; hash of 0 is mapped to 1. 592 642 643 + /// Compute a 16-bit hash of a CSS class name for [`CssRules::resolve_by_id`]. 593 644 pub fn class_hash(name: &[u8]) -> u16 { 594 645 let mut h: u32 = 0x811c_9dc5; 595 646 for &b in name {

+74 -17

smol-epub/src/epub.rs

··· 1 - // EPUB structure parser: container.xml -> OPF -> spine + metadata. 2 - // container.xml gives the OPF path; the OPF gives metadata, a 3 - // manifest (id->href), and a spine (ordered idrefs). Spine idrefs 4 - // are resolved through the manifest to ZIP entry indices. 1 + //! EPUB structure parser: `container.xml` → OPF → spine + metadata. 2 + //! 3 + //! `container.xml` gives the OPF path; the OPF gives metadata, a 4 + //! manifest (`id` → `href`), and a spine (ordered `idref`s). Spine 5 + //! references are resolved through the manifest to ZIP entry indices. 5 6 6 7 use alloc::vec::Vec; 7 8 8 9 use crate::xml; 9 10 use crate::zip::ZipIndex; 10 11 12 + /// Maximum byte length of an EPUB title. 11 13 pub const TITLE_CAP: usize = 96; 14 + /// Maximum byte length of an EPUB author name. 12 15 pub const AUTHOR_CAP: usize = 64; 16 + /// Maximum number of spine entries (reading-order items). 13 17 pub const MAX_SPINE: usize = 256; 18 + /// Maximum byte length of the OPF file path inside the ZIP. 14 19 pub const OPF_PATH_CAP: usize = 256; 15 20 21 + /// EPUB book metadata (title and author), stored inline with fixed-size buffers. 16 22 pub struct EpubMeta { 23 + /// Raw UTF-8 bytes of the title (up to [`TITLE_CAP`] bytes). 17 24 pub title: [u8; TITLE_CAP], 25 + /// Number of valid bytes in [`title`](Self::title). 18 26 pub title_len: u8, 27 + /// Raw UTF-8 bytes of the author name (up to [`AUTHOR_CAP`] bytes). 19 28 pub author: [u8; AUTHOR_CAP], 29 + /// Number of valid bytes in [`author`](Self::author). 20 30 pub author_len: u8, 21 31 } 22 32 ··· 27 37 } 28 38 29 39 impl EpubMeta { 40 + /// Create a new, empty `EpubMeta`. 30 41 pub const fn new() -> Self { 31 42 Self { 32 43 title: [0u8; TITLE_CAP], ··· 36 47 } 37 48 } 38 49 50 + /// Return the title as a `&str`, or `""` if it is not valid UTF-8. 
39 51 pub fn title_str(&self) -> &str { 40 52 core::str::from_utf8(&self.title[..self.title_len as usize]).unwrap_or("") 41 53 } 42 54 55 + /// Return the author as a `&str`, or `""` if it is not valid UTF-8. 43 56 pub fn author_str(&self) -> &str { 44 57 core::str::from_utf8(&self.author[..self.author_len as usize]).unwrap_or("") 45 58 } ··· 57 70 } 58 71 } 59 72 73 + /// The EPUB reading-order spine: an ordered list of ZIP entry indices. 60 74 pub struct EpubSpine { 75 + /// ZIP entry indices in reading order. 61 76 pub items: [u16; MAX_SPINE], 77 + /// Number of valid entries in [`items`](Self::items). 62 78 pub count: u16, 63 79 } 64 80 ··· 69 85 } 70 86 71 87 impl EpubSpine { 88 + /// Create a new, empty spine. 72 89 pub const fn new() -> Self { 73 90 Self { 74 91 items: [0u16; MAX_SPINE], ··· 77 94 } 78 95 79 96 #[inline] 97 + /// Number of items in the spine. 80 98 pub fn len(&self) -> usize { 81 99 self.count as usize 82 100 } 83 101 84 102 #[inline] 103 + /// Returns `true` if the spine contains no items. 85 104 pub fn is_empty(&self) -> bool { 86 105 self.count == 0 87 106 } 88 107 } 89 108 90 - // table of contents 109 + // ── table of contents ─────────────────────────────────────────────── 91 110 111 + /// Maximum number of entries in the table of contents. 92 112 pub const MAX_TOC: usize = 128; 113 + /// Maximum byte length of a single TOC entry title. 93 114 pub const TOC_TITLE_CAP: usize = 48; 94 115 116 + /// A single entry in the EPUB table of contents. 95 117 #[derive(Clone, Copy)] 96 118 pub struct TocEntry { 119 + /// Raw UTF-8 bytes of the entry title. 97 120 pub title: [u8; TOC_TITLE_CAP], 121 + /// Number of valid bytes in [`title`](Self::title). 98 122 pub title_len: u8, 99 - // index into EpubSpine::items; 0xFFFF = unresolved 123 + /// Index into [`EpubSpine::items`]; `0xFFFF` means unresolved. 100 124 pub spine_idx: u16, 101 125 } 102 126 103 127 impl TocEntry { 128 + /// An empty, unresolved TOC entry. 
104 129 pub const EMPTY: Self = Self { 105 130 title: [0u8; TOC_TITLE_CAP], 106 131 title_len: 0, 107 132 spine_idx: 0xFFFF, 108 133 }; 109 134 135 + /// Return the entry title as a `&str`, or `""` if not valid UTF-8. 110 136 pub fn title_str(&self) -> &str { 111 137 core::str::from_utf8(&self.title[..self.title_len as usize]).unwrap_or("") 112 138 } 113 139 } 114 140 141 + /// EPUB table of contents (flat list of [`TocEntry`] items). 115 142 pub struct EpubToc { 143 + /// TOC entries in document order. 116 144 pub entries: [TocEntry; MAX_TOC], 145 + /// Number of valid entries. 117 146 pub count: u16, 118 147 } 119 148 ··· 124 153 } 125 154 126 155 impl EpubToc { 156 + /// Create a new, empty table of contents. 127 157 pub const fn new() -> Self { 128 158 Self { 129 159 entries: [TocEntry::EMPTY; MAX_TOC], ··· 131 161 } 132 162 } 133 163 164 + /// Remove all entries. 134 165 pub fn clear(&mut self) { 135 166 self.count = 0; 136 167 } 137 168 138 169 #[inline] 170 + /// Number of entries in the TOC. 139 171 pub fn len(&self) -> usize { 140 172 self.count as usize 141 173 } 142 174 143 175 #[inline] 176 + /// Returns `true` if the TOC contains no entries. 144 177 pub fn is_empty(&self) -> bool { 145 178 self.count == 0 146 179 } ··· 159 192 } 160 193 } 161 194 162 - // where the TOC data lives inside the EPUB ZIP 195 + /// Identifies where the table-of-contents data lives inside the EPUB ZIP. 163 196 #[derive(Clone, Copy, Debug)] 164 197 pub enum TocSource { 165 - Ncx(usize), // EPUB 2 166 - Nav(usize), // EPUB 3 198 + /// EPUB 2 NCX document (ZIP entry index). 199 + Ncx(usize), 200 + /// EPUB 3 Navigation Document (ZIP entry index). 201 + Nav(usize), 167 202 } 168 203 169 204 impl TocSource { 205 + /// Return the ZIP entry index regardless of variant. 
170 206 pub fn zip_index(&self) -> usize { 171 207 match *self { 172 208 TocSource::Ncx(i) | TocSource::Nav(i) => i, ··· 175 211 } 176 212 177 213 // parse container.xml to find the OPF path; write into out 214 + /// Parse `META-INF/container.xml` and extract the OPF file path. 215 + /// 216 + /// Writes the path into `out` and returns its byte length. 178 217 pub fn parse_container(data: &[u8], out: &mut [u8; OPF_PATH_CAP]) -> Result<usize, &'static str> { 179 218 let mut found_len: Option<usize> = None; 180 219 ··· 192 231 found_len.ok_or("epub: no rootfile full-path in container.xml") 193 232 } 194 233 195 - // parse OPF: extract metadata and build the reading-order spine as ZIP entry indices. 196 - // Two-pass, zero heap: phase 1 collects idref byte offsets (MAX_SPINE*4 = 1KB stack); 197 - // phase 2 resolves each idref to a manifest href and then a ZIP index. 234 + /// Parse an OPF document: extract metadata and build the reading-order spine. 235 + /// 236 + /// Two-pass, zero heap: phase 1 collects `idref` byte offsets 237 + /// (`MAX_SPINE` × 4 = 1 KB stack); phase 2 resolves each `idref` 238 + /// through the manifest to a ZIP entry index. 198 239 pub fn parse_opf( 199 240 opf: &[u8], 200 241 opf_dir: &str, ··· 284 325 } 285 326 286 327 // locate TOC in ZIP: EPUB 3 nav first, EPUB 2 NCX fallback 328 + /// Search the OPF manifest for a table-of-contents source. 329 + /// 330 + /// Tries, in order: EPUB 3 `<item properties="nav">`, EPUB 2 331 + /// `<spine toc="id">`, and a media-type fallback for NCX files. 287 332 pub fn find_toc_source(opf: &[u8], opf_dir: &str, zip: &ZipIndex) -> Option<TocSource> { 288 333 let mut path_buf = [0u8; 512]; 289 334 ··· 400 445 None 401 446 } 402 447 403 - // dispatch TOC parse by format (NCX vs nav) 448 + /// Parse a TOC document (NCX or Navigation Document) into `toc`. 449 + /// 450 + /// Dispatches to [`parse_ncx_toc`] or [`parse_nav_toc`] based on 451 + /// the [`TocSource`] variant. 
404 452 pub fn parse_toc( 405 453 source: TocSource, 406 454 data: &[u8], ··· 415 463 } 416 464 } 417 465 418 - // parse EPUB 2 NCX into flat TOC entries (nested navPoints flattened) 466 + /// Parse an EPUB 2 NCX document into flat TOC entries. 467 + /// 468 + /// Nested `<navPoint>` elements are flattened into a linear list. 419 469 pub fn parse_ncx_toc( 420 470 ncx: &[u8], 421 471 ncx_dir: &str, ··· 497 547 } 498 548 } 499 549 500 - // parse EPUB 3 nav document; extract <a> entries, flatten nested <ol> 550 + /// Parse an EPUB 3 Navigation Document into flat TOC entries. 551 + /// 552 + /// Extracts `<a>` elements from the `<nav epub:type="toc">` region 553 + /// and flattens nested `<ol>` lists. 501 554 pub fn parse_nav_toc( 502 555 nav: &[u8], 503 556 nav_dir: &str, ··· 787 840 if start >= end { &[] } else { &data[start..end] } 788 841 } 789 842 790 - // -- path helpers -- 843 + // ── path helpers ──────────────────────────────────────────────────── 791 844 845 + /// Resolve a relative `href` against `base_dir`, writing the result 846 + /// into `out`. Returns the number of bytes written. 847 + /// 848 + /// Handles `../` segments, leading `./`, and absolute paths. 792 849 pub fn resolve_path(base_dir: &str, href: &str, out: &mut [u8; 512]) -> usize { 793 850 let href = href.split('#').next().unwrap_or(href); 794 851 ··· 893 950 } 894 951 } 895 952 896 - // check if filename looks like an EPUB (.epub or .epu for FAT 8.3 truncation) 953 + /// Check if a filename looks like an EPUB (`.epub` or `.epu` for FAT 8.3 truncation). 897 954 pub fn is_epub_filename(name: &str) -> bool { 898 955 let b = name.as_bytes(); 899 956

+40 -12

smol-epub/src/html_strip.rs

··· 1 - // Single-pass HTML to styled-text converter for EPUB XHTML. 2 - // HtmlStripStream: streaming feed/finish; emits 2-byte [MARKER, tag] style codes. 3 - // strip_html_inplace(): in-place variant for container.xml/OPF/TOC. 4 - // Marker: [0x01, tag]. Inline: B/b I/i. Block: H/h Q/q S(hr). 1 + //! Single-pass HTML to styled-text converter for EPUB XHTML. 2 + //! 3 + //! [`HtmlStripStream`]: streaming `feed`/`finish` interface; emits 2-byte 4 + //! `[MARKER, tag]` style codes inline with plain text. 5 + //! 6 + //! [`strip_html_inplace`]: in-place variant for `container.xml` / OPF / TOC. 7 + //! 8 + //! Marker encoding: `[0x01, tag]`. Inline: `B`/`b` `I`/`i`. 9 + //! Block: `H`/`h` `Q`/`q` `S` (hr). Image: `P` (path follows). 5 10 6 11 use alloc::vec::Vec; 7 12 8 - pub const MARKER: u8 = 0x01; // escape byte for style markers 13 + /// Escape byte that introduces a 2-byte style marker in the output stream. 14 + pub const MARKER: u8 = 0x01; 9 15 16 + /// Style tag: bold **on** (`[MARKER, BOLD_ON]`). 10 17 pub const BOLD_ON: u8 = b'B'; 18 + /// Style tag: bold **off** (`[MARKER, BOLD_OFF]`). 11 19 pub const BOLD_OFF: u8 = b'b'; 20 + /// Style tag: italic **on** (`[MARKER, ITALIC_ON]`). 12 21 pub const ITALIC_ON: u8 = b'I'; 22 + /// Style tag: italic **off** (`[MARKER, ITALIC_OFF]`). 13 23 pub const ITALIC_OFF: u8 = b'i'; 24 + /// Style tag: heading **on** (`[MARKER, HEADING_ON]`). 14 25 pub const HEADING_ON: u8 = b'H'; 26 + /// Style tag: heading **off** (`[MARKER, HEADING_OFF]`). 15 27 pub const HEADING_OFF: u8 = b'h'; 28 + /// Style tag: block-quote **on** (`[MARKER, QUOTE_ON]`). 16 29 pub const QUOTE_ON: u8 = b'Q'; 30 + /// Style tag: block-quote **off** (`[MARKER, QUOTE_OFF]`). 17 31 pub const QUOTE_OFF: u8 = b'q'; 18 32 19 - // Standalone 33 + /// Style tag: thematic break / horizontal rule (`[MARKER, BREAK]`). 20 34 pub const BREAK: u8 = b'S'; 21 - pub const IMG_REF: u8 = b'P'; // image ref: [MARKER, IMG_REF, len, path...] 
35 + /// Style tag: inline image reference (`[MARKER, IMG_REF, len, path…]`). 36 + pub const IMG_REF: u8 = b'P'; 22 37 38 + /// Returns `true` if `b` is the [`MARKER`] escape byte. 23 39 #[inline] 24 40 pub const fn is_marker(b: u8) -> bool { 25 41 b == MARKER ··· 64 80 } 65 81 } 66 82 67 - // stateful streaming HTML-to-styled-text converter; ~80 bytes of state 83 + /// Stateful, streaming HTML-to-styled-text converter (~80 bytes of state). 84 + /// 85 + /// Feed chunks of EPUB XHTML via [`feed`](Self::feed), then call 86 + /// [`finish`](Self::finish) to flush any trailing state. The output is 87 + /// plain text interspersed with 2-byte `[MARKER, tag]` style codes. 68 88 pub struct HtmlStripStream { 69 89 phase: Phase, 70 90 ··· 116 136 } 117 137 118 138 impl HtmlStripStream { 139 + /// Create a new stream in its initial state. 119 140 pub const fn new() -> Self { 120 141 Self { 121 142 phase: Phase::Text, ··· 147 168 } 148 169 } 149 170 150 - // process a chunk of HTML; returns (consumed, written); call again if input not fully consumed 171 + /// Process a chunk of HTML input. 172 + /// 173 + /// Returns `(consumed, written)`. If `consumed < input.len()`, call 174 + /// again with the remaining input (the output buffer was full). 151 175 pub fn feed(&mut self, input: &[u8], output: &mut [u8]) -> (usize, usize) { 152 176 let ilen = input.len(); 153 177 let olen = output.len(); ··· 572 596 } 573 597 } 574 598 575 - // flush pending state; append terminal newline if content was produced; return bytes written 599 + /// Flush any pending state and append a terminal newline if content 600 + /// was produced. Returns the number of bytes written to `output`. 576 601 pub fn finish(&mut self, output: &mut [u8]) -> usize { 577 602 let mut op: usize = 0; 578 603 ··· 756 781 } 757 782 } 758 783 759 - // in-place HTML stripper: operates on a complete buffer, produces plain text 760 - // without style markers. write cursor never passes read cursor (w <= r always). 
784 + /// Strip HTML tags from a complete buffer **in place**, producing plain text 785 + /// without style markers. 786 + /// 787 + /// The write cursor never passes the read cursor, so no extra allocation 788 + /// is needed. 761 789 pub fn strip_html_inplace(buf: &mut Vec<u8>) { 762 790 let len = buf.len(); 763 791 if len == 0 {

+60 -16

smol-epub/src/jpeg.rs

··· 1 - // Baseline JPEG decoder for e-ink display. 2 - // Streams MCU-row-by-row via ChunkReader (4KB chunks from SD); peak RAM ~30KB. 3 - // Luminance (Y) only; chrominance Huffman-decoded to advance bitstream, discarded. 4 - // Progressive JPEG (SOF2) partially supported: first scan only (DC + low-freq AC). 5 - // Full progressive not feasible: ~1.5MB coefficient buffer exceeds ESP32-C3 heap. 6 - // Output: png::DecodedImage, packed 1-bit MSB-first, Floyd-Steinberg dithered. 1 + //! Minimal baseline JPEG decoder producing 1-bit Floyd–Steinberg dithered bitmaps. 2 + //! 3 + //! Streams MCU-row-by-row via 4 KB chunked reads; peak RAM ≈ 30 KB. 4 + //! Luminance (Y) channel only — chrominance is Huffman-decoded to 5 + //! advance the bitstream, then discarded. 6 + //! 7 + //! Progressive JPEG (SOF2) is partially supported: first scan only 8 + //! (DC + low-frequency AC). 9 + //! 10 + //! Output is packed 1-bit MSB-first, row-major — see [`DecodedImage`](crate::DecodedImage). 7 11 8 12 extern crate alloc; 9 13 ··· 11 15 use alloc::vec; 12 16 use alloc::vec::Vec; 13 17 14 - use crate::png::DecodedImage; 18 + use crate::DecodedImage; 15 19 16 20 // JPEG marker bytes 17 21 ··· 34 38 // header bytes to read for marker parsing; large APP/EXIF segments skipped by length 35 39 const HEADER_READ: usize = 32768; 36 40 37 - // chunk size for streaming SD reads during MCU decode 41 + // chunk size for streaming reads during MCU decode 38 42 const CHUNK_SIZE: usize = 4096; 39 43 40 44 // DEFLATE sliding-window size for streaming ZIP decompression ··· 175 179 // reads from SD via closure, buffering 4KB chunks 176 180 struct ChunkReader<F> { 177 181 read_fn: F, 178 - offset: u32, // absolute SD offset of next byte to fetch 182 + offset: u32, // absolute offset of next byte to fetch 179 183 end: u32, // end-of-data offset (exclusive) 180 184 buf: [u8; CHUNK_SIZE], 181 185 pos: usize, ··· 234 238 // peak heap: ~47KB (11KB decompressor + 32KB window + 4KB read buf). 
235 239 struct DeflateReader<F> { 236 240 read_fn: F, 237 - file_pos: u32, // absolute SD offset of next compressed byte 241 + file_pos: u32, // absolute offset of next compressed byte 238 242 comp_left: usize, // compressed bytes remaining in ZIP entry 239 243 rbuf: Vec<u8>, // compressed-data read buffer 240 244 in_avail: usize, // valid bytes in rbuf ··· 515 519 // public API 516 520 517 521 // decode a baseline JPEG from an in-memory buffer 522 + /// Decode a JPEG from an in-memory buffer to a 1-bit dithered bitmap. 523 + /// 524 + /// The image is integer-downscaled so the result fits within 525 + /// `max_w` × `max_h` pixels. 518 526 pub fn decode_jpeg_fit(data: &[u8], max_w: u16, max_h: u16) -> Result<DecodedImage, &'static str> { 519 527 let st = parse_markers(data)?; 520 528 ··· 524 532 decode_baseline(&st, BitReader::new(reader), max_w, max_h) 525 533 } 526 534 527 - // decode a JPEG by streaming 4KB chunks from SD. 528 - // read_fn(abs_offset, buf) -> Ok(bytes_read). progressive = first scan only. 529 - pub fn decode_jpeg_sd<F>( 535 + /// Decode a JPEG from a **stored** (uncompressed) ZIP entry by streaming 536 + /// 4 KB chunks through `read_fn`. 537 + /// 538 + /// `read_fn(offset, buf)` reads bytes at the given absolute offset and 539 + /// returns the number of bytes actually read. Progressive JPEGs are 540 + /// decoded using the first scan only. 541 + pub fn decode_jpeg_streaming<F>( 530 542 mut read_fn: F, 531 543 data_offset: u32, 532 544 data_size: u32, ··· 559 571 decode_baseline(&st, BitReader::new(reader), max_w, max_h) 560 572 } 561 573 562 - // decode a DEFLATE-compressed JPEG from SD, streaming both decompression and MCU decode. 563 - // peak heap: ~79KB (47KB deflate reader + 32KB header buf + ~30KB decode bufs). 564 - pub fn decode_jpeg_deflate_sd<F>( 574 + /// Backward-compatible alias for [`decode_jpeg_streaming`]. 
575 + pub fn decode_jpeg_sd<F>( 576 + read_fn: F, 577 + data_offset: u32, 578 + data_size: u32, 579 + max_w: u16, 580 + max_h: u16, 581 + ) -> Result<DecodedImage, &'static str> 582 + where 583 + F: FnMut(u32, &mut [u8]) -> Result<usize, &'static str>, 584 + { 585 + decode_jpeg_streaming(read_fn, data_offset, data_size, max_w, max_h) 586 + } 587 + 588 + /// Decode a JPEG from a **DEFLATE-compressed** ZIP entry by streaming 589 + /// reads through `read_fn`. 590 + /// 591 + /// Both ZIP decompression and MCU decode are streamed concurrently, 592 + /// so the full entry is never held in memory. Peak heap ≈ 79 KB. 593 + pub fn decode_jpeg_deflate_streaming<F>( 565 594 read_fn: F, 566 595 data_offset: u32, 567 596 comp_size: u32, ··· 614 643 drop(hdr); 615 644 616 645 decode_baseline(&st, BitReader::new(deflate), max_w, max_h) 646 + } 647 + 648 + /// Backward-compatible alias for [`decode_jpeg_deflate_streaming`]. 649 + pub fn decode_jpeg_deflate_sd<F>( 650 + read_fn: F, 651 + data_offset: u32, 652 + comp_size: u32, 653 + uncomp_size: u32, 654 + max_w: u16, 655 + max_h: u16, 656 + ) -> Result<DecodedImage, &'static str> 657 + where 658 + F: FnMut(u32, &mut [u8]) -> Result<usize, &'static str>, 659 + { 660 + decode_jpeg_deflate_streaming(read_fn, data_offset, comp_size, uncomp_size, max_w, max_h) 617 661 } 618 662 619 663 // baseline decode core (generic over byte source)

+116 -9

smol-epub/src/lib.rs

··· 1 - // smol-epub: minimal no_std EPUB parser with streaming image decoders. 2 - // zip: ZIP central directory parser, streaming DEFLATE extraction 3 - // xml: minimal XML tag/attribute scanner for EPUB metadata 4 - // css: minimal CSS parser for EPUB stylesheet resolution 5 - // epub: EPUB structure parser (container.xml, OPF spine, TOC) 6 - // html_strip: single-pass HTML to styled-text converter (streaming) 7 - // cache: EPUB chapter cache: streaming decompress + strip 8 - // png: PNG decoder, 1-bit Floyd-Steinberg dithered bitmap 9 - // jpeg: JPEG decoder, 1-bit Floyd-Steinberg dithered bitmap 1 + //! # smol-epub 2 + //! 3 + //! Minimal `no_std` EPUB parser with streaming decompression, HTML 4 + //! stripping, CSS resolution, and optional 1-bit image decoders. 5 + //! 6 + //! Designed for memory-constrained embedded targets (≥ 140 KB heap), 7 + //! but works anywhere `alloc` is available. 8 + //! 9 + //! ## Modules 10 + //! 11 + //! | Module | Purpose | 12 + //! |--------|---------| 13 + //! | [`zip`] | ZIP central-directory parser, streaming DEFLATE extraction | 14 + //! | [`xml`] | Minimal XML tag / attribute scanner (EPUB metadata) | 15 + //! | [`css`] | CSS property parser for EPUB stylesheets | 16 + //! | [`epub`] | EPUB structure: `container.xml` → OPF → spine / metadata / TOC | 17 + //! | [`html_strip`] | Single-pass, streaming HTML-to-styled-text converter | 18 + //! | [`cache`] | Chapter decompress-and-strip pipeline with cache metadata | 19 + //! | [`png`] | PNG decoder → 1-bit Floyd–Steinberg dithered bitmap *(feature `images`)* | 20 + //! | [`jpeg`] | JPEG decoder → 1-bit Floyd–Steinberg dithered bitmap *(feature `images`)* | 21 + //! 22 + //! ## Feature flags 23 + //! 24 + //! | Flag | Default | Description | 25 + //! |------|---------|-------------| 26 + //! | `images` | ✓ | Enable [`png`] and [`jpeg`] image decoders | 27 + //! 28 + //! ## Streaming I/O model 29 + //! 30 + //! 
Functions that read from an external byte source accept a generic 31 + //! closure with signature: 32 + //! 33 + //! ```text 34 + //! FnMut(offset: u32, buf: &mut [u8]) -> Result<usize, E> 35 + //! ``` 36 + //! 37 + //! This works with SD cards, flash, `std::fs::File`, in-memory 38 + //! buffers, or any other random-access byte store. 39 + //! 40 + //! ## Quick start 41 + //! 42 + //! ```rust,ignore 43 + //! use smol_epub::zip::ZipIndex; 44 + //! use smol_epub::epub::{self, EpubMeta, EpubSpine, EpubToc}; 45 + //! 46 + //! // 1. Build ZIP index from the file's central directory 47 + //! let mut zip = ZipIndex::new(); 48 + //! // ... parse_eocd, read CD, parse_central_directory ... 49 + //! 50 + //! // 2. Parse EPUB structure 51 + //! let container = smol_epub::zip::extract_entry(/* ... */)?; 52 + //! let mut opf_path = [0u8; epub::OPF_PATH_CAP]; 53 + //! let opf_len = epub::parse_container(&container, &mut opf_path)?; 54 + //! 55 + //! // 3. Extract metadata and reading-order spine 56 + //! let mut meta = EpubMeta::new(); 57 + //! let mut spine = EpubSpine::new(); 58 + //! epub::parse_opf(&opf_data, opf_dir, &zip, &mut meta, &mut spine)?; 59 + //! 60 + //! // 4. Optionally parse the table of contents 61 + //! let mut toc = EpubToc::new(); 62 + //! if let Some(src) = epub::find_toc_source(&opf_data, opf_dir, &zip) { 63 + //! epub::parse_toc(src, &toc_data, toc_dir, &spine, &zip, &mut toc); 64 + //! } 65 + //! 66 + //! // 5. Stream-decompress + HTML-strip chapters via cache module 67 + //! let bytes_written = smol_epub::cache::stream_strip_entry( 68 + //! &entry, local_offset, read_fn, output_fn, 69 + //! )?; 70 + //! 
``` 10 71 11 72 #![no_std] 73 + #![warn(missing_docs)] 12 74 13 75 extern crate alloc; 76 + 77 + use alloc::vec::Vec; 78 + 79 + // ── public modules ────────────────────────────────────────────────── 14 80 15 81 pub mod cache; 16 82 pub mod css; ··· 23 89 pub mod jpeg; 24 90 #[cfg(feature = "images")] 25 91 pub mod png; 92 + 93 + // ── shared types ──────────────────────────────────────────────────── 94 + 95 + /// A decoded 1-bit monochrome image, packed MSB-first, row-major. 96 + /// 97 + /// A **set** bit (1) represents black (ink); a **clear** bit (0) represents 98 + /// white (paper). This convention matches most e-ink controllers directly. 99 + /// 100 + /// Produced by the [`png`] and [`jpeg`] decoders when the `images` 101 + /// feature is enabled. 102 + /// 103 + /// # Layout 104 + /// 105 + /// ```text 106 + /// stride = ceil(width / 8) bytes per row 107 + /// data.len() == stride * height 108 + /// ``` 109 + /// 110 + /// Pixel (x, y) is bit `(7 - x % 8)` of byte `data[y * stride + x / 8]`. 111 + #[derive(Clone)] 112 + pub struct DecodedImage { 113 + /// Image width in pixels. 114 + pub width: u16, 115 + /// Image height in pixels. 116 + pub height: u16, 117 + /// Packed 1-bit pixel data, `stride * height` bytes. 118 + pub data: Vec<u8>, 119 + /// Bytes per row (`ceil(width / 8)`). 120 + pub stride: usize, 121 + } 122 + 123 + impl core::fmt::Debug for DecodedImage { 124 + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 125 + f.debug_struct("DecodedImage") 126 + .field("width", &self.width) 127 + .field("height", &self.height) 128 + .field("stride", &self.stride) 129 + .field("data_len", &self.data.len()) 130 + .finish() 131 + } 132 + }

+80 -49

smol-epub/src/png.rs

··· 1 - // Minimal PNG decoder for monochrome e-ink display. 2 - // Decodes to 1-bit Floyd-Steinberg dithered bitmap; streams row-by-row 3 - // through miniz_oxide; peak RAM ~90KB (32KB dict + 11KB decomp + bitmap). 4 - // Colour types: 0=greyscale, 2=RGB, 3=palette, 4=grey+alpha, 6=RGBA. 5 - // Interlaced (Adam7) rejected; rare in EPUB and doubles code complexity. 6 - // Output packed 1-bit MSB-first, row-major; pass to StripBuffer::blit_1bpp. 1 + //! Minimal PNG decoder producing 1-bit Floyd–Steinberg dithered bitmaps. 2 + //! 3 + //! Streams row-by-row through `miniz_oxide`; peak RAM ≈ 90 KB 4 + //! (32 KB dictionary + 11 KB decompressor + output bitmap). 5 + //! 6 + //! Supported colour types: greyscale, RGB, palette, grey+alpha, RGBA. 7 + //! Interlaced (Adam7) images are rejected (rare in EPUB content and 8 + //! would double code complexity). 9 + //! 10 + //! Output is packed 1-bit MSB-first, row-major — see [`DecodedImage`](crate::DecodedImage). 7 11 8 12 extern crate alloc; 9 13 10 14 use alloc::boxed::Box; 11 15 use alloc::vec; 12 16 use alloc::vec::Vec; 17 + 18 + use crate::DecodedImage; 13 19 14 20 // PNG constants 15 21 ··· 37 43 // miniz_oxide LZ dictionary size; must be a power of two >= 32768 38 44 const DICT_SIZE: usize = 32_768; 39 45 40 - // public types 41 - 42 - // decoded 1-bit image, packed MSB-first, row-major. 43 - // set bit = black (ink); clear bit = white (paper). 44 - pub struct DecodedImage { 45 - pub width: u16, 46 - pub height: u16, 47 - pub data: Vec<u8>, // stride * height bytes 48 - pub stride: usize, // bytes per row: ceil(width / 8) 49 - } 50 - 51 - // backward-compatible alias 46 + /// Backward-compatible alias for [`DecodedImage`](crate::DecodedImage). 
52 47 pub type PngImage = DecodedImage; 53 48 54 - // decode a PNG buffer to a 1-bit dithered bitmap; 55 - // images wider or taller than max_w/max_h are nearest-neighbour down-scaled 56 - pub fn decode_png(data: &[u8]) -> Result<DecodedImage, &'static str> { 57 - decode_png_fit(data, 800, 480) 58 - } 59 - 60 - // decode, scaling down by integer factor so result fits inside max_w x max_h 49 + /// Decode a PNG from an in-memory buffer to a 1-bit dithered bitmap. 50 + /// 51 + /// The image is integer-downscaled so the result fits within 52 + /// `max_w` × `max_h` pixels. 61 53 pub fn decode_png_fit(data: &[u8], max_w: u16, max_h: u16) -> Result<DecodedImage, &'static str> { 62 54 let header = parse_ihdr(data)?; 63 55 let idat = collect_idat(data)?; ··· 223 215 }) 224 216 } 225 217 226 - // streaming PNG decoders: decode PNG images from ZIP entries without 227 - // extracting to a contiguous buffer; IDAT fed directly into zlib row-by-row 218 + // ── streaming PNG decoders ────────────────────────────────────────── 219 + // Decode PNG images from ZIP entries without extracting to a contiguous 220 + // buffer; IDAT data is fed directly into zlib row-by-row. 228 221 229 - // chunk size for streaming SD reads 230 - const SD_READ_BUF: usize = 4096; 222 + /// Read-chunk size used by the streaming decoders (bytes). 223 + const STREAMING_READ_BUF: usize = 4096; 231 224 232 - // DEFLATE sliding-window for outer ZIP decompression 225 + /// DEFLATE sliding-window for outer ZIP decompression (bytes). 
233 226 const ZIP_DEFLATE_WINDOW: usize = 32_768; 234 227 235 228 // sequential byte source for streaming PNG decoder ··· 247 240 } 248 241 } 249 242 250 - // reads sequentially from a STORED ZIP entry on SD 251 - struct SdSource<F> { 243 + // reads sequentially from a STORED ZIP entry via a user-supplied closure 244 + struct StoredSource<F> { 252 245 read_fn: F, 253 246 offset: u32, 254 247 end: u32, 255 248 } 256 249 257 - impl<F: FnMut(u32, &mut [u8]) -> Result<usize, &'static str>> ReadExact for SdSource<F> { 250 + impl<F: FnMut(u32, &mut [u8]) -> Result<usize, &'static str>> ReadExact for StoredSource<F> { 258 251 fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), &'static str> { 259 252 let mut done = 0usize; 260 253 while done < buf.len() { ··· 283 276 } 284 277 } 285 278 286 - // reads sequentially from a DEFLATE-compressed ZIP entry on SD 279 + // reads sequentially from a DEFLATE-compressed ZIP entry via a user-supplied closure 287 280 struct DeflateSource<F> { 288 281 read_fn: F, 289 282 file_pos: u32, ··· 316 309 window.resize(ZIP_DEFLATE_WINDOW, 0); 317 310 318 311 let mut rbuf = Vec::new(); 319 - rbuf.try_reserve_exact(SD_READ_BUF) 312 + rbuf.try_reserve_exact(STREAMING_READ_BUF) 320 313 .map_err(|_| "png: OOM for DEFLATE read buffer")?; 321 - rbuf.resize(SD_READ_BUF, 0); 314 + rbuf.resize(STREAMING_READ_BUF, 0); 322 315 323 316 Ok(Self { 324 317 read_fn, ··· 343 336 return Ok(()); 344 337 } 345 338 346 - if self.in_avail < SD_READ_BUF && self.comp_left > 0 { 347 - let space = SD_READ_BUF - self.in_avail; 339 + if self.in_avail < STREAMING_READ_BUF && self.comp_left > 0 { 340 + let space = STREAMING_READ_BUF - self.in_avail; 348 341 let want = space.min(self.comp_left); 349 342 match (self.read_fn)( 350 343 self.file_pos, ··· 358 351 Ok(_) => { 359 352 self.comp_left = 0; 360 353 } 361 - Err(_) => return Err("png: SD read failed during DEFLATE"), 354 + Err(_) => return Err("png: read failed during DEFLATE"), 362 355 } 363 356 } 364 357 ··· 422 415 
} 423 416 424 417 // decode a PNG from a STORED ZIP entry by streaming through read_fn 425 - pub fn decode_png_sd<F>( 418 + /// Decode a PNG from a **stored** (uncompressed) ZIP entry by streaming 419 + /// reads through `read_fn`. 420 + /// 421 + /// `read_fn(offset, buf)` reads bytes at the given absolute offset and 422 + /// returns the number of bytes actually read. 423 + pub fn decode_png_streaming<F>( 426 424 read_fn: F, 427 425 data_offset: u32, 428 426 data_size: u32, ··· 432 430 where 433 431 F: FnMut(u32, &mut [u8]) -> Result<usize, &'static str>, 434 432 { 435 - let mut src = SdSource { 433 + let mut src = StoredSource { 436 434 read_fn, 437 435 offset: data_offset, 438 436 end: data_offset + data_size, ··· 440 438 decode_png_from(&mut src, max_w, max_h) 441 439 } 442 440 443 - // decode a PNG from a DEFLATE-compressed ZIP entry by streaming 444 - pub fn decode_png_deflate_sd<F>( 441 + /// Backward-compatible alias for [`decode_png_streaming`]. 442 + pub fn decode_png_sd<F>( 443 + read_fn: F, 444 + data_offset: u32, 445 + data_size: u32, 446 + max_w: u16, 447 + max_h: u16, 448 + ) -> Result<DecodedImage, &'static str> 449 + where 450 + F: FnMut(u32, &mut [u8]) -> Result<usize, &'static str>, 451 + { 452 + decode_png_streaming(read_fn, data_offset, data_size, max_w, max_h) 453 + } 454 + 455 + /// Decode a PNG from a **DEFLATE-compressed** ZIP entry by streaming 456 + /// reads through `read_fn`. 457 + /// 458 + /// Both ZIP decompression and PNG IDAT inflation are streamed 459 + /// concurrently, so the full entry is never held in memory. 460 + pub fn decode_png_deflate_streaming<F>( 445 461 read_fn: F, 446 462 data_offset: u32, 447 463 comp_size: u32, ··· 455 471 decode_png_from(&mut src, max_w, max_h) 456 472 } 457 473 458 - // core streaming PNG decoder; generic over byte source. 459 - // reads chunks sequentially, feeds IDAT into zlib row-by-row; never holds full PNG in RAM. 474 + /// Backward-compatible alias for [`decode_png_deflate_streaming`]. 
475 + pub fn decode_png_deflate_sd<F>( 476 + read_fn: F, 477 + data_offset: u32, 478 + comp_size: u32, 479 + max_w: u16, 480 + max_h: u16, 481 + ) -> Result<DecodedImage, &'static str> 482 + where 483 + F: FnMut(u32, &mut [u8]) -> Result<usize, &'static str>, 484 + { 485 + decode_png_deflate_streaming(read_fn, data_offset, comp_size, max_w, max_h) 486 + } 487 + 488 + /// Core streaming PNG decoder; generic over byte source. 489 + /// Reads chunks sequentially, feeds IDAT into zlib row-by-row; 490 + /// never holds the full PNG in RAM. 460 491 fn decode_png_from<R: ReadExact>( 461 492 src: &mut R, 462 493 max_w: u16, ··· 591 622 let mut out_y: usize = 0; 592 623 593 624 // feed IDAT chunks into zlib row-by-row 594 - let mut idat_buf = [0u8; SD_READ_BUF]; 625 + let mut idat_buf = [0u8; STREAMING_READ_BUF]; 595 626 let mut in_avail: usize = 0; 596 627 let mut idat_chunk_left = first_idat_len; 597 628 let mut more_idat = true; 598 629 599 630 loop { 600 631 // top up input buffer from the IDAT stream 601 - while in_avail < SD_READ_BUF { 632 + while in_avail < STREAMING_READ_BUF { 602 633 if idat_chunk_left > 0 { 603 - let space = SD_READ_BUF - in_avail; 634 + let space = STREAMING_READ_BUF - in_avail; 604 635 let want = idat_chunk_left.min(space); 605 636 src.read_exact(&mut idat_buf[in_avail..in_avail + want])?; 606 637 in_avail += want; ··· 685 716 if !has_more && in_avail == 0 { 686 717 return Err("png: truncated IDAT stream"); 687 718 } 688 - if consumed == 0 && produced == 0 && in_avail >= SD_READ_BUF { 719 + if consumed == 0 && produced == 0 && in_avail >= STREAMING_READ_BUF { 689 720 return Err("png: IDAT decompression stuck"); 690 721 } 691 722 }

+13 -5

smol-epub/src/xml.rs

··· 1 - // Minimal XML tag/attribute scanner for EPUB metadata. 2 - // Not a general parser; handles container.xml and OPF only. 3 - // Single-pass, forward-only, namespace-aware, lenient. 1 + //! Minimal XML tag/attribute scanner for EPUB metadata. 2 + //! 3 + //! Not a general-purpose XML parser — handles `container.xml` and OPF 4 + //! documents only. Single-pass, forward-only, namespace-aware, lenient. 4 5 6 + /// Extract the value of an attribute from a raw XML opening-tag byte slice. 7 + /// 8 + /// `tag_bytes` should start at the tag name (after `<`) and end before `>`. 9 + /// Returns `None` if the attribute is not found. 5 10 pub fn get_attr<'a>(tag_bytes: &'a [u8], attr_name: &[u8]) -> Option<&'a [u8]> { 6 11 let mut pos = 0; 7 12 let len = tag_bytes.len(); ··· 69 74 None 70 75 } 71 76 72 - // text of first element matching tag_name (namespace-aware) 77 + /// Return the text content of the first element whose local name matches 78 + /// `tag_name` (namespace-aware: `dc:title` matches `title`). 73 79 pub fn tag_text<'a>(data: &'a [u8], tag_name: &[u8]) -> Option<&'a [u8]> { 74 80 let mut pos = 0; 75 81 ··· 121 127 None 122 128 } 123 129 124 - // invoke cb for every opening tag matching tag_name (namespace-aware) 130 + /// Invoke `cb` for every opening tag whose local name matches `tag_name` 131 + /// (namespace-aware). The callback receives the tag body bytes (from the 132 + /// tag name up to but not including `>`). 125 133 pub fn for_each_tag<'a>(data: &'a [u8], tag_name: &[u8], mut cb: impl FnMut(&'a [u8])) { 126 134 let mut pos = 0; 127 135

+40 -7

smol-epub/src/zip.rs

··· 1 - // ZIP central directory parser and streaming entry extraction. 2 - // ZipIndex: 256 entries inline (~5KB); names heap-allocated during parse. 3 - // DEFLATE in 4KB chunks; try_reserve throughout for graceful OOM. 1 + //! ZIP central-directory parser and streaming entry extraction. 2 + //! 3 + //! [`ZipIndex`] holds up to 256 entries inline (~5 KB); entry names are 4 + //! heap-allocated during parse. DEFLATE decompression streams in 4 KB 5 + //! chunks; `try_reserve` is used throughout for graceful OOM handling. 4 6 5 7 use alloc::boxed::Box; 6 8 use alloc::vec; ··· 12 14 const CD_SIG: u32 = 0x0201_4b50; 13 15 const LOCAL_SIG: u32 = 0x0403_4b50; 14 16 17 + /// ZIP compression method: stored (no compression). 15 18 pub const METHOD_STORED: u16 = 0; 19 + /// ZIP compression method: DEFLATE. 16 20 pub const METHOD_DEFLATE: u16 = 8; 17 21 18 22 #[inline] ··· 25 29 u32::from_le_bytes([d[o], d[o + 1], d[o + 2], d[o + 3]]) 26 30 } 27 31 32 + /// A single entry in the ZIP central directory. 28 33 #[derive(Clone, Copy)] 29 34 pub struct ZipEntry { 35 + /// Byte offset into the name pool where this entry's name starts. 30 36 pub name_start: u16, 37 + /// Length of the entry name in bytes. 31 38 pub name_len: u16, 39 + /// Byte offset of the local file header in the ZIP file. 32 40 pub local_offset: u32, 41 + /// Compressed size in bytes. 33 42 pub comp_size: u32, 43 + /// Uncompressed size in bytes. 34 44 pub uncomp_size: u32, 45 + /// Compression method ([`METHOD_STORED`] or [`METHOD_DEFLATE`]). 35 46 pub method: u16, 36 47 } 37 48 ··· 46 57 }; 47 58 } 48 59 60 + /// Maximum number of entries the [`ZipIndex`] can hold. 49 61 pub const MAX_ENTRIES: usize = 256; 50 62 63 + /// In-memory index of a ZIP archive's central directory. 64 + /// 65 + /// Holds up to [`MAX_ENTRIES`] entries inline (~5 KB); entry names are 66 + /// stored in a single heap-allocated byte pool. 
51 67 pub struct ZipIndex { 52 68 entries: [ZipEntry; MAX_ENTRIES], 53 69 count: u16, ··· 61 77 } 62 78 63 79 impl ZipIndex { 80 + /// Create a new, empty index. 64 81 pub const fn new() -> Self { 65 82 Self { 66 83 entries: [ZipEntry::EMPTY; MAX_ENTRIES], ··· 69 86 } 70 87 } 71 88 89 + /// Remove all entries and free the name pool. 72 90 pub fn clear(&mut self) { 73 91 self.count = 0; 74 92 self.names = Vec::new(); 75 93 } 76 94 77 - // parse EOCD from file tail; return (cd_offset, cd_size) 95 + /// Parse the End-of-Central-Directory record from the last bytes of a 96 + /// ZIP file. Returns `(cd_offset, cd_size)`. 97 + /// 98 + /// `tail` should be the final ≤ 65557 bytes of the file (22 bytes is 99 + /// the minimum for a ZIP with no comment). 78 100 pub fn parse_eocd(tail: &[u8], file_size: u32) -> Result<(u32, u32), &'static str> { 79 101 if tail.len() < 22 { 80 102 return Err("zip: tail too short for EOCD"); ··· 101 123 Ok((cd_offset, cd_size)) 102 124 } 103 125 104 - // parse central directory into entry index 126 + /// Parse a central-directory blob into this index, replacing any 127 + /// previously stored entries. 105 128 pub fn parse_central_directory(&mut self, cd: &[u8]) -> Result<(), &'static str> { 106 129 self.count = 0; 107 130 self.names.clear(); ··· 158 181 Ok(()) 159 182 } 160 183 184 + /// Number of entries in the index. 161 185 #[inline] 162 186 pub fn count(&self) -> usize { 163 187 self.count as usize 164 188 } 165 189 190 + /// Return a reference to the entry at `idx`. Panics if out of range. 166 191 #[inline] 167 192 pub fn entry(&self, idx: usize) -> &ZipEntry { 168 193 assert!(idx < self.count as usize); 169 194 &self.entries[idx] 170 195 } 171 196 197 + /// Return the filename of the entry at `idx` as a `&str`, or `""` if the stored name is not valid UTF-8. Panics if `idx` is out of range. 
172 198 pub fn entry_name(&self, idx: usize) -> &str { 173 199 let e = self.entry(idx); 174 200 let start = e.name_start as usize; ··· 176 202 core::str::from_utf8(&self.names[start..end]).unwrap_or("") 177 203 } 178 204 205 + /// Find an entry by exact (case-sensitive) name. Returns its index. 179 206 pub fn find(&self, name: &str) -> Option<usize> { 180 207 let name_bytes = name.as_bytes(); 181 208 for i in 0..self.count as usize { ··· 189 216 None 190 217 } 191 218 219 + /// Find an entry by case-insensitive ASCII name. Returns its index. 192 220 pub fn find_icase(&self, name: &str) -> Option<usize> { 193 221 let target = name.as_bytes(); 194 222 for i in 0..self.count as usize { ··· 203 231 None 204 232 } 205 233 206 - // bytes past local file header to entry data 234 + /// Given the first 30+ bytes of a local file header, return the number 235 + /// of bytes to skip past the header to reach the entry's data. 207 236 pub fn local_header_data_skip(header: &[u8]) -> Result<u32, &'static str> { 208 237 if header.len() < 30 { 209 238 return Err("zip: local header too short"); ··· 217 246 } 218 247 } 219 248 220 - // entry extraction 249 + // ── entry extraction ──────────────────────────────────────────────── 221 250 251 + /// Extract a complete ZIP entry into a heap-allocated `Vec<u8>`. 252 + /// 253 + /// Supports both stored and DEFLATE-compressed entries. The `read_fn` 254 + /// closure reads bytes at a given absolute offset. 222 255 pub fn extract_entry<E, F>( 223 256 entry: &ZipEntry, 224 257 local_offset: u32,

+1 -1

src/apps/reader.rs

··· 26 26 use crate::fonts; 27 27 use crate::ui::quick_menu::QuickAction; 28 28 use crate::ui::{Alignment, BUTTON_BAR_H, CONTENT_TOP, Region, StackFmt}; 29 + use smol_epub::DecodedImage; 29 30 use smol_epub::cache; 30 31 use smol_epub::epub::{self, EpubMeta, EpubSpine, EpubToc, TocSource}; 31 32 use smol_epub::html_strip::{ 32 33 BOLD_OFF, BOLD_ON, HEADING_OFF, HEADING_ON, IMG_REF, ITALIC_OFF, ITALIC_ON, MARKER, QUOTE_OFF, 33 34 QUOTE_ON, 34 35 }; 35 - use smol_epub::png::DecodedImage; 36 36 use smol_epub::zip::{self, ZipIndex}; 37 37 38 38 const MARGIN: u16 = 8;

Configure Feed

Configure Feed