scripts/build_html.py at master · edouard.paris/crosspoint-reader

edouard.paris / crosspoint-reader
fork
A fork of https://github.com/crosspoint-reader/crosspoint-reader
fork
crosspoint-reader / scripts / build_html.py
at master 93 lines 3.8 kB view raw
wrap content
pablohc feat: integrated epub optimizer (#1224) 5w ago
7d56810e
 1import os
 2import re
 3import gzip
 4
 5SRC_DIR = "src"
 6
 7def minify_html(html: str) -> str:
 8    # Tags where whitespace should be preserved
 9    preserve_tags = ['pre', 'code', 'textarea', 'script', 'style']
10    preserve_regex = '|'.join(preserve_tags)
11
12    # Protect preserve blocks with placeholders
13    preserve_blocks = []
14    def preserve(match):
15        preserve_blocks.append(match.group(0))
16        return f"__PRESERVE_BLOCK_{len(preserve_blocks)-1}__"
17
18    html = re.sub(rf'<({preserve_regex})[\s\S]*?</\1>', preserve, html, flags=re.IGNORECASE)
19
20    # Remove HTML comments
21    html = re.sub(r'<!--.*?-->', '', html, flags=re.DOTALL)
22
23    # Collapse all whitespace between tags
24    html = re.sub(r'>\s+<', '><', html)
25
26    # Collapse multiple spaces inside tags
27    html = re.sub(r'\s+', ' ', html)
28
29    # Restore preserved blocks
30    for i, block in enumerate(preserve_blocks):
31        html = html.replace(f"__PRESERVE_BLOCK_{i}__", block)
32
33    return html.strip()
34
def sanitize_identifier(name: str) -> str:
    """Sanitize a filename to create a valid C identifier.

    C identifiers must:
    - Start with a letter or underscore
    - Contain only letters, digits, and underscores

    Every character outside ``[a-zA-Z0-9_]`` is replaced by an underscore.
    A leading underscore is added when the result would otherwise start with
    a digit or be empty, so the return value is never an empty string.
    """
    # Replace non-alphanumeric characters (including hyphens) with underscores
    sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    # Prefix with underscore if empty or starting with a digit; an empty
    # string is not an identifier at all.
    if not sanitized or sanitized[0].isdigit():
        sanitized = f"_{sanitized}"
    return sanitized
48
def _percent_of(part: int, whole: int) -> str:
    """Format *part* as a percentage of *whole*; "n/a" when *whole* is zero."""
    if not whole:
        return "n/a"
    return f"{100 * part / whole:.1f}%"


# Walk SRC_DIR and, for every .html / .js file, write a sibling
# "<identifier>.generated.h" header holding the gzip-compressed content as a
# byte array plus its compressed and uncompressed sizes.
for root, _, files in os.walk(SRC_DIR):
    for file in files:
        is_html = file.endswith(".html")
        if not (is_html or file.endswith(".js")):
            continue

        file_path = os.path.join(root, file)
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Only minify HTML files; JS files are typically pre-minified (e.g., jszip.min.js)
        processed = minify_html(content) if is_html else content

        # Encode once and measure the encoded data: every size reported below
        # is a byte count, and len() of a str counts characters, which differs
        # from the byte count as soon as the text contains non-ASCII characters.
        original_size = len(content.encode('utf-8'))
        processed_bytes = processed.encode('utf-8')

        # Compress with gzip (compresslevel 9 is maximum compression)
        # IMPORTANT: brotli is not used because Firefox only supports it in
        # secure (HTTPS) contexts.
        # NOTE(review): gzip.compress() may embed the current time in the gzip
        # header unless mtime= is passed (Python 3.8+), in which case the
        # generated bytes differ between runs - consider mtime=0 if the
        # minimum supported Python version allows it.
        compressed = gzip.compress(processed_bytes, compresslevel=9)

        # Create valid C identifier from filename
        # Use appropriate suffix based on file type
        suffix = "Html" if is_html else "Js"
        base_name = sanitize_identifier(f"{os.path.splitext(file)[0]}{suffix}")
        header_path = os.path.join(root, f"{base_name}.generated.h")

        with open(header_path, "w", encoding="utf-8") as h:
            h.write("// THIS FILE IS AUTOGENERATED, DO NOT EDIT MANUALLY\n\n")
            h.write("#pragma once\n")
            h.write("#include <cstddef>\n\n")

            # Write the compressed data as a byte array
            h.write(f"constexpr char {base_name}[] PROGMEM = {{\n")

            # Write bytes in rows of 16
            for offset in range(0, len(compressed), 16):
                chunk = compressed[offset:offset + 16]
                hex_values = ', '.join(f'0x{b:02x}' for b in chunk)
                h.write(f"  {hex_values},\n")

            h.write("};\n\n")
            h.write(f"constexpr size_t {base_name}CompressedSize = {len(compressed)};\n")
            # Size in bytes of the data that was compressed, i.e. what a
            # decompressor will produce.
            h.write(f"constexpr size_t {base_name}OriginalSize = {len(processed_bytes)};\n")

        print(f"Generated: {header_path}")
        print(f"  Original: {original_size} bytes")
        # _percent_of avoids a ZeroDivisionError when the source file is empty.
        print(f"  Minified: {len(processed_bytes)} bytes ({_percent_of(len(processed_bytes), original_size)})")
        print(f"  Compressed: {len(compressed)} bytes ({_percent_of(len(compressed), original_size)})")
Configure Feed

Configure Feed