A fork of https://github.com/crosspoint-reader/crosspoint-reader
1import os
2import re
3import gzip
4
5SRC_DIR = "src"
6
def minify_html(html: str) -> str:
    """Minify an HTML document while keeping whitespace-sensitive blocks intact.

    The contents of ``pre``, ``code``, ``textarea``, ``script`` and ``style``
    elements are set aside verbatim, HTML comments are removed, whitespace
    between tags is dropped, remaining whitespace runs are collapsed to a
    single space, and finally the protected blocks are put back.

    Args:
        html: The HTML source text.

    Returns:
        The minified HTML with leading and trailing whitespace stripped.
    """
    # Tags where whitespace should be preserved
    preserve_tags = ['pre', 'code', 'textarea', 'script', 'style']
    preserve_regex = '|'.join(preserve_tags)

    # Protect preserve blocks with placeholders
    preserve_blocks = []

    def preserve(match):
        preserve_blocks.append(match.group(0))
        return f"__PRESERVE_BLOCK_{len(preserve_blocks)-1}__"

    # The lookahead forces the tag name to end right after the listed name,
    # so e.g. <preview> or <code-sample> is not mistaken for <pre> / <code>.
    html = re.sub(
        rf'<({preserve_regex})(?=[\s/>])[\s\S]*?</\1>',
        preserve,
        html,
        flags=re.IGNORECASE,
    )

    # Remove HTML comments
    html = re.sub(r'<!--.*?-->', '', html, flags=re.DOTALL)

    # Collapse all whitespace between tags
    html = re.sub(r'>\s+<', '><', html)

    # Collapse every remaining whitespace run to a single space
    html = re.sub(r'\s+', ' ', html)

    # Restore preserved blocks in a single pass. Substituting them one after
    # another would also rewrite placeholder-looking text inside blocks that
    # were already restored, corrupting content that must stay verbatim.
    placeholders = {
        f"__PRESERVE_BLOCK_{i}__": block
        for i, block in enumerate(preserve_blocks)
    }
    html = re.sub(
        r'__PRESERVE_BLOCK_\d+__',
        lambda match: placeholders.get(match.group(0), match.group(0)),
        html,
    )

    return html.strip()
34
def sanitize_identifier(name: str) -> str:
    """Sanitize a filename to create a valid C identifier.

    C identifiers must:
    - Start with a letter or underscore
    - Contain only letters, digits, and underscores

    Args:
        name: The raw name (typically a filename stem) to convert.

    Returns:
        ``name`` with every character outside ``[a-zA-Z0-9_]`` replaced by an
        underscore, prefixed with an underscore when it would otherwise start
        with a digit. An empty ``name`` yields ``"_"``.
    """
    # Replace non-alphanumeric characters (including hyphens) with underscores
    sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    # An empty string is not an identifier; fall back to a lone underscore
    if not sanitized:
        return "_"
    # Prefix with underscore if starts with a digit
    if sanitized[0].isdigit():
        sanitized = f"_{sanitized}"
    return sanitized
48
def _percent(part: int, whole: int) -> float:
    """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
    return 100 * part / whole if whole else 0.0


# Walk SRC_DIR and, for every .html / .js file found, write a C header next to
# it containing the gzip-compressed content as a byte array plus size constants.
for root, _, files in os.walk(SRC_DIR):
    for file in files:
        if not file.endswith((".html", ".js")):
            continue

        is_html = file.endswith(".html")
        file_path = os.path.join(root, file)
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Only minify HTML files; JS files are typically pre-minified (e.g., jszip.min.js)
        processed = minify_html(content) if is_html else content

        # Encode once and measure the encoded data: the sizes written to the
        # header and printed below are byte counts, which differ from the
        # character counts as soon as the text contains non-ASCII characters.
        original_size = len(content.encode('utf-8'))
        payload = processed.encode('utf-8')

        # Compress with gzip (compresslevel 9 is maximum compression)
        # IMPORTANT: we don't use brotli because Firefox doesn't support brotli
        # in insecure contexts (it is only supported over HTTPS)
        compressed = gzip.compress(payload, compresslevel=9)

        # Create valid C identifier from filename
        # Use appropriate suffix based on file type
        suffix = "Html" if is_html else "Js"
        base_name = sanitize_identifier(f"{os.path.splitext(file)[0]}{suffix}")
        header_path = os.path.join(root, f"{base_name}.generated.h")

        with open(header_path, "w", encoding="utf-8") as h:
            h.write("// THIS FILE IS AUTOGENERATED, DO NOT EDIT MANUALLY\n\n")
            h.write("#pragma once\n")
            h.write("#include <cstddef>\n\n")

            # Write the compressed data as a byte array
            h.write(f"constexpr char {base_name}[] PROGMEM = {{\n")

            # Write bytes in rows of 16
            for i in range(0, len(compressed), 16):
                chunk = compressed[i:i+16]
                hex_values = ', '.join(f'0x{b:02x}' for b in chunk)
                h.write(f" {hex_values},\n")

            h.write("};\n\n")
            h.write(f"constexpr size_t {base_name}CompressedSize = {len(compressed)};\n")
            # Size in bytes of the data obtained after decompression
            h.write(f"constexpr size_t {base_name}OriginalSize = {len(payload)};\n")

        print(f"Generated: {header_path}")
        print(f" Original: {original_size} bytes")
        print(f" Minified: {len(payload)} bytes ({_percent(len(payload), original_size):.1f}%)")
        print(f" Compressed: {len(compressed)} bytes ({_percent(len(compressed), original_size):.1f}%)")