A fork of https://github.com/crosspoint-reader/crosspoint-reader
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 93 lines 3.8 kB view raw
import os
import re
import gzip

SRC_DIR = "src"


def minify_html(html: str) -> str:
    """Return *html* with comments removed and insignificant whitespace collapsed.

    The contents of whitespace-sensitive elements (``pre``, ``code``,
    ``textarea``, ``script``, ``style``) are swapped out for placeholders
    before minification and restored verbatim afterwards.
    """
    # Tags where whitespace should be preserved
    preserve_tags = ['pre', 'code', 'textarea', 'script', 'style']
    preserve_regex = '|'.join(preserve_tags)

    # Protect preserve blocks with placeholders
    preserve_blocks = []

    def preserve(match):
        preserve_blocks.append(match.group(0))
        return f"__PRESERVE_BLOCK_{len(preserve_blocks)-1}__"

    # The lookahead forces the tag name to end right after the keyword, so
    # elements such as <codex> or <code-block> are not mistaken for <code>.
    html = re.sub(
        rf'<({preserve_regex})(?=[\s/>])[\s\S]*?</\1\s*>',
        preserve,
        html,
        flags=re.IGNORECASE,
    )

    # Remove HTML comments
    html = re.sub(r'<!--.*?-->', '', html, flags=re.DOTALL)

    # Collapse all whitespace between tags
    html = re.sub(r'>\s+<', '><', html)

    # Collapse every remaining run of whitespace into a single space
    html = re.sub(r'\s+', ' ', html)

    def restore(match):
        index = int(match.group(1))
        if index < len(preserve_blocks):
            return preserve_blocks[index]
        # Placeholder-looking text that was never ours: leave it untouched.
        return match.group(0)

    # Restore in a single pass so that placeholder-like text *inside* an
    # already restored block is never substituted a second time.
    html = re.sub(r'__PRESERVE_BLOCK_(\d+)__', restore, html)

    return html.strip()


def sanitize_identifier(name: str) -> str:
    """Sanitize a filename to create a valid C identifier.

    C identifiers must:
    - Start with a letter or underscore
    - Contain only letters, digits, and underscores
    """
    # Replace non-alphanumeric characters (including hyphens) with underscores
    sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    # Prefix with underscore if starts with a digit
    if sanitized and sanitized[0].isdigit():
        sanitized = f"_{sanitized}"
    return sanitized


def _percent(part: int, whole: int) -> float:
    """Return *part* as a percentage of *whole*; 0.0 when *whole* is zero."""
    return 100 * part / whole if whole else 0.0


def _render_header(base_name: str, compressed: bytes, original_size: int) -> str:
    """Build the text of the generated C++ header for one compressed asset."""
    lines = [
        "// THIS FILE IS AUTOGENERATED, DO NOT EDIT MANUALLY\n\n",
        "#pragma once\n",
        "#include <cstddef>\n\n",
        # The compressed data as a byte array
        f"constexpr char {base_name}[] PROGMEM = {{\n",
    ]

    # Bytes in rows of 16
    for offset in range(0, len(compressed), 16):
        chunk = compressed[offset:offset + 16]
        hex_values = ', '.join(f'0x{b:02x}' for b in chunk)
        lines.append(f" {hex_values},\n")

    lines.append("};\n\n")
    lines.append(f"constexpr size_t {base_name}CompressedSize = {len(compressed)};\n")
    lines.append(f"constexpr size_t {base_name}OriginalSize = {original_size};\n")
    return ''.join(lines)


# NOTE(review): intentionally left at module level without a __main__ guard;
# presumably this script is executed by a build hook rather than run directly.
# Confirm how it is invoked before wrapping it in a main() function.
for root, _, files in os.walk(SRC_DIR):
    for file in files:
        is_html = file.endswith(".html")
        if not (is_html or file.endswith(".js")):
            continue

        file_path = os.path.join(root, file)
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Only minify HTML files; JS files are typically pre-minified (e.g., jszip.min.js)
        processed = minify_html(content) if is_html else content

        # Encode once: every size reported below is measured in bytes, which
        # differs from the character count as soon as the file is not ASCII.
        payload = processed.encode('utf-8')
        source_size = len(content.encode('utf-8'))

        # Compress with gzip (compresslevel 9 is maximum compression)
        # IMPORTANT: we don't use brotli because Firefox doesn't support brotli
        # in an insecure context (it is only supported over HTTPS)
        compressed = gzip.compress(payload, compresslevel=9)

        # Create valid C identifier from filename
        # Use appropriate suffix based on file type
        suffix = "Html" if is_html else "Js"
        base_name = sanitize_identifier(f"{os.path.splitext(file)[0]}{suffix}")
        header_path = os.path.join(root, f"{base_name}.generated.h")

        with open(header_path, "w", encoding="utf-8") as h:
            h.write(_render_header(base_name, compressed, len(payload)))

        print(f"Generated: {header_path}")
        print(f" Original: {source_size} bytes")
        print(f" Minified: {len(payload)} bytes ({_percent(len(payload), source_size):.1f}%)")
        print(f" Compressed: {len(compressed)} bytes ({_percent(len(compressed), source_size):.1f}%)")