the home site for me: also iteration 3 or 4 of my site
4
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add 404 matcher

+158 -3
+149
static/js/404-matcher.js
// Taken from Vale's 404 Guesser
// https://vale.rocks/assets/scripts/404-guesser.js
// which was based on Gwern's 404 Error Page URL Suggester
// https://gwern.net/static/js/404-guesser.js

/**
 * Suggests existing pages on a 404 page by comparing the requested path with
 * every path listed in `/sitemap.xml` using a bounded Levenshtein distance.
 * Results are rendered into the element with id `suggestions`.
 */
class URLSuggester {
  constructor() {
    // Largest edit distance at which a sitemap path still counts as "similar".
    this.maxDistance = 8;
    // Pathnames extracted from the sitemap; filled in by initialize().
    this.urls = [];
  }

  /**
   * Loads the sitemap, computes suggestions for the current path and renders
   * them. Never rejects: every failure is logged to the console instead.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    try {
      const sitemapText = await this.fetchSitemap();
      if (!sitemapText) return;

      this.urls = this.parseUrls(sitemapText);

      const currentPath = window.location.pathname;
      // Visiting the 404 page itself is not a mistyped URL; nothing to suggest.
      if (currentPath.endsWith("/404")) return;

      const suggestions = this.findSimilarUrls(currentPath);
      this.injectSuggestions(currentPath, suggestions);
    } catch (error) {
      console.error("Error initializing URL suggester:", error);
    }
  }

  /**
   * Downloads `/sitemap.xml`.
   *
   * @returns {Promise<string|null>} The sitemap body, or `null` when the
   *   request fails or the server answers with a non-2xx status.
   */
  async fetchSitemap() {
    try {
      const response = await fetch("/sitemap.xml");
      // fetch() only rejects on network errors; an HTTP error page must not be
      // handed to the XML parser as if it were a sitemap.
      if (!response.ok) {
        console.error(`Error fetching sitemap: HTTP ${response.status}`);
        return null;
      }
      return await response.text();
    } catch (error) {
      console.error("Error fetching sitemap:", error);
      return null;
    }
  }

  /**
   * Extracts the pathname of every `<url><loc>` entry in a sitemap.
   * Entries without a `<loc>` or with an unparsable URL are skipped so that a
   * single bad entry cannot disable all suggestions.
   *
   * @param {string} sitemapText - Raw sitemap XML.
   * @returns {string[]} Pathnames in document order (duplicates preserved).
   */
  parseUrls(sitemapText) {
    const xmlDoc = new DOMParser().parseFromString(sitemapText, "text/xml");
    const paths = [];

    for (const node of Array.from(xmlDoc.getElementsByTagName("url"))) {
      const locNode = node.getElementsByTagName("loc")[0];
      const loc = locNode && locNode.textContent ? locNode.textContent.trim() : "";
      if (!loc) continue;

      try {
        paths.push(new URL(loc).pathname);
      } catch (error) {
        // Not an absolute URL; ignore this entry and keep the rest.
      }
    }

    return paths;
  }

  /**
   * Levenshtein distance between `a` and `b`, capped at `maxDistance + 1`.
   *
   * @param {string} a
   * @param {string} b
   * @param {number} maxDistance - Largest distance the caller cares about.
   * @returns {number} The exact distance when it is `<= maxDistance`,
   *   otherwise `maxDistance + 1`.
   */
  boundedLevenshteinDistance(a, b, maxDistance) {
    const overLimit = maxDistance + 1;
    // The length difference is a lower bound on the edit distance.
    if (Math.abs(a.length - b.length) > maxDistance) return overLimit;

    // Only the previous row is needed to compute the next one.
    let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);

    for (let i = 1; i <= b.length; i++) {
      const currentRow = [i];
      // Column 0 takes part in the row minimum so an empty `a` is handled.
      let rowMinimum = i;

      for (let j = 1; j <= a.length; j++) {
        const substitutionCost = b.charAt(i - 1) === a.charAt(j - 1) ? 0 : 1;
        const cell = Math.min(
          previousRow[j - 1] + substitutionCost,
          currentRow[j - 1] + 1,
          previousRow[j] + 1,
        );
        currentRow.push(cell);
        rowMinimum = Math.min(rowMinimum, cell);
      }

      // Values never decrease along an edit path, so once a whole row is over
      // the limit the final distance must be over it as well.
      if (rowMinimum > maxDistance) return overLimit;
      previousRow = currentRow;
    }

    return Math.min(previousRow[a.length], overLimit);
  }

  /**
   * Finds up to ten sitemap paths closest to `targetUrl`.
   *
   * @param {string} targetUrl - Path or URL, resolved against `location.origin`.
   * @returns {string[]} Absolute URLs ordered by increasing edit distance.
   */
  findSimilarUrls(targetUrl) {
    const targetPath = new URL(targetUrl, location.origin).pathname;

    if (targetPath.startsWith("/posts/") && this.urls.includes(targetPath)) {
      return [`${location.origin}${targetPath}`];
    }

    // Deduplicate up front (a Set keeps first-seen order) so each distinct
    // path is measured once; the stable sort below preserves that order
    // among equally distant candidates.
    const candidates = [...new Set(this.urls)].filter(
      (url) =>
        Math.abs(url.length - targetPath.length) <= this.maxDistance &&
        !url.endsWith("/404.html"),
    );

    return candidates
      .map((url) => ({
        url,
        distance: this.boundedLevenshteinDistance(
          url,
          targetPath,
          this.maxDistance,
        ),
      }))
      .filter((item) => item.distance <= this.maxDistance)
      .sort((first, second) => first.distance - second.distance)
      .slice(0, 10)
      .map((item) => `${location.origin}${item.url}`);
  }

  /**
   * Renders the suggestions (or a "nothing found" message) into `#suggestions`.
   * Does nothing when that element is absent.
   *
   * @param {string} currentPath - The path that produced the 404.
   * @param {string[]} suggestions - Absolute URLs to offer as links.
   */
  injectSuggestions(currentPath, suggestions) {
    const app = document.querySelector("#suggestions");
    if (!app) return;

    if (suggestions.length > 0) {
      const intro = document.createElement("p");
      intro.textContent = "I did however find some URLs that might be relevant?";
      app.appendChild(intro);

      for (const url of suggestions) {
        const link = document.createElement("a");
        const cleanUrl = url.replace(/\.html$/, "");
        link.href = cleanUrl;
        link.textContent = cleanUrl;
        app.appendChild(link);
      }

      // Trailing empty paragraph kept from the original markup.
      app.appendChild(document.createElement("p"));
    } else {
      // The path comes from the address bar: insert it as text, never as
      // markup, so sequences like "&lt;" are shown literally.
      const message = document.createElement("p");
      const code = document.createElement("code");
      code.textContent = currentPath;
      message.append(
        "Couldn't find any URLs similar to ",
        code,
        ". I guess it's time to find something new",
      );
      app.appendChild(message);
    }

    // Add rather than overwrite so existing classes on the container survive.
    app.classList.add("url-suggestions");
  }
}

// Start once the DOM is available. If the script is loaded after parsing has
// finished, DOMContentLoaded has already fired, so run immediately instead.
if (typeof document !== "undefined") {
  const startSuggester = () => {
    new URLSuggester().initialize();
  };

  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", startSuggester);
  } else {
    startSuggester();
  }
}
+9 -3
templates/404.html
··· 1 1 {% extends "base.html" %} {% block content %} 2 2 3 3 <div 4 + id="suggestions" 4 5 style=" 5 6 display: flex; 6 7 flex-direction: column; ··· 10 11 " 11 12 > 12 13 <p><strong>I think you stumbled on something non existent :)</strong></p> 13 - <p><i id="redirect">Redirecting you back home in 5</i></p> 14 14 </div> 15 15 16 - <script> 16 + {% set jsHash = get_hash(path="js/404-matcher.js", sha_type=256, base64=true) %} 17 + <script 18 + src="{{ get_url(path='js/404-matcher.js?' ~ jsHash, trailing_slash=false) | safe }}" 19 + defer 20 + ></script> 21 + 22 + <!-- <script> 17 23 const link = document.getElementById("redirect"); 18 24 19 25 // count down to redirect ··· 26 32 window.location.href = "/"; 27 33 } 28 34 }, 1000); 29 - </script> 35 + </script> --> 30 36 31 37 {% endblock content %}