A server-side link shortening service powered by Linkat
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

Update encoding.ts

Add comments

Authored by Ewan Croft and committed by GitHub
8f263672 803bd0ca

+61 -33
src/lib/utils/encoding.ts
··· 1 - import { SHORTCODE } from '$lib/constants'; 2 - import { parse, getDomain } from 'tldts'; 1 + import { SHORTCODE } from '$lib/constants'; // Import constants for shortcode configuration 2 + import { parse, getDomain } from 'tldts'; // Import utility functions for domain parsing 3 3 4 - const BASE_CHARS = SHORTCODE.CHARS; 5 - const BASE = BASE_CHARS.length; 4 + // Constants related to the character set for encoding 5 + const BASE_CHARS = SHORTCODE.CHARS; // Charset for the shortcode encoding 6 + const BASE = BASE_CHARS.length; // Base (the number of unique characters in the charset) 6 7 8 + // Hashes a given string into a bigint value 7 9 function hashString(text: string): bigint { 8 - let hash = 1469598103934665603n; 10 + let hash = 1469598103934665603n; // FNV-1a hash initialisation value 9 11 for (let i = 0; i < text.length; i++) { 10 - const char = BigInt(text.charCodeAt(i)); 11 - hash = (hash ^ char) * 1099511628211n; 12 + const char = BigInt(text.charCodeAt(i)); // Convert each character to a bigint 13 + hash = (hash ^ char) * 1099511628211n; // FNV-1a hashing algorithm 12 14 } 13 - return hash < 0n ? -hash : hash; 15 + return hash < 0n ? 
-hash : hash; // Ensure the hash is positive 14 16 } 15 17 18 + // Converts a number (bigint) into a base encoded string with a given length 16 19 function toBase(num: bigint, length: number, seed = ''): string { 17 - let encoded = ''; 20 + let encoded = ''; // The resulting encoded string 18 21 let n = num; 19 22 for (let i = 0; i < length; i++) { 20 23 let rem: bigint; 24 + // Calculate remainder and divide to get the next digit 21 25 if (n > 0n) { 22 26 rem = n % BigInt(BASE); 23 27 n = n / BigInt(BASE); 24 28 } else { 29 + // Fallback if number is 0, use a hash for deterministic behaviour 25 30 const fallback = hashString(num.toString() + '::' + seed + '::' + i.toString()); 26 31 rem = fallback % BigInt(BASE); 27 32 } 28 - encoded = BASE_CHARS[Number(rem)] + encoded; 33 + encoded = BASE_CHARS[Number(rem)] + encoded; // Prepend the character for this base value 29 34 } 30 35 return encoded; 31 36 } 32 37 38 + // Normalises a URL by ensuring it's well-formed and canonical 33 39 function normaliseUrl(url: string): string { 34 40 try { 41 + // Ensure the URL starts with 'https://' and parse it 35 42 const parsed = new URL(url.startsWith('http') ? 
url : `https://${url}`); 36 - parsed.hash = ''; 43 + parsed.hash = ''; // Remove hash fragment 37 44 45 + // Sort URL query parameters alphabetically 38 46 const sortedParams = [...parsed.searchParams.entries()].sort((a, b) => 39 47 a[0].localeCompare(b[0]) 40 48 ); 41 - parsed.search = ''; 42 - for (const [key, value] of sortedParams) parsed.searchParams.append(key, value); 49 + parsed.search = ''; // Clear existing search parameters 50 + for (const [key, value] of sortedParams) parsed.searchParams.append(key, value); // Rebuild query string 43 51 44 - parsed.hostname = parsed.hostname.toLowerCase(); 45 - parsed.protocol = 'https:'; 46 - return parsed.toString(); 52 + parsed.hostname = parsed.hostname.toLowerCase(); // Convert hostname to lowercase 53 + parsed.protocol = 'https:'; // Ensure HTTPS protocol is used 54 + return parsed.toString(); // Return the normalised URL as a string 47 55 } catch (e) { 56 + // If URL parsing fails, return the original URL (trimmed) 48 57 return url.trim(); 49 58 } 50 59 } 51 60 61 + // Extracts the base domain from a URL 52 62 function getBaseDomain(url: string): string { 53 63 try { 64 + // Use tldts library to get the domain from the URL 54 65 const domain = getDomain(url, { allowPrivateDomains: false }); 55 66 if (domain) return domain.toLowerCase(); 56 67 68 + // Fallback to manual parsing if tldts fails 57 69 const parsed = parse(url, { extractHostname: true }); 58 70 return (parsed.hostname ?? 
'').toLowerCase(); 59 71 } catch (e) { 72 + // Return an empty string if domain extraction fails 60 73 return ''; 61 74 } 62 75 } 63 76 77 + // Main function to encode a URL into a shortcode of specified length 64 78 export function encodeUrl(url: string, length: number = SHORTCODE.DEFAULT_LENGTH): string { 79 + // Validate and adjust the length of the shortcode 65 80 if (!Number.isInteger(length) || length < 3) length = SHORTCODE.DEFAULT_LENGTH; 66 81 67 - const DOMAIN_PREFIX_LENGTH = 2; 82 + const DOMAIN_PREFIX_LENGTH = 2; // Number of characters used for the domain prefix 68 83 84 + // Normalise the URL and extract the base domain 69 85 const normalised = normaliseUrl(url); 70 86 const apex = getBaseDomain(normalised) || ''; 71 87 88 + // Hash the domain to generate a prefix 72 89 const domainHash = hashString(apex || normalised); 73 90 const domainPrefix = toBase(domainHash, DOMAIN_PREFIX_LENGTH, 'domain'); 74 91 92 + // Calculate the remaining length for the URL core and tail 75 93 const remaining = Math.max(1, length - DOMAIN_PREFIX_LENGTH); 76 94 77 - let hostname = ''; 95 + let hostname = ''; // The hostname portion of the URL 78 96 try { 79 - hostname = new URL(normalised).hostname.toLowerCase(); 97 + hostname = new URL(normalised).hostname.toLowerCase(); // Try to extract hostname from normalised URL 80 98 } catch (e) { 99 + // Fallback if URL parsing fails 81 100 try { 82 101 hostname = new URL(url.startsWith('http') ? 
url : `https://${url}`).hostname.toLowerCase(); 83 102 } catch { 84 - hostname = ''; 103 + hostname = ''; // If both parsing attempts fail, leave hostname empty 85 104 } 86 105 } 87 106 88 107 let subLevels: string[] = []; 108 + // If there is a subdomain, split it into separate levels 89 109 if (apex && hostname && hostname !== apex) { 90 - const sub = hostname.replace(new RegExp(`\.${apex}$`), ''); 91 - subLevels = sub.split('.'); 110 + const sub = hostname.replace(new RegExp(`\.${apex}$`), ''); // Remove the apex domain 111 + subLevels = sub.split('.'); // Split subdomains by '.' 92 112 } 93 113 114 + // URL core length is determined based on the remaining space after the domain prefix 94 115 const MIN_URL_CORE = 1; 95 116 const MIN_TAIL = 1; 96 - const tailLength = remaining; 117 + const tailLength = remaining; // Length allocated to the tail portion of the shortcode 97 118 119 + // Hash the normalised URL for the URL core portion of the shortcode 98 120 const urlHash = hashString(normalised + '::url'); 99 - const urlCoreLength = remaining - subLevels.length; 121 + const urlCoreLength = remaining - subLevels.length; // Account for subdomain levels 100 122 const urlCore = toBase(urlHash, Math.max(MIN_URL_CORE, urlCoreLength), 'url'); 101 123 124 + // Generate subdomain-based tail (if applicable) 102 125 const subTail: string[] = []; 103 - const reversedSubLevels = subLevels.slice().reverse(); 126 + const reversedSubLevels = subLevels.slice().reverse(); // Reverse the subdomain levels for encoding 104 127 for (let i = 0; i < reversedSubLevels.length; i++) { 105 - const h = hashString(reversedSubLevels[i] + '::sub'); 106 - subTail.push(toBase(h, 1, 'sub' + i)); 128 + const h = hashString(reversedSubLevels[i] + '::sub'); // Hash the subdomain level 129 + subTail.push(toBase(h, 1, 'sub' + i)); // Add to subTail 107 130 } 108 131 132 + // If no subdomain tail is generated, use a fallback hash for the tail 109 133 let tail = subTail.join(''); 110 134 if (!tail) { 111 
135 const fallbackHash = hashString(normalised + '::fallback'); 112 136 tail = toBase(fallbackHash, tailLength, 'sub'); 113 137 } 114 138 139 + // Combine domain prefix, URL core, and tail to form the final shortcode 115 140 let out = domainPrefix + urlCore + tail; 116 - if (out.length > length) out = out.slice(0, length); 141 + if (out.length > length) out = out.slice(0, length); // Trim to the desired length 117 142 if (out.length < length) { 143 + // Pad the shortcode if it is too short 118 144 let pad = ''; 119 145 let i = 0; 120 146 while (out.length + pad.length < length) { ··· 122 148 pad += toBase(h, Math.min(4, length - out.length - pad.length), 'pad2' + i); 123 149 i++; 124 150 } 125 - out += pad.slice(0, length - out.length); 151 + out += pad.slice(0, length - out.length); // Append padding to reach the correct length 126 152 } 127 153 128 - // --- LOGGING MAX COMBINATIONS --- 129 - const maxCombinations = BigInt(BASE) ** BigInt(length); 154 + // --- LOGGING MAX COMBINATIONS --- (for debugging purposes) 155 + const maxCombinations = BigInt(BASE) ** BigInt(length); // Calculate the max possible combinations for the shortcode 130 156 console.log(`[Shortcode Info] URL: ${url}`); 131 157 console.log(`[Shortcode Info] Length: ${length}, Charset: ${BASE} chars`); 132 158 console.log(`[Shortcode Info] Max possible combinations: ${maxCombinations.toString()}`); ··· 134 160 `[Shortcode Info] Domain prefix: ${domainPrefix}, URL core: ${urlCore}, Subdomain tail: ${tail}` 135 161 ); 136 162 137 - return out; 163 + return out; // Return the final encoded shortcode 138 164 } 139 165 166 + // Function to validate if a given shortcode is valid (contains only alphanumeric characters) 140 167 export function isValidShortcode(code: string): boolean { 141 168 return /^[0-9a-zA-Z]+$/.test(code); 142 169 } 143 170 171 + // Function to calculate the maximum number of possible combinations for a shortcode of a given length 144 172 export function getMaxCombinations(length: 
number): number { 145 - return Math.pow(BASE, length); 173 + return Math.pow(BASE, length); // BASE raised to the power of length 146 174 }