A container registry that uses the AT Protocol for manifest storage and S3 for blob storage.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

better logic for relative urls

+92 -9
+50 -9
pkg/appview/readme/fetcher.go
··· 7 7 "io" 8 8 "net/http" 9 9 "net/url" 10 + "regexp" 10 11 "strings" 11 12 "time" 12 13 ··· 192 193 return f.renderMarkdown(content, "") 193 194 } 194 195 196 + // Regex patterns for matching relative URLs that need rewriting 197 + // These match src="..." or href="..." where the URL is relative (not absolute, not data:, not #anchor) 198 + var ( 199 + // Match src="filename" where filename doesn't start with http://, https://, //, /, #, data:, or mailto: 200 + relativeSrcPattern = regexp.MustCompile(`src="([^"/:][^"]*)"`) 201 + // Match href="filename" where filename doesn't start with http://, https://, //, /, #, data:, or mailto: 202 + relativeHrefPattern = regexp.MustCompile(`href="([^"/:][^"]*)"`) 203 + ) 204 + 195 205 // rewriteRelativeURLs converts relative URLs to absolute URLs 196 206 func rewriteRelativeURLs(html, baseURL string) string { 197 207 if baseURL == "" { ··· 203 213 return html 204 214 } 205 215 206 - // Simple string replacement for common patterns 207 - // This is a basic implementation - for production, consider using an HTML parser 216 + // Handle root-relative URLs (starting with /) first 217 + // Must be done before bare relative URLs to avoid double-processing 218 + if base.Scheme != "" && base.Host != "" { 219 + root := fmt.Sprintf("%s://%s/", base.Scheme, base.Host) 220 + // Replace src="/" and href="/" but not src="//" (protocol-relative URLs) 221 + html = strings.ReplaceAll(html, `src="/`, fmt.Sprintf(`src="%s`, root)) 222 + html = strings.ReplaceAll(html, `href="/`, fmt.Sprintf(`href="%s`, root)) 223 + } 224 + 225 + // Handle explicit relative paths (./something and ../something) 208 226 html = strings.ReplaceAll(html, `src="./`, fmt.Sprintf(`src="%s`, baseURL)) 209 227 html = strings.ReplaceAll(html, `href="./`, fmt.Sprintf(`href="%s`, baseURL)) 210 228 html = strings.ReplaceAll(html, `src="../`, fmt.Sprintf(`src="%s../`, baseURL)) 211 229 html = strings.ReplaceAll(html, `href="../`, fmt.Sprintf(`href="%s../`, baseURL)) 212 230 213 - // Handle root-relative URLs (starting with /) 214 - if base.Scheme != "" && base.Host != "" { 215 - root := fmt.Sprintf("%s://%s/", base.Scheme, base.Host) 216 - // Replace src="/" and href="/" but not src="//" (absolute URLs) 217 - html = strings.ReplaceAll(html, `src="/`, fmt.Sprintf(`src="%s`, root)) 218 - html = strings.ReplaceAll(html, `href="/`, fmt.Sprintf(`href="%s`, root)) 219 - } 231 + // Handle bare relative URLs (e.g., src="image.png" without ./ prefix) 232 + // Skip URLs that are already absolute (start with http://, https://, or //) 233 + // Skip anchors (#), data URLs (data:), and mailto links 234 + html = relativeSrcPattern.ReplaceAllStringFunc(html, func(match string) string { 235 + // Extract the URL from src="..." 236 + url := match[5 : len(match)-1] // Remove 'src="' and '"' 237 + 238 + // Skip if already processed or is a special URL type 239 + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") || 240 + strings.HasPrefix(url, "//") || strings.HasPrefix(url, "#") || 241 + strings.HasPrefix(url, "data:") || strings.HasPrefix(url, "mailto:") { 242 + return match 243 + } 244 + 245 + return fmt.Sprintf(`src="%s%s"`, baseURL, url) 246 + }) 247 + 248 + html = relativeHrefPattern.ReplaceAllStringFunc(html, func(match string) string { 249 + // Extract the URL from href="..." 250 + url := match[6 : len(match)-1] // Remove 'href="' and '"' 251 + 252 + // Skip if already processed or is a special URL type 253 + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") || 254 + strings.HasPrefix(url, "//") || strings.HasPrefix(url, "#") || 255 + strings.HasPrefix(url, "data:") || strings.HasPrefix(url, "mailto:") { 256 + return match 257 + } 258 + 259 + return fmt.Sprintf(`href="%s%s"`, baseURL, url) 260 + }) 220 261 221 262 return html 222 263 }
+42
pkg/appview/readme/fetcher_test.go
··· 145 145 baseURL: "https://example.com/docs/", 146 146 expected: `<img src="https://example.com//cdn.example.com/image.png">`, 147 147 }, 148 + { 149 + name: "bare relative src (no ./ prefix)", 150 + html: `<img src="image.png">`, 151 + baseURL: "https://example.com/docs/", 152 + expected: `<img src="https://example.com/docs/image.png">`, 153 + }, 154 + { 155 + name: "bare relative href (no ./ prefix)", 156 + html: `<a href="page.html">link</a>`, 157 + baseURL: "https://example.com/docs/", 158 + expected: `<a href="https://example.com/docs/page.html">link</a>`, 159 + }, 160 + { 161 + name: "bare relative with path", 162 + html: `<img src="images/logo.png">`, 163 + baseURL: "https://example.com/docs/", 164 + expected: `<img src="https://example.com/docs/images/logo.png">`, 165 + }, 166 + { 167 + name: "anchor links unchanged", 168 + html: `<a href="#section">link</a>`, 169 + baseURL: "https://example.com/docs/", 170 + expected: `<a href="#section">link</a>`, 171 + }, 172 + { 173 + name: "data URLs unchanged", 174 + html: `<img src="data:image/png;base64,abc123">`, 175 + baseURL: "https://example.com/docs/", 176 + expected: `<img src="data:image/png;base64,abc123">`, 177 + }, 178 + { 179 + name: "mailto links unchanged", 180 + html: `<a href="mailto:test@example.com">email</a>`, 181 + baseURL: "https://example.com/docs/", 182 + expected: `<a href="mailto:test@example.com">email</a>`, 183 + }, 184 + { 185 + name: "mixed bare and prefixed relative URLs", 186 + html: `<img src="slices_and_lucy.png"><a href="./other.md">link</a>`, 187 + baseURL: "https://github.com/user/repo/blob/main/", 188 + expected: `<img src="https://github.com/user/repo/blob/main/slices_and_lucy.png"><a href="https://github.com/user/repo/blob/main/other.md">link</a>`, 189 + }, 148 190 } 149 191 150 192 for _, tt := range tests {