[READ-ONLY] a fast, modern browser for the npm registry
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix: strip more html from package description (#460)

Co-authored-by: Daniel Roe <daniel@roe.dev>

authored by

abeer0
Daniel Roe
and committed by
GitHub
af778fbe a74ef7f0

+255 -8
+39 -5
app/components/MarkdownText.vue
··· 3 3 text: string 4 4 /** When true, renders link text without the anchor tag (useful when inside another link) */ 5 5 plain?: boolean 6 + /** Package name to strip from the beginning of the description (if present) */ 7 + packageName?: string 6 8 }>() 7 9 8 - // Escape HTML to prevent XSS 9 - function escapeHtml(text: string): string { 10 - return text 10 + // Strip markdown image badges from text 11 + function stripMarkdownImages(text: string): string { 12 + // Remove linked images: [![alt](image-url)](link-url) - handles incomplete URLs too 13 + // Using {0,500} instead of * to prevent ReDoS on pathological inputs 14 + text = text.replace(/\[!\[[^\]]{0,500}\]\([^)]{0,2000}\)\]\([^)]{0,2000}\)?/g, '') 15 + // Remove standalone images: ![alt](url) 16 + text = text.replace(/!\[[^\]]{0,500}\]\([^)]{0,2000}\)/g, '') 17 + // Remove any leftover empty links or broken markdown link syntax 18 + text = text.replace(/\[\]\([^)]{0,2000}\)?/g, '') 19 + return text.trim() 20 + } 21 + 22 + // Strip HTML tags and escape remaining HTML to prevent XSS 23 + function stripAndEscapeHtml(text: string): string { 24 + // First strip markdown image badges 25 + let stripped = stripMarkdownImages(text) 26 + 27 + // Then strip actual HTML tags (keep their text content) 28 + // Only match tags that start with a letter or / (to avoid matching things like "a < b > c") 29 + stripped = stripped.replace(/<\/?[a-z][^>]*>/gi, '') 30 + 31 + if (props.packageName) { 32 + // Trim first to handle leading/trailing whitespace from stripped HTML 33 + stripped = stripped.trim() 34 + // Collapse multiple whitespace into single space 35 + stripped = stripped.replace(/\s+/g, ' ') 36 + // Escape special regex characters in package name 37 + const escapedName = props.packageName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') 38 + // Match package name at the start, optionally followed by: space, dash, colon, hyphen, or just space 39 + const namePattern = new RegExp(`^${escapedName}\\s*[-:—]?\\s*`, 'i') 40 + stripped = stripped.replace(namePattern, '').trim() 41 + } 42 + 43 + // Then escape any remaining HTML entities 44 + return stripped 11 45 .replace(/&/g, '&amp;') 12 46 .replace(/</g, '&lt;') 13 47 .replace(/>/g, '&gt;') ··· 19 53 function parseMarkdown(text: string): string { 20 54 if (!text) return '' 21 55 22 - // First escape HTML 23 - let html = escapeHtml(text) 56 + // First strip HTML tags and escape remaining HTML 57 + let html = stripAndEscapeHtml(text) 24 58 25 59 // Bold: **text** or __text__ 26 60 html = html.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
+1 -1
app/pages/[...package].vue
··· 493 493 <!-- Description container with min-height to prevent CLS --> 494 494 <div class="max-w-2xl min-h-[4.5rem]"> 495 495 <p v-if="pkg.description" class="text-fg-muted text-base m-0"> 496 - <MarkdownText :text="pkg.description" /> 496 + <MarkdownText :text="pkg.description" :package-name="pkg.name" /> 497 497 </p> 498 498 <p v-else class="text-fg-subtle text-base m-0 italic"> 499 499 {{ $t('package.no_description') }}
+215 -2
test/nuxt/components/MarkdownText.spec.ts
··· 20 20 }) 21 21 22 22 describe('HTML escaping', () => { 23 - it('escapes HTML tags to prevent XSS', async () => { 23 + it('strips HTML tags to prevent XSS', async () => { 24 24 const component = await mountSuspended(MarkdownText, { 25 25 props: { text: '<script>alert("xss")</script>' }, 26 26 }) 27 + // HTML tags should be stripped (not rendered) 27 28 expect(component.html()).not.toContain('<script>') 28 - expect(component.text()).toContain('<script>') 29 + // Only the text content remains 30 + expect(component.text()).toBe('alert("xss")') 29 31 }) 30 32 31 33 it('escapes special characters', async () => { ··· 200 202 expect(component.find('strong').exists()).toBe(true) 201 203 expect(component.find('em').exists()).toBe(true) 202 204 expect(component.find('code').exists()).toBe(true) 205 + }) 206 + }) 207 + 208 + describe('markdown image stripping', () => { 209 + it('strips standalone markdown images', async () => { 210 + const component = await mountSuspended(MarkdownText, { 211 + props: { text: '![badge](https://img.shields.io/badge.svg) A library' }, 212 + }) 213 + expect(component.text()).toBe('A library') 214 + }) 215 + 216 + it('strips linked markdown images (badges)', async () => { 217 + const component = await mountSuspended(MarkdownText, { 218 + props: { 219 + text: '[![Build Status](https://travis-ci.org/user/repo.svg)](https://travis-ci.org/user/repo) A library', 220 + }, 221 + }) 222 + expect(component.text()).toBe('A library') 223 + }) 224 + 225 + it('strips multiple badges', async () => { 226 + const component = await mountSuspended(MarkdownText, { 227 + props: { 228 + text: '[![npm](https://badge.svg)](https://npm.com) [![build](https://ci.svg)](https://ci.com) A library', 229 + }, 230 + }) 231 + expect(component.text()).toBe('A library') 232 + }) 233 + 234 + it('preserves malformed image syntax without closing paren', async () => { 235 + // Incomplete/malformed markdown images are left as-is for safety 236 + const component = await mountSuspended(MarkdownText, { 237 + props: { text: '![badge](https://example.svg A library' }, 238 + }) 239 + // The image syntax is not stripped because it's malformed (no closing paren) 240 + expect(component.text()).toBe('![badge](https://example.svg A library') 241 + }) 242 + 243 + it('strips empty link syntax', async () => { 244 + const component = await mountSuspended(MarkdownText, { 245 + props: { text: '[](https://example.com) A library' }, 246 + }) 247 + expect(component.text()).toBe('A library') 248 + }) 249 + 250 + it('preserves regular markdown links', async () => { 251 + const component = await mountSuspended(MarkdownText, { 252 + props: { text: '[documentation](https://docs.example.com) is here' }, 253 + }) 254 + const link = component.find('a') 255 + expect(link.exists()).toBe(true) 256 + expect(link.text()).toBe('documentation') 257 + expect(component.text()).toBe('documentation is here') 258 + }) 259 + }) 260 + 261 + describe('packageName prop', () => { 262 + it('strips package name from the beginning of plain text', async () => { 263 + const component = await mountSuspended(MarkdownText, { 264 + props: { 265 + text: 'my-package - A great library', 266 + packageName: 'my-package', 267 + }, 268 + }) 269 + expect(component.text()).toBe('A great library') 270 + }) 271 + 272 + it('strips package name with colon separator', async () => { 273 + const component = await mountSuspended(MarkdownText, { 274 + props: { 275 + text: 'my-package: A great library', 276 + packageName: 'my-package', 277 + }, 278 + }) 279 + expect(component.text()).toBe('A great library') 280 + }) 281 + 282 + it('strips package name with em dash separator', async () => { 283 + const component = await mountSuspended(MarkdownText, { 284 + props: { 285 + text: 'my-package — A great library', 286 + packageName: 'my-package', 287 + }, 288 + }) 289 + expect(component.text()).toBe('A great library') 290 + }) 291 + 292 + it('strips package name without separator', async () => { 293 + const component = await mountSuspended(MarkdownText, { 294 + props: { 295 + text: 'my-package A great library', 296 + packageName: 'my-package', 297 + }, 298 + }) 299 + expect(component.text()).toBe('A great library') 300 + }) 301 + 302 + it('is case-insensitive', async () => { 303 + const component = await mountSuspended(MarkdownText, { 304 + props: { 305 + text: 'MY-PACKAGE - A great library', 306 + packageName: 'my-package', 307 + }, 308 + }) 309 + expect(component.text()).toBe('A great library') 310 + }) 311 + 312 + it('does not strip package name from middle of text', async () => { 313 + const component = await mountSuspended(MarkdownText, { 314 + props: { 315 + text: 'A great my-package library', 316 + packageName: 'my-package', 317 + }, 318 + }) 319 + expect(component.text()).toBe('A great my-package library') 320 + }) 321 + 322 + it('handles scoped package names', async () => { 323 + const component = await mountSuspended(MarkdownText, { 324 + props: { 325 + text: '@org/my-package - A great library', 326 + packageName: '@org/my-package', 327 + }, 328 + }) 329 + expect(component.text()).toBe('A great library') 330 + }) 331 + 332 + it('handles package names with special regex characters', async () => { 333 + const component = await mountSuspended(MarkdownText, { 334 + props: { 335 + text: 'pkg.name+test - A great library', 336 + packageName: 'pkg.name+test', 337 + }, 338 + }) 339 + expect(component.text()).toBe('A great library') 340 + }) 341 + 342 + it('strips package name from HTML-containing descriptions', async () => { 343 + const component = await mountSuspended(MarkdownText, { 344 + props: { 345 + text: '<b>my-package</b> - A great library', 346 + packageName: 'my-package', 347 + }, 348 + }) 349 + expect(component.text()).toBe('A great library') 350 + }) 351 + 352 + it('strips package name from descriptions with markdown images', async () => { 353 + const component = await mountSuspended(MarkdownText, { 354 + props: { 355 + text: '![badge](https://badge.svg) my-package - A great library', 356 + packageName: 'my-package', 357 + }, 358 + }) 359 + expect(component.text()).toBe('A great library') 360 + }) 361 + 362 + it('does nothing when packageName is not provided', async () => { 363 + const component = await mountSuspended(MarkdownText, { 364 + props: { 365 + text: 'my-package - A great library', 366 + }, 367 + }) 368 + expect(component.text()).toBe('my-package - A great library') 369 + }) 370 + }) 371 + 372 + describe('HTML tag stripping', () => { 373 + it('strips simple HTML tags but keeps content', async () => { 374 + const component = await mountSuspended(MarkdownText, { 375 + props: { text: '<b>bold text</b> here' }, 376 + }) 377 + expect(component.text()).toBe('bold text here') 378 + expect(component.html()).not.toContain('<b>') 379 + }) 380 + 381 + it('strips nested HTML tags', async () => { 382 + const component = await mountSuspended(MarkdownText, { 383 + props: { text: '<div><span>nested</span> content</div>' }, 384 + }) 385 + expect(component.text()).toBe('nested content') 386 + }) 387 + 388 + it('strips self-closing tags', async () => { 389 + const component = await mountSuspended(MarkdownText, { 390 + props: { text: 'before<br/>after' }, 391 + }) 392 + expect(component.text()).toBe('beforeafter') 393 + }) 394 + 395 + it('strips tags with attributes', async () => { 396 + const component = await mountSuspended(MarkdownText, { 397 + props: { text: '<a href="https://evil.com">click me</a>' }, 398 + }) 399 + expect(component.text()).toBe('click me') 400 + expect(component.find('a').exists()).toBe(false) 401 + }) 402 + 403 + it('preserves text that looks like comparison operators', async () => { 404 + const component = await mountSuspended(MarkdownText, { 405 + props: { text: 'x < y > z and a < b && c > d' }, 406 + }) 407 + expect(component.text()).toBe('x < y > z and a < b && c > d') 408 + }) 409 + 410 + it('handles mixed HTML and markdown', async () => { 411 + const component = await mountSuspended(MarkdownText, { 412 + props: { text: '<b>bold</b> and **also bold**' }, 413 + }) 414 + expect(component.text()).toBe('bold and also bold') 415 + expect(component.find('strong').exists()).toBe(true) 203 416 }) 204 417 }) 205 418 })