Small wget-like mirroring utility.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add seen assets tracking to prevent duplicate downloads

rektide 768901a9 f2f03323

+6
+5
src/scraper/scraper.ts
···
4 4 import { getPageOutputPath } from "../url/paths.ts";
5 5 import { extractResources } from "./html.ts";
6 6 import { downloadAsset } from "./downloader.ts";
7 + import { seenAssets } from "./seen.ts";
7 8
8 9 async function scrapePage(
9 10 url: URL,
···
84 85 }
85 86
86 87 for (const asset of assets) {
88 + if (seenAssets.has(asset)) continue;
89 +
90 + seenAssets.add(asset);
91 +
87 92 try {
88 93 await downloadAsset(new URL(asset), outputDir, overwrite, stripHost, replacementHost);
89 94 } catch {}
+1
src/scraper/seen.ts
··· 1 + export const seenAssets = new Set<string>();