// data endpoint for entity 90008 (a.k.a. a website)
1import { env } from '$env/dynamic/private';
2import { get, writable } from 'svelte/store';
3import { type Robot } from 'robots-parser';
4import robotsParser from 'robots-parser';
5import { PUBLIC_BASE_URL } from '$env/static/public';
6import { darkVisitors } from './darkvisitors';
7import { AgentType } from '@darkvisitors/sdk';
8
// Module-level cache for the generated robots.txt, shared across requests.
// Parsed form of the robots.txt (null until first parse in getRobotsTxt/testUa).
const cachedParsedRobots = writable<Robot | null>(null);
// Raw robots.txt body; '' means "not fetched yet" and triggers a (re)fetch.
const cachedRobots = writable<string>('');
// Timestamp (ms) of the last successful fetch; seeds with module-load time,
// but the empty cachedRobots still forces an initial fetch on first use.
const lastFetched = writable<number>(Date.now());
12
13const fetchRobotsTxt = async () => {
14 try {
15 const robotsTxt = await darkVisitors.generateRobotsTxt([
16 AgentType.AIAgent,
17 AgentType.AIAssistant,
18 AgentType.AIDataScraper,
19 AgentType.AISearchCrawler,
20 AgentType.UndocumentedAIAgent,
21 AgentType.SEOCrawler
22 ]);
23 lastFetched.set(Date.now());
24 return robotsTxt.replace("User-agent: Claude-User\nDisallow: /", "");
25 } catch (error) {
26 console.error('failed to fetch robots.txt:', error);
27 return '';
28 }
29};
30
31export const getRobotsTxt = async () => {
32 let robotsTxt = get(cachedRobots);
33 if (robotsTxt.length === 0 || Date.now() - get(lastFetched) > 1000 * 60 * 60 * 24) {
34 robotsTxt = await fetchRobotsTxt();
35 cachedRobots.set(robotsTxt);
36 cachedParsedRobots.set(robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, robotsTxt));
37 }
38 return robotsTxt;
39};
40
41export const testUa = async (url: string, ua: string) => {
42 if (ua.length === 0) return false;
43 let parsedRobots = get(cachedParsedRobots);
44 if (parsedRobots === null) {
45 parsedRobots = robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, await getRobotsTxt());
46 cachedParsedRobots.set(parsedRobots);
47 }
48 return parsedRobots.isAllowed(url, ua);
49};