Rewild Your Web
web
browser
dweb
1// SPDX-License-Identifier: AGPL-3.0-or-later
2
3// Update top_1M.csv from https://github.com/zakird/crux-top-lists
4
/** Upper bound on how many site entries are kept from the list (top 100K, for performance). */
const MAX_ENTRIES = 100_000;
/** Upper bound on how many results a single query hands back. */
const MAX_RESULTS = 10;
7
/**
 * Suggestion provider backed by a ranked list of popular sites.
 *
 * The list is fetched once as CSV text (a header line followed by `url,rank`
 * rows), kept in memory sorted by ascending rank, and searched by domain name.
 */
export class TopSitesProvider {
  constructor() {
    this.name = "topsites";
    this.icon = "trophy";
    this.sites = []; // Array of {domain, url, rank}, sorted by ascending rank once loaded
    this.loaded = false;
    this.loading = null; // Promise of the in-flight load attempt, or null when idle
    // Start loading eagerly. load() reports its own errors, so this promise never rejects.
    void this.load();
  }

  /**
   * Fetch and parse the site list. Concurrent callers share one in-flight
   * attempt; a failed attempt is logged and forgotten so that a later call
   * can try again.
   *
   * @returns {Promise<void>} Settles when the attempt finishes. Never rejects;
   *   check `this.loaded` to learn whether the attempt succeeded.
   */
  async load() {
    if (this.loaded) {
      return;
    }
    if (this.loading) {
      return this.loading;
    }

    const attempt = (async () => {
      try {
        // Use absolute URL so it works from any origin (homescreen, system, etc.)
        const response = await fetch("//system.localhost:8888/providers/top_100K.txt");
        if (!response.ok) {
          // Without this check an HTTP error page would be parsed as CSV.
          throw new Error(`Unexpected HTTP status ${response.status}`);
        }
        const text = await response.text();
        const lines = text.split("\n");

        // Index 0 is the header line, so start at 1.
        for (let i = 1; i < lines.length && this.sites.length < MAX_ENTRIES; i++) {
          const line = lines[i].trim();
          if (!line) {
            continue;
          }

          // Split on the last comma: everything before it is the URL.
          const commaIndex = line.lastIndexOf(",");
          if (commaIndex === -1) {
            continue;
          }

          const url = line.substring(0, commaIndex);
          const rank = Number.parseInt(line.substring(commaIndex + 1), 10);
          if (!Number.isFinite(rank)) {
            // A NaN rank would corrupt both the sort order and the scores.
            continue;
          }

          let domain;
          try {
            domain = new URL(url).hostname.toLowerCase();
          } catch {
            // Skip invalid URLs
            continue;
          }

          this.sites.push({ domain, url, rank });
        }

        // Sort by rank (lower is better)
        this.sites.sort((a, b) => a.rank - b.rank);

        this.loaded = true;
        console.log(`TopSitesProvider loaded ${this.sites.length} sites`);
      } catch (e) {
        console.error("Failed to load top sites:", e);
      }
    })();

    this.loading = attempt;
    await attempt;

    // Drop the settled promise after a failure; otherwise every later call
    // would reuse it and the list could never be loaded.
    if (!this.loaded && this.loading === attempt) {
      this.loading = null;
    }
  }

  /**
   * Find sites whose domain matches the given text.
   *
   * A domain is a prefix match when the query starts the domain itself, the
   * domain after a leading "www.", or the part after the domain's first dot.
   * Any other occurrence of the query inside the domain is a substring match.
   * Score = 0.5 for a prefix match + up to 0.5 for popularity.
   *
   * @param {string} text - Raw user input; case and surrounding whitespace are ignored.
   * @returns {Promise<Array<{score: number, kind: string, value: {title: string, url: string}}>>}
   *   At most MAX_RESULTS entries, highest score first. Empty when the input
   *   is blank or not a string, or when the list could not be loaded.
   */
  async query(text) {
    if (typeof text !== "string") {
      return [];
    }
    const query = text.trim().toLowerCase();
    if (query === "") {
      return [];
    }

    // Ensure data is loaded
    await this.load();

    const results = [];
    let prefixCount = 0;
    let substringCount = 0;

    // this.sites is sorted by ascending rank, so within each match category
    // candidates arrive best-first: only the first MAX_RESULTS of a category
    // can ever reach the final cut.
    for (const site of this.sites) {
      const { domain } = site;

      const prefixMatch =
        domain.startsWith(query) ||
        domain.startsWith(`www.${query}`) ||
        domain.substring(domain.indexOf(".") + 1).startsWith(query);

      if (prefixMatch) {
        prefixCount++;
      } else if (substringCount < MAX_RESULTS && domain.includes(query)) {
        substringCount++;
      } else {
        continue;
      }

      // - Prefix match gets bonus of 0.5
      // - Popularity adds up to 0.5 based on rank (lower rank = higher score)
      const matchBonus = prefixMatch ? 0.5 : 0;
      const popularityScore = (1 - site.rank / 1000000) * 0.5;

      results.push({
        score: matchBonus + popularityScore,
        kind: "link",
        value: {
          title: site.domain,
          url: site.url,
        },
      });

      // Once MAX_RESULTS prefix matches are collected nothing later can
      // displace them: later sites rank no better, and the prefix bonus
      // outweighs the popularity term (assuming ranks below 1,000,000, as the
      // popularity formula itself presumes).
      if (prefixCount >= MAX_RESULTS) {
        break;
      }
    }

    // Sort by score and limit results
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, MAX_RESULTS);
  }
}