(READ ONLY) Margin is an open annotation layer for the internet. Powered by the AT Protocol. margin.at
extension web atproto comments
99
fork

Configure Feed

Select the types of activity you want to include in your feed.

Introducing Margin Discover

scanash00 3ad4cf4a 511a6643

+2483 -7
+35 -1
backend/cmd/server/main.go
··· 15 15 16 16 "margin.at/internal/api" 17 17 "margin.at/internal/db" 18 + "margin.at/internal/embeddings" 18 19 "margin.at/internal/firehose" 19 20 "margin.at/internal/logger" 20 21 internalMiddleware "margin.at/internal/middleware" 21 22 "margin.at/internal/oauth" 23 + "margin.at/internal/recommendations" 22 24 "margin.at/internal/sync" 23 25 ) 24 26 ··· 34 36 if err := database.Migrate(); err != nil { 35 37 logger.Fatal("Failed to run migrations: %v", err) 36 38 } 39 + 40 + embeddingClient := embeddings.NewClient() 41 + if err := database.MigrateRecommendations(); err != nil { 42 + logger.Fatal("Failed to run recommendation migrations: %v", err) 43 + } 44 + recService := recommendations.NewService(database, embeddingClient) 45 + logger.Info("Recommendation engine initialized (embeddings enabled: %v)", embeddingClient.IsEnabled()) 37 46 38 47 syncSvc := sync.NewService(database) 39 48 ··· 46 55 firehose.RelayURL = getEnv("BLOCK_RELAY_URL", "wss://jetstream2.us-east.bsky.network/subscribe") 47 56 logger.Info("Firehose URL: %s", firehose.RelayURL) 48 57 58 + if recService.IsEnabled() { 59 + ingester.SetOnAnnotation(recService.OnAnnotation) 60 + ingester.SetOnDocument(recService.OnDocument) 61 + 62 + go func() { 63 + logger.Info("Starting recommendation backfill...") 64 + if err := recService.BackfillDocumentEmbeddings(200); err != nil { 65 + logger.Error("Document embedding backfill error: %v", err) 66 + } 67 + annCount, err := recService.BackfillAnnotationEmbeddings(200) 68 + if err != nil { 69 + logger.Error("Annotation embedding backfill error: %v", err) 70 + } 71 + hlCount, err := recService.BackfillHighlightEmbeddings(200) 72 + if err != nil { 73 + logger.Error("Highlight embedding backfill error: %v", err) 74 + } 75 + profileCount, err := recService.RebuildAllProfiles() 76 + if err != nil { 77 + logger.Error("Profile rebuild error: %v", err) 78 + } 79 + logger.Info("Recommendation backfill complete (annotations: %d, highlights: %d, profiles: %d)", annCount, 
hlCount, profileCount) 80 + }() 81 + } 82 + 49 83 go func() { 50 84 if err := ingester.Start(context.Background()); err != nil { 51 85 logger.Error("Firehose ingester error: %v", err) ··· 73 107 tokenRefresher := api.NewTokenRefresher(database, oauthHandler.GetPrivateKey()) 74 108 annotationSvc := api.NewAnnotationService(database, tokenRefresher) 75 109 76 - handler := api.NewHandler(database, annotationSvc, tokenRefresher, syncSvc) 110 + handler := api.NewHandler(database, annotationSvc, tokenRefresher, syncSvc, recService) 77 111 handler.RegisterRoutes(r) 78 112 79 113 r.Post("/api/annotations", annotationSvc.CreateAnnotation)
+168 -1
backend/internal/api/handler.go
··· 14 14 15 15 "github.com/go-chi/chi/v5" 16 16 17 + "margin.at/internal/config" 17 18 "margin.at/internal/db" 18 19 "margin.at/internal/logger" 20 + "margin.at/internal/recommendations" 19 21 internal_sync "margin.at/internal/sync" 20 22 "margin.at/internal/xrpc" 21 23 ) ··· 27 29 apiKeys *APIKeyHandler 28 30 syncService *internal_sync.Service 29 31 moderation *ModerationHandler 32 + recommendations *recommendations.Service 30 33 } 31 34 32 - func NewHandler(database *db.DB, annotationService *AnnotationService, refresher *TokenRefresher, syncService *internal_sync.Service) *Handler { 35 + func NewHandler(database *db.DB, annotationService *AnnotationService, refresher *TokenRefresher, syncService *internal_sync.Service, recService *recommendations.Service) *Handler { 33 36 return &Handler{ 34 37 db: database, 35 38 annotationService: annotationService, ··· 37 40 apiKeys: NewAPIKeyHandler(database, refresher), 38 41 syncService: syncService, 39 42 moderation: NewModerationHandler(database, refresher), 43 + recommendations: recService, 40 44 } 41 45 } 42 46 ··· 80 84 81 85 r.Get("/trending-tags", h.HandleGetTrendingTags) 82 86 r.Get("/search", h.Search) 87 + r.Get("/recommendations", h.GetRecommendations) 88 + r.Get("/documents", h.GetDocuments) 89 + r.Post("/admin/backfill", h.AdminBackfill) 83 90 84 91 r.Get("/replies", h.GetReplies) 85 92 r.Get("/likes", h.GetLikeCount) ··· 1516 1523 "fetchedCount": len(feed), 1517 1524 }) 1518 1525 } 1526 + 1527 + func (h *Handler) GetRecommendations(w http.ResponseWriter, r *http.Request) { 1528 + viewerDID := h.getViewerDID(r) 1529 + if viewerDID == "" { 1530 + http.Error(w, "authentication required", http.StatusUnauthorized) 1531 + return 1532 + } 1533 + 1534 + if !h.recommendations.IsEnabled() { 1535 + http.Error(w, "recommendations not available", http.StatusServiceUnavailable) 1536 + return 1537 + } 1538 + 1539 + limit := parseIntParam(r, "limit", 20) 1540 + if limit > 100 { 1541 + limit = 100 1542 + } 1543 + 1544 + 
items, err := h.recommendations.GetRecommendations(viewerDID, limit) 1545 + if err != nil { 1546 + logger.Error("Recommendations error for %s: %v", viewerDID, err) 1547 + http.Error(w, "failed to get recommendations", http.StatusInternalServerError) 1548 + return 1549 + } 1550 + 1551 + if items == nil { 1552 + items = []recommendations.RecommendedItem{} 1553 + } 1554 + 1555 + w.Header().Set("Content-Type", "application/json") 1556 + json.NewEncoder(w).Encode(map[string]interface{}{ 1557 + "items": items, 1558 + "totalItems": len(items), 1559 + }) 1560 + } 1561 + 1562 + func (h *Handler) GetDocuments(w http.ResponseWriter, r *http.Request) { 1563 + limit := parseIntParam(r, "limit", 30) 1564 + if limit > 100 { 1565 + limit = 100 1566 + } 1567 + offset := parseIntParam(r, "offset", 0) 1568 + sort := r.URL.Query().Get("sort") 1569 + 1570 + var docs []db.Document 1571 + var err error 1572 + 1573 + switch sort { 1574 + case "popular": 1575 + docs, err = h.db.GetPopularDocuments(limit, offset) 1576 + default: 1577 + docs, err = h.db.GetRecentDocuments(limit, offset) 1578 + } 1579 + 1580 + if err != nil { 1581 + logger.Error("GetDocuments error: %v", err) 1582 + http.Error(w, "failed to get documents", http.StatusInternalServerError) 1583 + return 1584 + } 1585 + 1586 + if docs == nil { 1587 + docs = []db.Document{} 1588 + } 1589 + 1590 + type DocumentResponse struct { 1591 + URI string `json:"uri"` 1592 + AuthorDID string `json:"authorDid"` 1593 + Site string `json:"site"` 1594 + Path *string `json:"path,omitempty"` 1595 + Title string `json:"title"` 1596 + Description *string `json:"description,omitempty"` 1597 + Tags []string `json:"tags,omitempty"` 1598 + CanonicalURL string `json:"canonicalUrl"` 1599 + PublishedAt time.Time `json:"publishedAt"` 1600 + } 1601 + 1602 + items := make([]DocumentResponse, len(docs)) 1603 + for i, d := range docs { 1604 + var tags []string 1605 + if d.TagsJSON != nil { 1606 + json.Unmarshal([]byte(*d.TagsJSON), &tags) 1607 + } 1608 + 
items[i] = DocumentResponse{ 1609 + URI: d.URI, 1610 + AuthorDID: d.AuthorDID, 1611 + Site: d.Site, 1612 + Path: d.Path, 1613 + Title: d.Title, 1614 + Description: d.Description, 1615 + Tags: tags, 1616 + CanonicalURL: d.CanonicalURL, 1617 + PublishedAt: d.PublishedAt, 1618 + } 1619 + } 1620 + 1621 + total, _ := h.db.GetDocumentCount() 1622 + 1623 + w.Header().Set("Content-Type", "application/json") 1624 + json.NewEncoder(w).Encode(map[string]interface{}{ 1625 + "items": items, 1626 + "totalItems": total, 1627 + }) 1628 + } 1629 + 1630 + func (h *Handler) AdminBackfill(w http.ResponseWriter, r *http.Request) { 1631 + session, err := h.refresher.GetSessionWithAutoRefresh(r) 1632 + if err != nil || session == nil { 1633 + http.Error(w, "authentication required", http.StatusUnauthorized) 1634 + return 1635 + } 1636 + if !config.Get().IsAdmin(session.DID) { 1637 + http.Error(w, "admin access required", http.StatusForbidden) 1638 + return 1639 + } 1640 + if !h.recommendations.IsEnabled() { 1641 + http.Error(w, "embeddings not enabled (set OPENAI_API_KEY)", http.StatusServiceUnavailable) 1642 + return 1643 + } 1644 + 1645 + batchSize := parseIntParam(r, "batch", 100) 1646 + 1647 + type result struct { 1648 + Documents int `json:"documents"` 1649 + Annotations int `json:"annotations"` 1650 + ProfilesRebuilt int `json:"profilesRebuilt"` 1651 + Error string `json:"error,omitempty"` 1652 + } 1653 + res := result{} 1654 + 1655 + if err := h.recommendations.BackfillDocumentEmbeddings(batchSize); err != nil { 1656 + logger.Error("Document backfill error: %v", err) 1657 + res.Error = err.Error() 1658 + } 1659 + 1660 + annCount, err := h.recommendations.BackfillAnnotationEmbeddings(batchSize) 1661 + if err != nil { 1662 + logger.Error("Annotation backfill error: %v", err) 1663 + if res.Error != "" { 1664 + res.Error += "; " 1665 + } 1666 + res.Error += err.Error() 1667 + } 1668 + res.Annotations = annCount 1669 + 1670 + profileCount, err := h.recommendations.RebuildAllProfiles() 
1671 + if err != nil { 1672 + logger.Error("Profile rebuild error: %v", err) 1673 + if res.Error != "" { 1674 + res.Error += "; " 1675 + } 1676 + res.Error += err.Error() 1677 + } 1678 + res.ProfilesRebuilt = profileCount 1679 + 1680 + docCount, _ := h.db.GetDocumentCount() 1681 + res.Documents = docCount 1682 + 1683 + w.Header().Set("Content-Type", "application/json") 1684 + json.NewEncoder(w).Encode(res) 1685 + }
+30
backend/internal/db/db.go
··· 498 498 db.Exec(`CREATE INDEX IF NOT EXISTS idx_content_labels_uri ON content_labels(uri)`) 499 499 db.Exec(`CREATE INDEX IF NOT EXISTS idx_content_labels_src ON content_labels(src)`) 500 500 501 + db.Exec(`CREATE TABLE IF NOT EXISTS publications ( 502 + uri TEXT PRIMARY KEY, 503 + author_did TEXT NOT NULL, 504 + url TEXT NOT NULL, 505 + name TEXT NOT NULL, 506 + description TEXT, 507 + show_in_discover BOOLEAN NOT NULL DEFAULT true, 508 + indexed_at ` + dateType + ` NOT NULL 509 + )`) 510 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_publications_author ON publications(author_did)`) 511 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_publications_url ON publications(url)`) 512 + 513 + db.Exec(`CREATE TABLE IF NOT EXISTS documents ( 514 + uri TEXT PRIMARY KEY, 515 + author_did TEXT NOT NULL, 516 + site TEXT NOT NULL, 517 + path TEXT, 518 + title TEXT NOT NULL, 519 + description TEXT, 520 + text_content TEXT, 521 + tags_json TEXT, 522 + canonical_url TEXT, 523 + published_at ` + dateType + ` NOT NULL, 524 + indexed_at ` + dateType + ` NOT NULL 525 + )`) 526 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_documents_author ON documents(author_did)`) 527 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_documents_site ON documents(site)`) 528 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_documents_canonical ON documents(canonical_url)`) 529 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_documents_published ON documents(published_at DESC)`) 530 + 501 531 db.runMigrations() 502 532 503 533 return nil
+502
backend/internal/db/queries_recommendations.go
··· 1 + package db 2 + 3 + import ( 4 + "encoding/json" 5 + "fmt" 6 + "strconv" 7 + "strings" 8 + "time" 9 + ) 10 + 11 + type Document struct { 12 + URI string `json:"uri"` 13 + AuthorDID string `json:"authorDid"` 14 + Site string `json:"site"` 15 + Path *string `json:"path,omitempty"` 16 + Title string `json:"title"` 17 + Description *string `json:"description,omitempty"` 18 + TextContent *string `json:"textContent,omitempty"` 19 + TagsJSON *string `json:"tags,omitempty"` 20 + CanonicalURL string `json:"canonicalUrl"` 21 + PublishedAt time.Time `json:"publishedAt"` 22 + IndexedAt time.Time `json:"indexedAt"` 23 + } 24 + 25 + type Publication struct { 26 + URI string `json:"uri"` 27 + AuthorDID string `json:"authorDid"` 28 + URL string `json:"url"` 29 + Name string `json:"name"` 30 + Description *string `json:"description,omitempty"` 31 + ShowInDiscover bool `json:"showInDiscover"` 32 + IndexedAt time.Time `json:"indexedAt"` 33 + } 34 + 35 + type DocumentEmbedding struct { 36 + DocumentURI string `json:"documentUri"` 37 + Embedding []float32 `json:"embedding"` 38 + UpdatedAt time.Time `json:"updatedAt"` 39 + } 40 + 41 + type AnnotationEmbedding struct { 42 + AnnotationURI string `json:"annotationUri"` 43 + AuthorDID string `json:"authorDid"` 44 + DocumentURI *string `json:"documentUri,omitempty"` 45 + Embedding []float32 `json:"embedding"` 46 + UpdatedAt time.Time `json:"updatedAt"` 47 + } 48 + 49 + type UserProfile struct { 50 + AuthorDID string `json:"authorDid"` 51 + Embedding []float32 `json:"embedding"` 52 + TagAffinities string `json:"tagAffinities"` 53 + AnnotationCount int `json:"annotationCount"` 54 + UpdatedAt time.Time `json:"updatedAt"` 55 + } 56 + 57 + func (db *DB) MigrateRecommendations() error { 58 + dateType := "TIMESTAMP" 59 + if db.driver == "sqlite3" { 60 + dateType = "DATETIME" 61 + } 62 + 63 + _, err := db.Exec(` 64 + CREATE TABLE IF NOT EXISTS document_embeddings ( 65 + document_uri TEXT PRIMARY KEY, 66 + embedding TEXT NOT NULL, 67 + 
updated_at ` + dateType + ` NOT NULL 68 + )`) 69 + if err != nil { 70 + return fmt.Errorf("create document_embeddings table: %w", err) 71 + } 72 + 73 + _, err = db.Exec(` 74 + CREATE TABLE IF NOT EXISTS annotation_embeddings ( 75 + annotation_uri TEXT PRIMARY KEY, 76 + author_did TEXT NOT NULL, 77 + document_uri TEXT, 78 + embedding TEXT NOT NULL, 79 + updated_at ` + dateType + ` NOT NULL 80 + )`) 81 + if err != nil { 82 + return fmt.Errorf("create annotation_embeddings table: %w", err) 83 + } 84 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_ann_emb_author ON annotation_embeddings(author_did)`) 85 + db.Exec(`CREATE INDEX IF NOT EXISTS idx_ann_emb_document ON annotation_embeddings(document_uri)`) 86 + 87 + _, err = db.Exec(` 88 + CREATE TABLE IF NOT EXISTS user_profiles ( 89 + author_did TEXT PRIMARY KEY, 90 + embedding TEXT NOT NULL, 91 + tag_affinities TEXT DEFAULT '{}', 92 + annotation_count INTEGER NOT NULL DEFAULT 0, 93 + updated_at ` + dateType + ` NOT NULL 94 + )`) 95 + if err != nil { 96 + return fmt.Errorf("create user_profiles table: %w", err) 97 + } 98 + 99 + return nil 100 + } 101 + 102 + func (db *DB) UpsertPublication(p *Publication) error { 103 + query := ` 104 + INSERT INTO publications (uri, author_did, url, name, description, show_in_discover, indexed_at) 105 + VALUES ($1, $2, $3, $4, $5, $6, $7) 106 + ON CONFLICT(uri) DO UPDATE SET 107 + name = EXCLUDED.name, 108 + description = EXCLUDED.description, 109 + show_in_discover = EXCLUDED.show_in_discover, 110 + indexed_at = EXCLUDED.indexed_at 111 + ` 112 + _, err := db.Exec(query, p.URI, p.AuthorDID, p.URL, p.Name, p.Description, p.ShowInDiscover, p.IndexedAt) 113 + return err 114 + } 115 + 116 + func (db *DB) DeletePublication(uri string) error { 117 + _, err := db.Exec("DELETE FROM publications WHERE uri = $1", uri) 118 + return err 119 + } 120 + 121 + func (db *DB) GetPublicationByURL(url string) (*Publication, error) { 122 + var p Publication 123 + err := db.QueryRow( 124 + "SELECT uri, author_did, 
url, name, description, show_in_discover, indexed_at FROM publications WHERE url = $1", 125 + url, 126 + ).Scan(&p.URI, &p.AuthorDID, &p.URL, &p.Name, &p.Description, &p.ShowInDiscover, &p.IndexedAt) 127 + if err != nil { 128 + return nil, err 129 + } 130 + return &p, nil 131 + } 132 + 133 + func (db *DB) UpsertDocument(d *Document) error { 134 + query := ` 135 + INSERT INTO documents (uri, author_did, site, path, title, description, text_content, tags_json, canonical_url, published_at, indexed_at) 136 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) 137 + ON CONFLICT(uri) DO UPDATE SET 138 + title = EXCLUDED.title, 139 + description = EXCLUDED.description, 140 + text_content = EXCLUDED.text_content, 141 + tags_json = EXCLUDED.tags_json, 142 + canonical_url = EXCLUDED.canonical_url, 143 + indexed_at = EXCLUDED.indexed_at 144 + ` 145 + _, err := db.Exec(query, d.URI, d.AuthorDID, d.Site, d.Path, d.Title, d.Description, d.TextContent, d.TagsJSON, d.CanonicalURL, d.PublishedAt, d.IndexedAt) 146 + return err 147 + } 148 + 149 + func (db *DB) DeleteDocument(uri string) error { 150 + _, err := db.Exec("DELETE FROM documents WHERE uri = $1", uri) 151 + return err 152 + } 153 + 154 + func (db *DB) GetDocumentByCanonicalURL(canonicalURL string) (*Document, error) { 155 + var d Document 156 + err := db.QueryRow( 157 + `SELECT uri, author_did, site, path, title, description, text_content, tags_json, canonical_url, published_at, indexed_at 158 + FROM documents WHERE canonical_url = $1`, 159 + canonicalURL, 160 + ).Scan(&d.URI, &d.AuthorDID, &d.Site, &d.Path, &d.Title, &d.Description, &d.TextContent, &d.TagsJSON, &d.CanonicalURL, &d.PublishedAt, &d.IndexedAt) 161 + if err != nil { 162 + return nil, err 163 + } 164 + return &d, nil 165 + } 166 + 167 + func (db *DB) GetDocumentByURI(uri string) (*Document, error) { 168 + var d Document 169 + err := db.QueryRow( 170 + `SELECT uri, author_did, site, path, title, description, text_content, tags_json, canonical_url, 
published_at, indexed_at 171 + FROM documents WHERE uri = $1`, 172 + uri, 173 + ).Scan(&d.URI, &d.AuthorDID, &d.Site, &d.Path, &d.Title, &d.Description, &d.TextContent, &d.TagsJSON, &d.CanonicalURL, &d.PublishedAt, &d.IndexedAt) 174 + if err != nil { 175 + return nil, err 176 + } 177 + return &d, nil 178 + } 179 + 180 + func (db *DB) GetDocumentsWithoutEmbeddings(limit int) ([]Document, error) { 181 + rows, err := db.Query(db.Rebind(` 182 + SELECT d.uri, d.author_did, d.site, d.path, d.title, d.description, d.text_content, d.tags_json, d.canonical_url, d.published_at, d.indexed_at 183 + FROM documents d 184 + LEFT JOIN document_embeddings de ON d.uri = de.document_uri 185 + WHERE de.document_uri IS NULL 186 + ORDER BY d.indexed_at DESC 187 + LIMIT ? 188 + `), limit) 189 + if err != nil { 190 + return nil, err 191 + } 192 + defer rows.Close() 193 + return scanDocuments(rows) 194 + } 195 + 196 + func (db *DB) GetAnnotationsWithoutEmbeddings(limit int) ([]Annotation, error) { 197 + rows, err := db.Query(db.Rebind(` 198 + SELECT a.uri, a.author_did, a.motivation, a.body_value, a.body_format, a.body_uri, a.target_source, a.target_hash, a.target_title, a.selector_json, a.tags_json, a.created_at, a.indexed_at, a.cid 199 + FROM annotations a 200 + LEFT JOIN annotation_embeddings ae ON a.uri = ae.annotation_uri 201 + WHERE ae.annotation_uri IS NULL AND a.motivation IN ('commenting', 'highlighting') 202 + ORDER BY a.created_at DESC 203 + LIMIT ? 
204 + `), limit) 205 + if err != nil { 206 + return nil, err 207 + } 208 + defer rows.Close() 209 + return scanAnnotations(rows) 210 + } 211 + 212 + type HighlightForEmbedding struct { 213 + URI string 214 + AuthorDID string 215 + TargetSource string 216 + TargetTitle *string 217 + SelectorJSON *string 218 + TagsJSON *string 219 + } 220 + 221 + func (db *DB) GetHighlightsWithoutEmbeddings(limit int) ([]HighlightForEmbedding, error) { 222 + rows, err := db.Query(db.Rebind(` 223 + SELECT h.uri, h.author_did, h.target_source, h.target_title, h.selector_json, h.tags_json 224 + FROM highlights h 225 + LEFT JOIN annotation_embeddings ae ON h.uri = ae.annotation_uri 226 + WHERE ae.annotation_uri IS NULL 227 + ORDER BY h.created_at DESC 228 + LIMIT ? 229 + `), limit) 230 + if err != nil { 231 + return nil, err 232 + } 233 + defer rows.Close() 234 + 235 + var results []HighlightForEmbedding 236 + for rows.Next() { 237 + var h HighlightForEmbedding 238 + if err := rows.Scan(&h.URI, &h.AuthorDID, &h.TargetSource, &h.TargetTitle, &h.SelectorJSON, &h.TagsJSON); err != nil { 239 + return nil, err 240 + } 241 + results = append(results, h) 242 + } 243 + return results, nil 244 + } 245 + 246 + func (db *DB) GetDistinctAnnotationAuthors() ([]string, error) { 247 + rows, err := db.Query(`SELECT DISTINCT author_did FROM annotation_embeddings`) 248 + if err != nil { 249 + return nil, err 250 + } 251 + defer rows.Close() 252 + var dids []string 253 + for rows.Next() { 254 + var did string 255 + if err := rows.Scan(&did); err != nil { 256 + return nil, err 257 + } 258 + dids = append(dids, did) 259 + } 260 + return dids, nil 261 + } 262 + 263 + func scanDocuments(rows interface { 264 + Next() bool 265 + Scan(...interface{}) error 266 + }) ([]Document, error) { 267 + var docs []Document 268 + for rows.Next() { 269 + var d Document 270 + if err := rows.Scan(&d.URI, &d.AuthorDID, &d.Site, &d.Path, &d.Title, &d.Description, &d.TextContent, &d.TagsJSON, &d.CanonicalURL, &d.PublishedAt, 
&d.IndexedAt); err != nil { 271 + return nil, err 272 + } 273 + docs = append(docs, d) 274 + } 275 + return docs, nil 276 + } 277 + 278 + func (db *DB) GetRecentDocuments(limit, offset int) ([]Document, error) { 279 + rows, err := db.Query(db.Rebind(` 280 + SELECT uri, author_did, site, path, title, description, text_content, tags_json, canonical_url, published_at, indexed_at 281 + FROM documents 282 + ORDER BY published_at DESC 283 + LIMIT ? OFFSET ? 284 + `), limit, offset) 285 + if err != nil { 286 + return nil, err 287 + } 288 + defer rows.Close() 289 + return scanDocuments(rows) 290 + } 291 + 292 + func (db *DB) GetPopularDocuments(limit, offset int) ([]Document, error) { 293 + rows, err := db.Query(db.Rebind(` 294 + SELECT d.uri, d.author_did, d.site, d.path, d.title, d.description, d.text_content, d.tags_json, d.canonical_url, d.published_at, d.indexed_at 295 + FROM documents d 296 + LEFT JOIN annotations a ON a.target_source = d.canonical_url 297 + GROUP BY d.uri 298 + ORDER BY COUNT(a.uri) DESC, d.published_at DESC 299 + LIMIT ? OFFSET ? 
300 + `), limit, offset) 301 + if err != nil { 302 + return nil, err 303 + } 304 + defer rows.Close() 305 + return scanDocuments(rows) 306 + } 307 + 308 + func (db *DB) GetDocumentCount() (int, error) { 309 + var count int 310 + err := db.QueryRow("SELECT COUNT(*) FROM documents").Scan(&count) 311 + return count, err 312 + } 313 + 314 + func (db *DB) UpsertDocumentEmbedding(documentURI string, embedding []float32) error { 315 + vecStr := float32SliceToVectorString(embedding) 316 + _, err := db.Exec( 317 + `INSERT INTO document_embeddings (document_uri, embedding, updated_at) VALUES ($1, $2, $3) 318 + ON CONFLICT(document_uri) DO UPDATE SET embedding = EXCLUDED.embedding, updated_at = EXCLUDED.updated_at`, 319 + documentURI, vecStr, time.Now(), 320 + ) 321 + return err 322 + } 323 + 324 + func (db *DB) UpsertAnnotationEmbedding(annotationURI, authorDID string, documentURI *string, embedding []float32) error { 325 + vecStr := float32SliceToVectorString(embedding) 326 + _, err := db.Exec( 327 + `INSERT INTO annotation_embeddings (annotation_uri, author_did, document_uri, embedding, updated_at) VALUES ($1, $2, $3, $4, $5) 328 + ON CONFLICT(annotation_uri) DO UPDATE SET embedding = EXCLUDED.embedding, document_uri = EXCLUDED.document_uri, updated_at = EXCLUDED.updated_at`, 329 + annotationURI, authorDID, documentURI, vecStr, time.Now(), 330 + ) 331 + return err 332 + } 333 + 334 + func (db *DB) DeleteAnnotationEmbedding(annotationURI string) error { 335 + _, err := db.Exec("DELETE FROM annotation_embeddings WHERE annotation_uri = $1", annotationURI) 336 + return err 337 + } 338 + 339 + func (db *DB) UpsertUserProfile(authorDID string, embedding []float32, tagAffinities map[string]float64, annotationCount int) error { 340 + vecStr := float32SliceToVectorString(embedding) 341 + tagsJSON, _ := json.Marshal(tagAffinities) 342 + _, err := db.Exec( 343 + `INSERT INTO user_profiles (author_did, embedding, tag_affinities, annotation_count, updated_at) VALUES ($1, $2, $3, $4, 
$5) 344 + ON CONFLICT(author_did) DO UPDATE SET embedding = EXCLUDED.embedding, tag_affinities = EXCLUDED.tag_affinities, annotation_count = EXCLUDED.annotation_count, updated_at = EXCLUDED.updated_at`, 345 + authorDID, vecStr, string(tagsJSON), annotationCount, time.Now(), 346 + ) 347 + return err 348 + } 349 + 350 + func (db *DB) GetUserProfile(authorDID string) (*UserProfile, error) { 351 + var p UserProfile 352 + var embStr string 353 + err := db.QueryRow( 354 + `SELECT author_did, embedding, tag_affinities, annotation_count, updated_at FROM user_profiles WHERE author_did = $1`, 355 + authorDID, 356 + ).Scan(&p.AuthorDID, &embStr, &p.TagAffinities, &p.AnnotationCount, &p.UpdatedAt) 357 + if err != nil { 358 + return nil, err 359 + } 360 + p.Embedding = parseVectorString(embStr) 361 + return &p, nil 362 + } 363 + 364 + func (db *DB) GetAnnotationEmbeddingsByAuthor(authorDID string) ([]AnnotationEmbedding, error) { 365 + rows, err := db.Query( 366 + `SELECT annotation_uri, author_did, document_uri, embedding, updated_at FROM annotation_embeddings WHERE author_did = $1`, 367 + authorDID, 368 + ) 369 + if err != nil { 370 + return nil, err 371 + } 372 + defer rows.Close() 373 + 374 + var results []AnnotationEmbedding 375 + for rows.Next() { 376 + var ae AnnotationEmbedding 377 + var embStr string 378 + if err := rows.Scan(&ae.AnnotationURI, &ae.AuthorDID, &ae.DocumentURI, &embStr, &ae.UpdatedAt); err != nil { 379 + return nil, err 380 + } 381 + ae.Embedding = parseVectorString(embStr) 382 + results = append(results, ae) 383 + } 384 + return results, nil 385 + } 386 + 387 + func (db *DB) GetRecentAnnotationEmbeddingsByAuthor(authorDID string, limit int) ([]AnnotationEmbedding, error) { 388 + rows, err := db.Query( 389 + db.Rebind(`SELECT annotation_uri, author_did, document_uri, embedding, updated_at FROM annotation_embeddings WHERE author_did = ? 
ORDER BY updated_at DESC LIMIT ?`), 390 + authorDID, limit, 391 + ) 392 + if err != nil { 393 + return nil, err 394 + } 395 + defer rows.Close() 396 + 397 + var results []AnnotationEmbedding 398 + for rows.Next() { 399 + var ae AnnotationEmbedding 400 + var embStr string 401 + if err := rows.Scan(&ae.AnnotationURI, &ae.AuthorDID, &ae.DocumentURI, &embStr, &ae.UpdatedAt); err != nil { 402 + return nil, err 403 + } 404 + ae.Embedding = parseVectorString(embStr) 405 + results = append(results, ae) 406 + } 407 + return results, nil 408 + } 409 + 410 + type CandidateDocument struct { 411 + URI string `json:"uri"` 412 + AuthorDID string `json:"authorDid"` 413 + Site string `json:"site"` 414 + Path *string `json:"path,omitempty"` 415 + Title string `json:"title"` 416 + Description *string `json:"description,omitempty"` 417 + TagsJSON *string `json:"tags,omitempty"` 418 + CanonicalURL string `json:"canonicalUrl"` 419 + PublishedAt time.Time `json:"publishedAt"` 420 + Embedding []float32 `json:"-"` 421 + Engagement int `json:"engagement"` 422 + } 423 + 424 + func (db *DB) GetCandidateDocuments(userDID string, limit int) ([]CandidateDocument, error) { 425 + rows, err := db.Query(db.Rebind(` 426 + SELECT 427 + d.uri, d.author_did, d.site, d.path, d.title, d.description, d.tags_json, 428 + d.canonical_url, d.published_at, de.embedding, 429 + COALESCE(eng.cnt, 0) AS engagement 430 + FROM documents d 431 + JOIN document_embeddings de ON d.uri = de.document_uri 432 + LEFT JOIN ( 433 + SELECT document_uri, COUNT(DISTINCT author_did) AS cnt 434 + FROM annotation_embeddings 435 + WHERE document_uri IS NOT NULL 436 + GROUP BY document_uri 437 + ) eng ON eng.document_uri = d.uri 438 + LEFT JOIN publications p ON d.site = p.uri OR d.site = p.url 439 + WHERE d.author_did != ? 
440 + AND (p.show_in_discover IS NULL OR p.show_in_discover = true) 441 + AND LENGTH(d.title) > 15 442 + AND (LENGTH(COALESCE(d.description, '')) >= 30 OR LENGTH(COALESCE(d.text_content, '')) >= 100) 443 + AND d.title !~* '(^test$|^test\\s|\\stest$|^testing|^hello\\sworld|^untitled|^draft|^asdf|^lorem|^foo$|^bar$|^placeholder)' 444 + AND d.uri NOT IN ( 445 + SELECT DISTINCT document_uri FROM annotation_embeddings 446 + WHERE author_did = ? AND document_uri IS NOT NULL 447 + ) 448 + ORDER BY d.published_at DESC 449 + LIMIT ? 450 + `), userDID, userDID, limit) 451 + if err != nil { 452 + return nil, fmt.Errorf("candidate query: %w", err) 453 + } 454 + defer rows.Close() 455 + 456 + var results []CandidateDocument 457 + for rows.Next() { 458 + var c CandidateDocument 459 + var embStr string 460 + if err := rows.Scan( 461 + &c.URI, &c.AuthorDID, &c.Site, &c.Path, &c.Title, &c.Description, 462 + &c.TagsJSON, &c.CanonicalURL, &c.PublishedAt, &embStr, &c.Engagement, 463 + ); err != nil { 464 + return nil, err 465 + } 466 + c.Embedding = parseVectorString(embStr) 467 + results = append(results, c) 468 + } 469 + return results, nil 470 + } 471 + 472 + func (db *DB) MatchAnnotationToDocument(targetSource string) (*string, error) { 473 + var uri string 474 + err := db.QueryRow(`SELECT uri FROM documents WHERE canonical_url = $1`, targetSource).Scan(&uri) 475 + if err != nil { 476 + return nil, err 477 + } 478 + return &uri, nil 479 + } 480 + 481 + func float32SliceToVectorString(v []float32) string { 482 + parts := make([]string, len(v)) 483 + for i, f := range v { 484 + parts[i] = fmt.Sprintf("%g", f) 485 + } 486 + return "[" + strings.Join(parts, ",") + "]" 487 + } 488 + 489 + func parseVectorString(s string) []float32 { 490 + s = strings.TrimPrefix(s, "[") 491 + s = strings.TrimSuffix(s, "]") 492 + if s == "" { 493 + return nil 494 + } 495 + parts := strings.Split(s, ",") 496 + result := make([]float32, len(parts)) 497 + for i, p := range parts { 498 + f, _ := 
strconv.ParseFloat(strings.TrimSpace(p), 32) 499 + result[i] = float32(f) 500 + } 501 + return result 502 + }
+220
backend/internal/embeddings/client.go
··· 1 + package embeddings 2 + 3 + import ( 4 + "bytes" 5 + "encoding/json" 6 + "fmt" 7 + "io" 8 + "net/http" 9 + "os" 10 + "strings" 11 + "sync" 12 + "time" 13 + 14 + "margin.at/internal/logger" 15 + ) 16 + 17 + const ( 18 + Model = "text-embedding-3-small" 19 + Dimensions = 1536 20 + MaxTokens = 8191 21 + MaxInputChars = 8000 22 + BatchSize = 64 23 + openAIEndpoint = "https://api.openai.com/v1/embeddings" 24 + ) 25 + 26 + type Client struct { 27 + apiKey string 28 + httpClient *http.Client 29 + mu sync.Mutex 30 + } 31 + 32 + type embeddingRequest struct { 33 + Model string `json:"model"` 34 + Input []string `json:"input"` 35 + Dimensions int `json:"dimensions,omitempty"` 36 + } 37 + 38 + type embeddingResponse struct { 39 + Data []embeddingData `json:"data"` 40 + Usage struct { 41 + TotalTokens int `json:"total_tokens"` 42 + } `json:"usage"` 43 + Error *struct { 44 + Message string `json:"message"` 45 + } `json:"error,omitempty"` 46 + } 47 + 48 + type embeddingData struct { 49 + Index int `json:"index"` 50 + Embedding []float32 `json:"embedding"` 51 + } 52 + 53 + func NewClient() *Client { 54 + apiKey := os.Getenv("OPENAI_API_KEY") 55 + if apiKey == "" { 56 + logger.Info("OPENAI_API_KEY not set — embedding generation will be disabled") 57 + } 58 + return &Client{ 59 + apiKey: apiKey, 60 + httpClient: &http.Client{ 61 + Timeout: 30 * time.Second, 62 + }, 63 + } 64 + } 65 + 66 + func (c *Client) IsEnabled() bool { 67 + return c.apiKey != "" 68 + } 69 + 70 + func (c *Client) Embed(text string) ([]float32, error) { 71 + results, err := c.EmbedBatch([]string{text}) 72 + if err != nil { 73 + return nil, err 74 + } 75 + if len(results) == 0 { 76 + return nil, fmt.Errorf("empty embedding response") 77 + } 78 + return results[0], nil 79 + } 80 + 81 + func (c *Client) EmbedBatch(texts []string) ([][]float32, error) { 82 + if !c.IsEnabled() { 83 + return nil, fmt.Errorf("OpenAI API key not configured") 84 + } 85 + 86 + truncated := make([]string, len(texts)) 87 + for i, t 
:= range texts { 88 + t = truncateText(t, MaxInputChars) 89 + if strings.TrimSpace(t) == "" { 90 + t = " " 91 + } 92 + truncated[i] = t 93 + } 94 + 95 + results := make([][]float32, len(texts)) 96 + 97 + for start := 0; start < len(truncated); start += BatchSize { 98 + end := start + BatchSize 99 + if end > len(truncated) { 100 + end = len(truncated) 101 + } 102 + batch := truncated[start:end] 103 + 104 + embeddings, err := c.callAPI(batch) 105 + if err != nil { 106 + return nil, fmt.Errorf("embedding batch %d-%d failed: %w", start, end, err) 107 + } 108 + 109 + for _, emb := range embeddings { 110 + idx := start + emb.Index 111 + if idx < len(results) { 112 + results[idx] = emb.Embedding 113 + } 114 + } 115 + } 116 + 117 + return results, nil 118 + } 119 + 120 + func (c *Client) callAPI(inputs []string) ([]embeddingData, error) { 121 + reqBody := embeddingRequest{ 122 + Model: Model, 123 + Input: inputs, 124 + } 125 + 126 + body, err := json.Marshal(reqBody) 127 + if err != nil { 128 + return nil, fmt.Errorf("marshal request: %w", err) 129 + } 130 + 131 + req, err := http.NewRequest("POST", openAIEndpoint, bytes.NewReader(body)) 132 + if err != nil { 133 + return nil, fmt.Errorf("create request: %w", err) 134 + } 135 + req.Header.Set("Content-Type", "application/json") 136 + req.Header.Set("Authorization", "Bearer "+c.apiKey) 137 + 138 + resp, err := c.httpClient.Do(req) 139 + if err != nil { 140 + return nil, fmt.Errorf("API request: %w", err) 141 + } 142 + defer resp.Body.Close() 143 + 144 + respBody, err := io.ReadAll(resp.Body) 145 + if err != nil { 146 + return nil, fmt.Errorf("read response: %w", err) 147 + } 148 + 149 + if resp.StatusCode != http.StatusOK { 150 + return nil, fmt.Errorf("API returned %d: %s", resp.StatusCode, string(respBody)) 151 + } 152 + 153 + var result embeddingResponse 154 + if err := json.Unmarshal(respBody, &result); err != nil { 155 + return nil, fmt.Errorf("unmarshal response: %w", err) 156 + } 157 + 158 + if result.Error != nil { 
// truncateText returns text limited to at most maxChars bytes.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if the byte at the
// cut position is a continuation byte, the cut is moved back to the start of
// that rune, so the result may be up to three bytes shorter than maxChars.
// A non-positive maxChars yields the empty string.
func truncateText(text string, maxChars int) string {
	if maxChars <= 0 {
		return ""
	}
	if len(text) <= maxChars {
		return text
	}

	cut := maxChars
	// A UTF-8 rune has at most three continuation bytes (10xxxxxx), so three
	// steps back always reach a rune start in valid input. The bound keeps
	// malformed input from being trimmed away entirely.
	for steps := 0; steps < 3 && cut > 0 && text[cut]&0xC0 == 0x80; steps++ {
		cut--
	}
	return text[:cut]
}

// BuildAnnotationText assembles the text that represents an annotation for
// embedding. Parts are joined with " | " in this order: the selector's "exact"
// quote, the annotation body, the target title, and the tags joined by ", ".
//
// Nil or empty inputs are skipped. selectorJSON and tagsJSON that fail to
// decode are skipped as well, because a partial text is still usable. The
// result is empty when nothing contributes.
func BuildAnnotationText(bodyValue, selectorJSON, targetTitle, tagsJSON *string) string {
	var parts []string

	if selectorJSON != nil && *selectorJSON != "" {
		var selector struct {
			Exact string `json:"exact"`
		}
		if err := json.Unmarshal([]byte(*selectorJSON), &selector); err == nil && selector.Exact != "" {
			parts = append(parts, selector.Exact)
		}
	}

	if bodyValue != nil && *bodyValue != "" {
		parts = append(parts, *bodyValue)
	}

	if targetTitle != nil && *targetTitle != "" {
		parts = append(parts, *targetTitle)
	}

	if tagsJSON != nil && *tagsJSON != "" {
		var tags []string
		if err := json.Unmarshal([]byte(*tagsJSON), &tags); err == nil && len(tags) > 0 {
			parts = append(parts, strings.Join(tags, ", "))
		}
	}

	return strings.Join(parts, " | ")
}

// BuildDocumentText assembles the text that represents a document for
// embedding. Parts are joined with " | " in this order: title, tags joined by
// ", ", then textContent, or description when textContent is empty.
//
// Empty parts are left out, so the result never starts with a separator.
func BuildDocumentText(title, description, textContent string, tags []string) string {
	parts := make([]string, 0, 3)

	if title != "" {
		parts = append(parts, title)
	}
	if len(tags) > 0 {
		parts = append(parts, strings.Join(tags, ", "))
	}

	switch {
	case textContent != "":
		parts = append(parts, textContent)
	case description != "":
		parts = append(parts, description)
	}

	return strings.Join(parts, " | ")
}
+101 -1
backend/internal/firehose/ingester.go
··· 12 12 "margin.at/internal/crypto" 13 13 "margin.at/internal/db" 14 14 "margin.at/internal/logger" 15 + "margin.at/internal/standardsite" 15 16 internal_sync "margin.at/internal/sync" 17 + "margin.at/internal/verification" 16 18 "margin.at/internal/xrpc" 17 19 ) 18 20 ··· 31 33 CollectionPreferences = "at.margin.preferences" 32 34 CollectionSembleCard = "network.cosmik.card" 33 35 CollectionSembleCollection = "network.cosmik.collection" 36 + CollectionDocument = "site.standard.document" 37 + CollectionPublication = "site.standard.publication" 34 38 ) 35 39 36 40 var RelayURLs = []string{ ··· 41 45 42 46 var RelayURL = RelayURLs[0] 43 47 48 + type AnnotationCallback func(uri, authorDID, targetSource string, bodyValue, selectorJSON, targetTitle, tagsJSON *string) 49 + 50 + type DocumentCallback func(documentURI string) 51 + 44 52 type Ingester struct { 45 53 db *db.DB 46 54 sync *internal_sync.Service 47 55 cancel context.CancelFunc 48 56 handlers map[string]RecordHandler 49 57 currentRelayIdx int 58 + onAnnotation AnnotationCallback 59 + onDocument DocumentCallback 50 60 } 51 61 52 62 type RecordHandler func(event *FirehoseEvent) ··· 71 81 i.RegisterHandler(CollectionSembleCard, i.handleSembleCard) 72 82 i.RegisterHandler(CollectionSembleCollection, i.handleSembleCollection) 73 83 i.RegisterHandler(xrpc.CollectionSembleCollectionLink, i.handleSembleCollectionLink) 84 + i.RegisterHandler(CollectionDocument, i.handleDocument) 74 85 75 86 return i 76 87 } ··· 79 90 i.handlers[collection] = handler 80 91 } 81 92 93 + func (i *Ingester) SetOnAnnotation(cb AnnotationCallback) { 94 + i.onAnnotation = cb 95 + } 96 + 97 + func (i *Ingester) SetOnDocument(cb DocumentCallback) { 98 + i.onDocument = cb 99 + } 100 + 82 101 func (i *Ingester) Start(ctx context.Context) error { 83 102 ctx, cancel := context.WithCancel(ctx) 84 103 i.cancel = cancel ··· 287 306 i.db.DeleteCollection(uri) 288 307 case xrpc.CollectionSembleCollectionLink: 289 308 i.db.RemoveFromCollection(uri) 290 
- 309 + case CollectionDocument: 310 + i.db.DeleteDocument(uri) 291 311 } 292 312 } 293 313 ··· 413 433 logger.Error("Failed to index annotation: %v", err) 414 434 } else { 415 435 logger.Info("Indexed annotation from %s on %s", event.Repo, targetSource) 436 + if i.onAnnotation != nil { 437 + go i.onAnnotation(uri, event.Repo, targetSource, bodyValuePtr, selectorJSONPtr, targetTitlePtr, tagsJSONPtr) 438 + } 416 439 } 417 440 } 418 441 ··· 545 568 logger.Error("Failed to index highlight: %v", err) 546 569 } else { 547 570 logger.Info("Indexed highlight from %s on %s", event.Repo, record.Target.Source) 571 + if i.onAnnotation != nil { 572 + go i.onAnnotation(uri, event.Repo, record.Target.Source, nil, selectorJSONPtr, titlePtr, tagsJSONPtr) 573 + } 548 574 } 549 575 } 550 576 ··· 1023 1049 logger.Info("Indexed Semble collection link from %s", event.Repo) 1024 1050 } 1025 1051 } 1052 + 1053 + func (i *Ingester) handleDocument(event *FirehoseEvent) { 1054 + var record struct { 1055 + Site string `json:"site"` 1056 + Path string `json:"path"` 1057 + Title string `json:"title"` 1058 + Description string `json:"description"` 1059 + TextContent string `json:"textContent"` 1060 + Tags []string `json:"tags"` 1061 + PublishedAt string `json:"publishedAt"` 1062 + CanonicalURL string `json:"canonicalUrl"` 1063 + } 1064 + 1065 + if err := json.Unmarshal(event.Record, &record); err != nil { 1066 + return 1067 + } 1068 + 1069 + if record.Title == "" || record.Site == "" { 1070 + return 1071 + } 1072 + 1073 + uri := fmt.Sprintf("at://%s/%s/%s", event.Repo, event.Collection, event.Rkey) 1074 + 1075 + publishedAt, err := time.Parse(time.RFC3339, record.PublishedAt) 1076 + if err != nil { 1077 + publishedAt = time.Now() 1078 + } 1079 + 1080 + canonicalURL := standardsite.ResolveCanonicalURL(record.Site, record.Path, record.CanonicalURL) 1081 + if canonicalURL == "" { 1082 + return 1083 + } 1084 + 1085 + var pathPtr, descPtr, textPtr, tagsJSONPtr *string 1086 + if record.Path != "" { 
1087 + pathPtr = &record.Path 1088 + } 1089 + if record.Description != "" { 1090 + descPtr = &record.Description 1091 + } 1092 + if record.TextContent != "" { 1093 + textPtr = &record.TextContent 1094 + } 1095 + if len(record.Tags) > 0 { 1096 + tagsBytes, _ := json.Marshal(record.Tags) 1097 + tagsStr := string(tagsBytes) 1098 + tagsJSONPtr = &tagsStr 1099 + } 1100 + 1101 + doc := &db.Document{ 1102 + URI: uri, 1103 + AuthorDID: event.Repo, 1104 + Site: record.Site, 1105 + Path: pathPtr, 1106 + Title: record.Title, 1107 + Description: descPtr, 1108 + TextContent: textPtr, 1109 + TagsJSON: tagsJSONPtr, 1110 + CanonicalURL: canonicalURL, 1111 + PublishedAt: publishedAt, 1112 + IndexedAt: time.Now(), 1113 + } 1114 + 1115 + verification.VerifyDocumentAsync(canonicalURL, uri, func(verifiedURI string) { 1116 + if err := i.db.UpsertDocument(doc); err != nil { 1117 + logger.Error("Failed to index document: %v", err) 1118 + } else { 1119 + logger.Info("Indexed verified document from %s: %s", event.Repo, record.Title) 1120 + if i.onDocument != nil { 1121 + go i.onDocument(verifiedURI) 1122 + } 1123 + } 1124 + }) 1125 + }
+732
backend/internal/recommendations/service.go
··· 1 + package recommendations 2 + 3 + import ( 4 + "database/sql" 5 + "encoding/json" 6 + "math" 7 + "sort" 8 + "strings" 9 + "sync" 10 + "time" 11 + "unicode" 12 + 13 + "margin.at/internal/db" 14 + "margin.at/internal/embeddings" 15 + "margin.at/internal/logger" 16 + ) 17 + 18 + type Service struct { 19 + db *db.DB 20 + embeds *embeddings.Client 21 + mu sync.Mutex 22 + } 23 + 24 + func NewService(database *db.DB, embeddingClient *embeddings.Client) *Service { 25 + return &Service{ 26 + db: database, 27 + embeds: embeddingClient, 28 + } 29 + } 30 + 31 + func (s *Service) IsEnabled() bool { 32 + return s.embeds.IsEnabled() 33 + } 34 + 35 + func (s *Service) OnAnnotation(uri, authorDID, targetSource string, bodyValue, selectorJSON, targetTitle, tagsJSON *string) { 36 + if !s.embeds.IsEnabled() { 37 + return 38 + } 39 + 40 + text := embeddings.BuildAnnotationText(bodyValue, selectorJSON, targetTitle, tagsJSON) 41 + if strings.TrimSpace(text) == "" { 42 + return 43 + } 44 + 45 + embedding, err := s.embeds.Embed(text) 46 + if err != nil { 47 + logger.Error("Failed to embed annotation %s: %v", uri, err) 48 + return 49 + } 50 + 51 + var documentURI *string 52 + docURI, err := s.db.MatchAnnotationToDocument(targetSource) 53 + if err == nil && docURI != nil { 54 + documentURI = docURI 55 + } 56 + 57 + if err := s.db.UpsertAnnotationEmbedding(uri, authorDID, documentURI, embedding); err != nil { 58 + logger.Error("Failed to store annotation embedding %s: %v", uri, err) 59 + return 60 + } 61 + 62 + s.updateUserProfile(authorDID) 63 + } 64 + 65 + func (s *Service) OnDocument(documentURI string) { 66 + if !s.embeds.IsEnabled() { 67 + return 68 + } 69 + 70 + doc, err := s.db.GetDocumentByURI(documentURI) 71 + if err != nil { 72 + logger.Error("Failed to fetch document %s for embedding: %v", documentURI, err) 73 + return 74 + } 75 + 76 + var textContent, description string 77 + if doc.TextContent != nil { 78 + textContent = *doc.TextContent 79 + } 80 + if doc.Description != nil 
{ 81 + description = *doc.Description 82 + } 83 + 84 + var tags []string 85 + if doc.TagsJSON != nil { 86 + json.Unmarshal([]byte(*doc.TagsJSON), &tags) 87 + } 88 + 89 + text := embeddings.BuildDocumentText(doc.Title, description, textContent, tags) 90 + if strings.TrimSpace(text) == "" { 91 + return 92 + } 93 + 94 + embedding, err := s.embeds.Embed(text) 95 + if err != nil { 96 + logger.Error("Failed to embed document %s: %v", documentURI, err) 97 + return 98 + } 99 + 100 + if err := s.db.UpsertDocumentEmbedding(documentURI, embedding); err != nil { 101 + logger.Error("Failed to store document embedding %s: %v", documentURI, err) 102 + } 103 + } 104 + 105 + func (s *Service) GetRecommendations(authorDID string, limit int) ([]RecommendedItem, error) { 106 + if !s.embeds.IsEnabled() { 107 + return nil, nil 108 + } 109 + 110 + profile, err := s.db.GetUserProfile(authorDID) 111 + if err == sql.ErrNoRows { 112 + return nil, nil 113 + } 114 + if err != nil { 115 + return nil, err 116 + } 117 + 118 + if len(profile.Embedding) == 0 { 119 + return nil, nil 120 + } 121 + 122 + candidates, err := s.db.GetCandidateDocuments(authorDID, 500) 123 + if err != nil { 124 + return nil, err 125 + } 126 + 127 + if len(candidates) == 0 { 128 + return nil, nil 129 + } 130 + 131 + userLang := s.detectUserScript(authorDID) 132 + 133 + var tagAffinities map[string]float64 134 + if profile.TagAffinities != "" { 135 + json.Unmarshal([]byte(profile.TagAffinities), &tagAffinities) 136 + } 137 + 138 + preList := make([]preScoredItem, 0, len(candidates)) 139 + for i, c := range candidates { 140 + docLang := detectScript(c.Title) 141 + if userLang != "" && docLang != "" && docLang != userLang { 142 + continue 143 + } 144 + 145 + centroidSim := cosineSimilarity(profile.Embedding, c.Embedding) 146 + if centroidSim < 0.20 { 147 + continue 148 + } 149 + ageDays := time.Since(c.PublishedAt).Hours() / 24 150 + tagScore := computeTagScore(c.TagsJSON, tagAffinities) 151 + 152 + score := centroidSim*0.65 
+ tagScore*0.10 + 153 + math.Exp(-0.023*ageDays)*0.15 + 154 + math.Min(float64(c.Engagement)/10.0, 1.0)*0.10 155 + 156 + preList = append(preList, preScoredItem{idx: i, centroidSim: centroidSim, score: score}) 157 + } 158 + 159 + shortlistSize := limit * 5 160 + if shortlistSize < 50 { 161 + shortlistSize = 50 162 + } 163 + sortPreScored(preList) 164 + if len(preList) > shortlistSize { 165 + preList = preList[:shortlistSize] 166 + } 167 + 168 + annEmbeddings, _ := s.db.GetRecentAnnotationEmbeddingsByAuthor(authorDID, 30) 169 + 170 + topK := 3 171 + if len(annEmbeddings) < topK { 172 + topK = len(annEmbeddings) 173 + } 174 + 175 + scored := make([]scoredCandidate, 0, len(preList)) 176 + for _, ps := range preList { 177 + c := candidates[ps.idx] 178 + var semantic float64 179 + 180 + if topK > 0 { 181 + topSims := make([]float64, topK) 182 + for _, ae := range annEmbeddings { 183 + sim := cosineSimilarity(ae.Embedding, c.Embedding) 184 + for j := range topSims { 185 + if sim > topSims[j] { 186 + copy(topSims[j+1:], topSims[j:]) 187 + topSims[j] = sim 188 + break 189 + } 190 + } 191 + } 192 + avgTop := 0.0 193 + for _, s := range topSims { 194 + avgTop += s 195 + } 196 + avgTop /= float64(topK) 197 + 198 + semantic = avgTop*0.6 + ps.centroidSim*0.4 199 + } else { 200 + semantic = ps.centroidSim 201 + } 202 + 203 + ageDays := time.Since(c.PublishedAt).Hours() / 24 204 + tagScore := computeTagScore(c.TagsJSON, tagAffinities) 205 + 206 + finalScore := semantic*0.60 + tagScore*0.10 + 207 + math.Exp(-0.023*ageDays)*0.10 + 208 + math.Min(float64(c.Engagement)/10.0, 1.0)*0.10 + 209 + contentQuality(c)*0.10 210 + 211 + if finalScore < 0.25 { 212 + continue 213 + } 214 + 215 + scored = append(scored, scoredCandidate{ 216 + candidate: c, 217 + score: finalScore, 218 + }) 219 + } 220 + 221 + reranked := mmrRerank(scored, profile.Embedding, 0.6, limit) 222 + 223 + results := make([]RecommendedItem, len(reranked)) 224 + for i, r := range reranked { 225 + results[i] = 
RecommendedItem{ 226 + URI: r.candidate.URI, 227 + AuthorDID: r.candidate.AuthorDID, 228 + Site: r.candidate.Site, 229 + Path: r.candidate.Path, 230 + Title: r.candidate.Title, 231 + Description: r.candidate.Description, 232 + Tags: parseTags(r.candidate.TagsJSON), 233 + CanonicalURL: r.candidate.CanonicalURL, 234 + PublishedAt: r.candidate.PublishedAt, 235 + Score: r.score, 236 + Engagement: r.candidate.Engagement, 237 + } 238 + } 239 + 240 + return results, nil 241 + } 242 + 243 + type RecommendedItem struct { 244 + URI string `json:"uri"` 245 + AuthorDID string `json:"authorDid"` 246 + Site string `json:"site"` 247 + Path *string `json:"path,omitempty"` 248 + Title string `json:"title"` 249 + Description *string `json:"description,omitempty"` 250 + Tags []string `json:"tags,omitempty"` 251 + CanonicalURL string `json:"canonicalUrl"` 252 + PublishedAt time.Time `json:"publishedAt"` 253 + Score float64 `json:"score"` 254 + Engagement int `json:"engagement"` 255 + } 256 + 257 + func (s *Service) updateUserProfile(authorDID string) { 258 + s.mu.Lock() 259 + defer s.mu.Unlock() 260 + 261 + annEmbeddings, err := s.db.GetAnnotationEmbeddingsByAuthor(authorDID) 262 + if err != nil || len(annEmbeddings) == 0 { 263 + return 264 + } 265 + 266 + dims := len(annEmbeddings[0].Embedding) 267 + centroid := make([]float64, dims) 268 + totalWeight := 0.0 269 + 270 + tagCounts := make(map[string]float64) 271 + 272 + for _, ae := range annEmbeddings { 273 + ageDays := time.Since(ae.UpdatedAt).Hours() / 24 274 + weight := math.Exp(-0.023 * ageDays) 275 + 276 + for j, v := range ae.Embedding { 277 + centroid[j] += float64(v) * weight 278 + } 279 + totalWeight += weight 280 + } 281 + 282 + if totalWeight == 0 { 283 + return 284 + } 285 + 286 + result := make([]float32, dims) 287 + for i := range centroid { 288 + result[i] = float32(centroid[i] / totalWeight) 289 + } 290 + 291 + annotations, _ := s.db.GetAnnotationsByAuthor(authorDID, 500, 0) 292 + for _, ann := range annotations { 293 
+ if ann.TagsJSON != nil { 294 + var tags []string 295 + json.Unmarshal([]byte(*ann.TagsJSON), &tags) 296 + for _, t := range tags { 297 + tagCounts[strings.ToLower(t)] += 1.0 298 + } 299 + } 300 + } 301 + highlights, _ := s.db.GetHighlightsByAuthor(authorDID, 500, 0) 302 + for _, h := range highlights { 303 + if h.TagsJSON != nil { 304 + var tags []string 305 + json.Unmarshal([]byte(*h.TagsJSON), &tags) 306 + for _, t := range tags { 307 + tagCounts[strings.ToLower(t)] += 1.0 308 + } 309 + } 310 + } 311 + 312 + maxCount := 0.0 313 + for _, c := range tagCounts { 314 + if c > maxCount { 315 + maxCount = c 316 + } 317 + } 318 + if maxCount > 0 { 319 + for k := range tagCounts { 320 + tagCounts[k] /= maxCount 321 + } 322 + } 323 + 324 + if err := s.db.UpsertUserProfile(authorDID, result, tagCounts, len(annEmbeddings)); err != nil { 325 + logger.Error("Failed to update user profile for %s: %v", authorDID, err) 326 + } 327 + } 328 + 329 + func (s *Service) BackfillDocumentEmbeddings(batchSize int) error { 330 + if !s.embeds.IsEnabled() { 331 + return nil 332 + } 333 + 334 + total := 0 335 + for { 336 + docs, err := s.db.GetDocumentsWithoutEmbeddings(batchSize) 337 + if err != nil { 338 + return err 339 + } 340 + if len(docs) == 0 { 341 + break 342 + } 343 + 344 + logger.Info("Backfilling embeddings for %d documents (total so far: %d)", len(docs), total) 345 + 346 + texts := make([]string, len(docs)) 347 + for i, doc := range docs { 348 + var textContent, description string 349 + if doc.TextContent != nil { 350 + textContent = *doc.TextContent 351 + } 352 + if doc.Description != nil { 353 + description = *doc.Description 354 + } 355 + var tags []string 356 + if doc.TagsJSON != nil { 357 + json.Unmarshal([]byte(*doc.TagsJSON), &tags) 358 + } 359 + texts[i] = embeddings.BuildDocumentText(doc.Title, description, textContent, tags) 360 + } 361 + 362 + vecs, err := s.embeds.EmbedBatch(texts) 363 + if err != nil { 364 + return err 365 + } 366 + 367 + for i, doc := range docs 
{ 368 + if err := s.db.UpsertDocumentEmbedding(doc.URI, vecs[i]); err != nil { 369 + logger.Error("Failed to store embedding for doc %s: %v", doc.URI, err) 370 + } 371 + } 372 + 373 + total += len(docs) 374 + if len(docs) < batchSize { 375 + break 376 + } 377 + } 378 + 379 + if total > 0 { 380 + logger.Info("Backfilled %d document embeddings total", total) 381 + } 382 + return nil 383 + } 384 + 385 + func (s *Service) BackfillAnnotationEmbeddings(batchSize int) (int, error) { 386 + if !s.embeds.IsEnabled() { 387 + return 0, nil 388 + } 389 + 390 + total := 0 391 + for { 392 + anns, err := s.db.GetAnnotationsWithoutEmbeddings(batchSize) 393 + if err != nil { 394 + return total, err 395 + } 396 + if len(anns) == 0 { 397 + break 398 + } 399 + 400 + logger.Info("Backfilling embeddings for %d annotations (total so far: %d)", len(anns), total) 401 + 402 + texts := make([]string, len(anns)) 403 + for i, a := range anns { 404 + texts[i] = embeddings.BuildAnnotationText(a.BodyValue, a.SelectorJSON, a.TargetTitle, a.TagsJSON) 405 + } 406 + 407 + vecs, err := s.embeds.EmbedBatch(texts) 408 + if err != nil { 409 + return total, err 410 + } 411 + 412 + batch := 0 413 + for i, a := range anns { 414 + if strings.TrimSpace(texts[i]) == "" { 415 + continue 416 + } 417 + var documentURI *string 418 + if docURI, err := s.db.MatchAnnotationToDocument(a.TargetSource); err == nil && docURI != nil { 419 + documentURI = docURI 420 + } 421 + if err := s.db.UpsertAnnotationEmbedding(a.URI, a.AuthorDID, documentURI, vecs[i]); err != nil { 422 + logger.Error("Failed to store embedding for annotation %s: %v", a.URI, err) 423 + } else { 424 + batch++ 425 + } 426 + } 427 + 428 + total += batch 429 + if len(anns) < batchSize { 430 + break 431 + } 432 + } 433 + 434 + if total > 0 { 435 + logger.Info("Backfilled %d annotation embeddings total", total) 436 + } 437 + return total, nil 438 + } 439 + 440 + func (s *Service) BackfillHighlightEmbeddings(batchSize int) (int, error) { 441 + if 
!s.embeds.IsEnabled() { 442 + return 0, nil 443 + } 444 + 445 + total := 0 446 + for { 447 + highlights, err := s.db.GetHighlightsWithoutEmbeddings(batchSize) 448 + if err != nil { 449 + return total, err 450 + } 451 + if len(highlights) == 0 { 452 + break 453 + } 454 + 455 + logger.Info("Backfilling embeddings for %d highlights (total so far: %d)", len(highlights), total) 456 + 457 + texts := make([]string, len(highlights)) 458 + for i, h := range highlights { 459 + texts[i] = embeddings.BuildAnnotationText(nil, h.SelectorJSON, h.TargetTitle, h.TagsJSON) 460 + } 461 + 462 + vecs, err := s.embeds.EmbedBatch(texts) 463 + if err != nil { 464 + return total, err 465 + } 466 + 467 + batch := 0 468 + for i, h := range highlights { 469 + if strings.TrimSpace(texts[i]) == "" { 470 + continue 471 + } 472 + var documentURI *string 473 + if docURI, err := s.db.MatchAnnotationToDocument(h.TargetSource); err == nil && docURI != nil { 474 + documentURI = docURI 475 + } 476 + if err := s.db.UpsertAnnotationEmbedding(h.URI, h.AuthorDID, documentURI, vecs[i]); err != nil { 477 + logger.Error("Failed to store embedding for highlight %s: %v", h.URI, err) 478 + } else { 479 + batch++ 480 + } 481 + } 482 + 483 + total += batch 484 + if len(highlights) < batchSize { 485 + break 486 + } 487 + } 488 + 489 + if total > 0 { 490 + logger.Info("Backfilled %d highlight embeddings total", total) 491 + } 492 + return total, nil 493 + } 494 + 495 + func (s *Service) RebuildAllProfiles() (int, error) { 496 + if !s.embeds.IsEnabled() { 497 + return 0, nil 498 + } 499 + 500 + dids, err := s.db.GetDistinctAnnotationAuthors() 501 + if err != nil { 502 + return 0, err 503 + } 504 + 505 + for _, did := range dids { 506 + s.updateUserProfile(did) 507 + } 508 + 509 + logger.Info("Rebuilt profiles for %d users", len(dids)) 510 + return len(dids), nil 511 + } 512 + 513 + func cosineSimilarity(a, b []float32) float64 { 514 + if len(a) != len(b) || len(a) == 0 { 515 + return 0 516 + } 517 + var dot, normA, 
normB float64 518 + for i := range a { 519 + dot += float64(a[i]) * float64(b[i]) 520 + normA += float64(a[i]) * float64(a[i]) 521 + normB += float64(b[i]) * float64(b[i]) 522 + } 523 + denom := math.Sqrt(normA) * math.Sqrt(normB) 524 + if denom == 0 { 525 + return 0 526 + } 527 + return dot / denom 528 + } 529 + 530 + type scoredCandidate struct { 531 + candidate db.CandidateDocument 532 + score float64 533 + } 534 + 535 + func mmrRerank(candidates []scoredCandidate, userVec []float32, lambda float64, k int) []scoredCandidate { 536 + if len(candidates) <= k { 537 + return candidates 538 + } 539 + 540 + selected := make([]scoredCandidate, 0, k) 541 + remaining := make([]scoredCandidate, len(candidates)) 542 + copy(remaining, candidates) 543 + 544 + for len(selected) < k && len(remaining) > 0 { 545 + bestIdx := -1 546 + bestScore := math.Inf(-1) 547 + 548 + for i, cand := range remaining { 549 + relevance := cand.score 550 + 551 + maxSim := 0.0 552 + for _, sel := range selected { 553 + sim := cosineSimilarity(cand.candidate.Embedding, sel.candidate.Embedding) 554 + if cand.candidate.Site == sel.candidate.Site { 555 + sim = math.Max(sim, 0.5) 556 + } 557 + if sim > maxSim { 558 + maxSim = sim 559 + } 560 + } 561 + 562 + mmrScore := lambda*relevance - (1-lambda)*maxSim 563 + if mmrScore > bestScore { 564 + bestScore = mmrScore 565 + bestIdx = i 566 + } 567 + } 568 + 569 + if bestIdx < 0 { 570 + break 571 + } 572 + 573 + selected = append(selected, remaining[bestIdx]) 574 + remaining = append(remaining[:bestIdx], remaining[bestIdx+1:]...) 
// computeTagScore returns the mean affinity of a document's tags, capped at 1.
// Tags are matched lower-cased; tags with no affinity count as zero. It
// returns 0 when the document has no tags, the tag JSON cannot be decoded, or
// there are no affinities.
func computeTagScore(docTagsJSON *string, affinities map[string]float64) float64 {
	if docTagsJSON == nil || len(affinities) == 0 {
		return 0
	}
	var docTags []string
	if err := json.Unmarshal([]byte(*docTagsJSON), &docTags); err != nil {
		return 0
	}
	if len(docTags) == 0 {
		return 0
	}

	sum := 0.0
	for _, t := range docTags {
		sum += affinities[strings.ToLower(t)]
	}
	return math.Min(sum/float64(len(docTags)), 1.0)
}

// parseTags decodes a JSON array of tags. It returns nil for nil input, and
// whatever was decoded before the error when the JSON is malformed.
func parseTags(tagsJSON *string) []string {
	if tagsJSON == nil {
		return nil
	}
	var tags []string
	// Best effort: the caller only needs the tags for display.
	_ = json.Unmarshal([]byte(*tagsJSON), &tags)
	return tags
}

// preScoredItem records a candidate's position in the candidate slice with its
// centroid similarity and first-pass score.
type preScoredItem struct {
	idx         int
	centroidSim float64
	score       float64
}

// sortPreScored orders items by score, highest first. The sort is stable, so
// items with equal scores keep their input order and the shortlist is
// deterministic.
func sortPreScored(items []preScoredItem) {
	sort.SliceStable(items, func(i, j int) bool {
		return items[i].score > items[j].score
	})
}

// detectScript names the dominant writing system of text: "latin", "cjk" (Han,
// Hiragana, Katakana), "cyrillic", "arabic" or "devanagari". Only letters are
// counted. A script wins when it covers at least 40% of them, checked in the
// order listed. It returns "" for text with fewer than three letters or with
// no qualifying script.
//
// Latin is recognised through the Unicode script table rather than a code
// point ceiling, so letters in Latin Extended Additional (used by Vietnamese,
// for example) count as Latin.
func detectScript(text string) string {
	var latin, cjk, cyrillic, arabic, devanagari, total int
	for _, r := range text {
		if !unicode.IsLetter(r) {
			continue
		}
		total++
		switch {
		case unicode.Is(unicode.Latin, r):
			latin++
		case unicode.Is(unicode.Han, r) || unicode.Is(unicode.Hiragana, r) || unicode.Is(unicode.Katakana, r):
			cjk++
		case unicode.Is(unicode.Cyrillic, r):
			cyrillic++
		case unicode.Is(unicode.Arabic, r):
			arabic++
		case unicode.Is(unicode.Devanagari, r):
			devanagari++
		}
	}
	if total < 3 {
		return ""
	}

	threshold := float64(total) * 0.4
	switch {
	case float64(latin) >= threshold:
		return "latin"
	case float64(cjk) >= threshold:
		return "cjk"
	case float64(cyrillic) >= threshold:
		return "cyrillic"
	case float64(arabic) >= threshold:
		return "arabic"
	case float64(devanagari) >= threshold:
		return "devanagari"
	}
	return ""
}
+ return "arabic" 655 + case float64(devanagari) >= threshold: 656 + return "devanagari" 657 + } 658 + return "" 659 + } 660 + 661 + func (s *Service) detectUserScript(authorDID string) string { 662 + annotations, _ := s.db.GetAnnotationsByAuthor(authorDID, 100, 0) 663 + highlights, _ := s.db.GetHighlightsByAuthor(authorDID, 100, 0) 664 + 665 + scriptCounts := make(map[string]int) 666 + for _, a := range annotations { 667 + if a.TargetTitle != nil { 668 + if sc := detectScript(*a.TargetTitle); sc != "" { 669 + scriptCounts[sc]++ 670 + } 671 + } 672 + } 673 + for _, h := range highlights { 674 + if h.TargetTitle != nil { 675 + if sc := detectScript(*h.TargetTitle); sc != "" { 676 + scriptCounts[sc]++ 677 + } 678 + } 679 + } 680 + 681 + best := "" 682 + bestCount := 0 683 + for sc, cnt := range scriptCounts { 684 + if cnt > bestCount { 685 + best = sc 686 + bestCount = cnt 687 + } 688 + } 689 + total := 0 690 + for _, cnt := range scriptCounts { 691 + total += cnt 692 + } 693 + if total > 0 && float64(bestCount)/float64(total) >= 0.6 { 694 + return best 695 + } 696 + return "" 697 + } 698 + 699 + func contentQuality(c db.CandidateDocument) float64 { 700 + score := 0.0 701 + 702 + titleLen := len(c.Title) 703 + if titleLen > 40 { 704 + score += 0.3 705 + } else if titleLen > 25 { 706 + score += 0.2 707 + } else { 708 + score += 0.1 709 + } 710 + 711 + if c.Description != nil && len(*c.Description) > 50 { 712 + score += 0.3 713 + } else if c.Description != nil && len(*c.Description) > 0 { 714 + score += 0.15 715 + } 716 + 717 + if c.TagsJSON != nil { 718 + var tags []string 719 + json.Unmarshal([]byte(*c.TagsJSON), &tags) 720 + if len(tags) > 0 { 721 + score += 0.2 722 + } 723 + } 724 + 725 + if c.Engagement >= 3 { 726 + score += 0.2 727 + } else if c.Engagement >= 1 { 728 + score += 0.1 729 + } 730 + 731 + return math.Min(score, 1.0) 732 + }
+138
backend/internal/standardsite/resolve.go
var (
	// client is shared by all lookups; the timeout bounds each request.
	client = &http.Client{Timeout: 10 * time.Second}

	// pubCache maps a publication AT-URI to its resolved URL. Only successful
	// lookups are cached and entries are never evicted.
	pubCache   = make(map[string]string)
	pubCacheMu sync.RWMutex
)

// hasWebScheme reports whether raw starts with an http or https scheme.
func hasWebScheme(raw string) bool {
	return strings.HasPrefix(raw, "https://") || strings.HasPrefix(raw, "http://")
}

// isPlainQueryValue reports whether v is non-empty and free of characters that
// would change the meaning of a query string or path if inserted unescaped.
func isPlainQueryValue(v string) bool {
	return v != "" && !strings.ContainsAny(v, "&=?#%+/\\ \t\r\n")
}

// joinURL appends path to base with exactly one slash between them.
func joinURL(base, path string) string {
	base = strings.TrimRight(base, "/")
	if path == "" {
		return base
	}
	return base + "/" + strings.TrimLeft(path, "/")
}

// ResolveCanonicalURL returns the public URL for a document record.
//
// An https canonicalURL is returned as given. Otherwise the URL is built from
// site and path, where site is either a web URL or the AT-URI of a publication
// record whose "url" field is looked up over the network. It returns "" when
// the publication cannot be resolved or the base is not an http(s) URL. These
// values come from untrusted records and the result is later fetched, so other
// schemes are refused.
func ResolveCanonicalURL(site, path, canonicalURL string) string {
	if strings.HasPrefix(canonicalURL, "https://") {
		return canonicalURL
	}

	base := site
	if strings.HasPrefix(site, "at://") {
		base = resolvePublicationURL(site)
	}
	if !hasWebScheme(base) {
		return ""
	}
	return joinURL(base, path)
}

// resolvePublicationURL fetches the publication record named by atURI
// (at://did/collection/rkey) from the owner's PDS and returns its "url" field.
// Successful results are cached. It returns "" on any failure.
//
// The DID, collection and record key are placed in the query string, so an
// AT-URI whose parts contain query or path metacharacters is rejected before
// any request is made.
func resolvePublicationURL(atURI string) string {
	pubCacheMu.RLock()
	cached, ok := pubCache[atURI]
	pubCacheMu.RUnlock()
	if ok {
		return cached
	}

	parts := strings.SplitN(strings.TrimPrefix(atURI, "at://"), "/", 3)
	if len(parts) != 3 {
		return ""
	}
	for _, p := range parts {
		if !isPlainQueryValue(p) {
			return ""
		}
	}
	did, collection, rkey := parts[0], parts[1], parts[2]

	pdsHost := resolvePDS(did)
	if pdsHost == "" {
		return ""
	}

	endpoint := fmt.Sprintf("%s/xrpc/com.atproto.repo.getRecord?repo=%s&collection=%s&rkey=%s",
		pdsHost, did, collection, rkey)

	resp, err := client.Get(endpoint)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return ""
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
	if err != nil {
		return ""
	}

	var result struct {
		Value struct {
			URL string `json:"url"`
		} `json:"value"`
	}
	if err := json.Unmarshal(body, &result); err != nil || result.Value.URL == "" {
		return ""
	}

	pubCacheMu.Lock()
	pubCache[atURI] = result.Value.URL
	pubCacheMu.Unlock()

	return result.Value.URL
}

// resolvePDS returns the PDS base URL, without trailing slash, declared in the
// DID document of did. It returns "" when the DID cannot be resolved or the
// document has no usable PDS service.
//
// did:plc is resolved through plc.directory and did:web through
// https://<domain>/.well-known/did.json. A did:web identifier containing
// anything beyond a bare host name is refused, so it cannot steer the request
// to another path, port or user-info host. The service id is accepted in both
// relative ("#atproto_pds") and absolute ("<did>#atproto_pds") form, and the
// endpoint must be an http(s) URL.
func resolvePDS(did string) string {
	var docURL string
	switch {
	case strings.HasPrefix(did, "did:plc:"):
		if !isPlainQueryValue(did) {
			return ""
		}
		docURL = "https://plc.directory/" + did
	case strings.HasPrefix(did, "did:web:"):
		domain := strings.TrimPrefix(did, "did:web:")
		if domain == "" || strings.ContainsAny(domain, "/?#@:%\\ \t\r\n") {
			return ""
		}
		docURL = "https://" + domain + "/.well-known/did.json"
	default:
		return ""
	}

	resp, err := client.Get(docURL)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return ""
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, 32*1024))
	if err != nil {
		return ""
	}

	var doc struct {
		Service []struct {
			ID              string `json:"id"`
			Type            string `json:"type"`
			ServiceEndpoint string `json:"serviceEndpoint"`
		} `json:"service"`
	}
	if err := json.Unmarshal(body, &doc); err != nil {
		return ""
	}

	for _, svc := range doc.Service {
		if svc.Type != "AtprotoPersonalDataServer" || !strings.HasSuffix(svc.ID, "#atproto_pds") {
			continue
		}
		if !hasWebScheme(svc.ServiceEndpoint) {
			continue
		}
		return strings.TrimRight(svc.ServiceEndpoint, "/")
	}
	return ""
}
+60
backend/internal/sync/service.go
··· 12 12 "margin.at/internal/crypto" 13 13 "margin.at/internal/db" 14 14 "margin.at/internal/logger" 15 + "margin.at/internal/standardsite" 16 + "margin.at/internal/verification" 15 17 "margin.at/internal/xrpc" 16 18 ) 17 19 ··· 39 41 xrpc.CollectionSembleCard, 40 42 xrpc.CollectionSembleCollection, 41 43 xrpc.CollectionSembleCollectionLink, 44 + xrpc.CollectionDocument, 42 45 } 43 46 44 47 results := make(map[string]string) ··· 645 648 URI: uri, 646 649 CID: cidPtr, 647 650 IndexedAt: time.Now(), 651 + }) 652 + 653 + case xrpc.CollectionDocument: 654 + var record struct { 655 + Site string `json:"site"` 656 + Path string `json:"path"` 657 + Title string `json:"title"` 658 + Description string `json:"description"` 659 + TextContent string `json:"textContent"` 660 + Tags []string `json:"tags"` 661 + PublishedAt string `json:"publishedAt"` 662 + CanonicalURL string `json:"canonicalUrl"` 663 + } 664 + if err := json.Unmarshal(value, &record); err != nil { 665 + return err 666 + } 667 + if record.Title == "" || record.Site == "" { 668 + return nil 669 + } 670 + publishedAt, err := time.Parse(time.RFC3339, record.PublishedAt) 671 + if err != nil { 672 + publishedAt = time.Now() 673 + } 674 + canonicalURL := standardsite.ResolveCanonicalURL(record.Site, record.Path, record.CanonicalURL) 675 + if canonicalURL == "" { 676 + return nil 677 + } 678 + var pathPtr, descPtr, textPtr, tagsJSONPtr *string 679 + if record.Path != "" { 680 + pathPtr = &record.Path 681 + } 682 + if record.Description != "" { 683 + descPtr = &record.Description 684 + } 685 + if record.TextContent != "" { 686 + textPtr = &record.TextContent 687 + } 688 + if len(record.Tags) > 0 { 689 + tagsBytes, _ := json.Marshal(record.Tags) 690 + tagsStr := string(tagsBytes) 691 + tagsJSONPtr = &tagsStr 692 + } 693 + if err := verification.VerifyDocument(canonicalURL, uri); err != nil { 694 + return nil 695 + } 696 + return s.db.UpsertDocument(&db.Document{ 697 + URI: uri, 698 + AuthorDID: did, 699 + Site: 
record.Site, 700 + Path: pathPtr, 701 + Title: record.Title, 702 + Description: descPtr, 703 + TextContent: textPtr, 704 + TagsJSON: tagsJSONPtr, 705 + CanonicalURL: canonicalURL, 706 + PublishedAt: publishedAt, 707 + IndexedAt: time.Now(), 648 708 }) 649 709 650 710 case xrpc.CollectionPreferences:
+130
backend/internal/verification/verify.go
··· 1 + package verification 2 + 3 + import ( 4 + "fmt" 5 + "io" 6 + "net/http" 7 + "net/url" 8 + "regexp" 9 + "strings" 10 + "time" 11 + 12 + "margin.at/internal/logger" 13 + ) 14 + 15 + var client = &http.Client{ 16 + Timeout: 10 * time.Second, 17 + CheckRedirect: func(req *http.Request, via []*http.Request) error { 18 + if len(via) >= 3 { 19 + return fmt.Errorf("too many redirects") 20 + } 21 + return nil 22 + }, 23 + } 24 + 25 + var linkTagPattern = regexp.MustCompile(`<link[^>]+rel=["']site\.standard\.document["'][^>]+href=["']([^"']+)["'][^>]*/?>|<link[^>]+href=["']([^"']+)["'][^>]+rel=["']site\.standard\.document["'][^>]*/?>`) 26 + 27 + func VerifyPublication(pubURL, expectedURI string) error { 28 + pubURL = strings.TrimRight(pubURL, "/") 29 + 30 + parsed, err := url.Parse(pubURL) 31 + if err != nil { 32 + return fmt.Errorf("invalid publication URL: %w", err) 33 + } 34 + 35 + wellKnownPath := "/.well-known/site.standard.publication" 36 + if parsed.Path != "" && parsed.Path != "/" { 37 + wellKnownPath += parsed.Path 38 + } 39 + wellKnownURL := fmt.Sprintf("%s://%s%s", parsed.Scheme, parsed.Host, wellKnownPath) 40 + 41 + req, err := http.NewRequest("GET", wellKnownURL, nil) 42 + if err != nil { 43 + return fmt.Errorf("invalid URL: %w", err) 44 + } 45 + req.Header.Set("User-Agent", "Margin/1.0 (Standard.site verification)") 46 + 47 + resp, err := client.Do(req) 48 + if err != nil { 49 + return fmt.Errorf("failed to fetch %s: %w", wellKnownURL, err) 50 + } 51 + defer resp.Body.Close() 52 + 53 + if resp.StatusCode != http.StatusOK { 54 + return fmt.Errorf("well-known endpoint returned %d", resp.StatusCode) 55 + } 56 + 57 + body, err := io.ReadAll(io.LimitReader(resp.Body, 1024)) 58 + if err != nil { 59 + return fmt.Errorf("failed to read response: %w", err) 60 + } 61 + 62 + returnedURI := strings.TrimSpace(string(body)) 63 + if returnedURI != expectedURI { 64 + return fmt.Errorf("URI mismatch: expected %s, got %s", expectedURI, returnedURI) 65 + } 66 + 67 + 
return nil 68 + } 69 + 70 + func VerifyDocument(docURL, expectedURI string) error { 71 + req, err := http.NewRequest("GET", docURL, nil) 72 + if err != nil { 73 + return fmt.Errorf("invalid URL: %w", err) 74 + } 75 + req.Header.Set("User-Agent", "Margin/1.0 (Standard.site verification)") 76 + 77 + resp, err := client.Do(req) 78 + if err != nil { 79 + return fmt.Errorf("failed to fetch %s: %w", docURL, err) 80 + } 81 + defer resp.Body.Close() 82 + 83 + if resp.StatusCode != http.StatusOK { 84 + return fmt.Errorf("document URL returned %d", resp.StatusCode) 85 + } 86 + 87 + body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024)) 88 + if err != nil { 89 + return fmt.Errorf("failed to read document: %w", err) 90 + } 91 + 92 + html := string(body) 93 + 94 + matches := linkTagPattern.FindAllStringSubmatch(html, -1) 95 + for _, m := range matches { 96 + href := m[1] 97 + if href == "" { 98 + href = m[2] 99 + } 100 + if strings.TrimSpace(href) == expectedURI { 101 + return nil 102 + } 103 + } 104 + 105 + return fmt.Errorf("no matching <link rel=\"site.standard.document\"> tag found for %s", expectedURI) 106 + } 107 + 108 + func VerifyPublicationAsync(pubURL, uri string, onVerified func(string)) { 109 + go func() { 110 + if err := VerifyPublication(pubURL, uri); err != nil { 111 + return 112 + } 113 + logger.Info("Publication verified: %s", uri) 114 + if onVerified != nil { 115 + onVerified(uri) 116 + } 117 + }() 118 + } 119 + 120 + func VerifyDocumentAsync(docURL, uri string, onVerified func(string)) { 121 + go func() { 122 + if err := VerifyDocument(docURL, uri); err != nil { 123 + return 124 + } 125 + logger.Info("Document verified: %s", uri) 126 + if onVerified != nil { 127 + onVerified(uri) 128 + } 129 + }() 130 + }
+2
backend/internal/xrpc/records.go
··· 18 18 CollectionProfile = "at.margin.profile" 19 19 CollectionPreferences = "at.margin.preferences" 20 20 CollectionAPIKey = "at.margin.apikey" 21 + CollectionDocument = "site.standard.document" 22 + CollectionPublication = "site.standard.publication" 21 23 ) 22 24 23 25 const (
+10
web/src/App.tsx
··· 23 23 import About from "./views/About"; 24 24 import AdminModeration from "./views/core/AdminModeration"; 25 25 import Search from "./views/core/Search"; 26 + import Discover from "./views/core/Discover"; 26 27 27 28 function RootRoute() { 28 29 const user = useStore($user); ··· 63 64 element={ 64 65 <AppLayout> 65 66 <Search /> 67 + </AppLayout> 68 + } 69 + /> 70 + 71 + <Route 72 + path="/discover" 73 + element={ 74 + <AppLayout> 75 + <Discover /> 66 76 </AppLayout> 67 77 } 68 78 />
+58
web/src/api/client.ts
··· 1361 1361 return { items: [] }; 1362 1362 } 1363 1363 } 1364 + 1365 + export interface DocumentItem { 1366 + uri: string; 1367 + authorDid: string; 1368 + site: string; 1369 + path?: string; 1370 + title: string; 1371 + description?: string; 1372 + tags?: string[]; 1373 + canonicalUrl: string; 1374 + publishedAt: string; 1375 + } 1376 + 1377 + export interface DocumentsResponse { 1378 + items: DocumentItem[]; 1379 + totalItems: number; 1380 + } 1381 + 1382 + export async function getDocuments({ 1383 + sort = "new", 1384 + limit = 30, 1385 + offset = 0, 1386 + }: { 1387 + sort?: string; 1388 + limit?: number; 1389 + offset?: number; 1390 + }): Promise<DocumentsResponse> { 1391 + try { 1392 + const params = new URLSearchParams(); 1393 + if (sort) params.append("sort", sort); 1394 + params.append("limit", limit.toString()); 1395 + params.append("offset", offset.toString()); 1396 + 1397 + const res = await apiRequest(`/api/documents?${params.toString()}`, { 1398 + skipAuthRedirect: true, 1399 + }); 1400 + if (!res.ok) throw new Error("Failed to fetch documents"); 1401 + return await res.json(); 1402 + } catch (e) { 1403 + console.error("Failed to fetch documents:", e); 1404 + return { items: [], totalItems: 0 }; 1405 + } 1406 + } 1407 + 1408 + export async function getRecommendations( 1409 + limit = 20, 1410 + ): Promise<DocumentsResponse & { unavailable?: boolean }> { 1411 + try { 1412 + const res = await apiRequest(`/api/recommendations?limit=${limit}`); 1413 + if (res.status === 503) 1414 + return { items: [], totalItems: 0, unavailable: true }; 1415 + if (!res.ok) throw new Error("Failed to fetch recommendations"); 1416 + return await res.json(); 1417 + } catch (e) { 1418 + console.error("Failed to fetch recommendations:", e); 1419 + return { items: [], totalItems: 0 }; 1420 + } 1421 + }
+3
web/src/components/navigation/Sidebar.tsx
··· 13 13 PenSquare, 14 14 MessageSquareText, 15 15 Highlighter, 16 + Compass, 16 17 } from "lucide-react"; 17 18 import { useStore } from "@nanostores/react"; 18 19 import { $user, logout } from "../../store/auth"; ··· 43 44 44 45 const publicNavItems = [ 45 46 { icon: Home, label: "Feed", href: "/home", badge: undefined }, 47 + { icon: Compass, label: "Discover", href: "/discover", badge: undefined }, 46 48 { 47 49 icon: MessageSquareText, 48 50 label: "Annotations", ··· 65 67 66 68 const authNavItems = [ 67 69 { icon: Home, label: "Feed", href: "/home" }, 70 + { icon: Compass, label: "Discover", href: "/discover" }, 68 71 { 69 72 icon: Bell, 70 73 label: "Activity",
+19 -1
web/src/pages/privacy.astro
··· 11 11 12 12 <div class="prose prose-surface dark:prose-invert max-w-none"> 13 13 <h1 class="font-display font-bold text-3xl mb-2 text-surface-900 dark:text-white">Privacy Policy</h1> 14 - <p class="text-surface-500 dark:text-surface-400 mb-8">Last updated: February 24, 2026</p> 14 + <p class="text-surface-500 dark:text-surface-400 mb-8">Last updated: March 4, 2026</p> 15 15 16 16 <section class="mb-8"> 17 17 <h2 class="text-xl font-bold text-surface-900 dark:text-white mb-4">Overview</h2> ··· 42 42 <li>Collections you organize content into</li> 43 43 </ul> 44 44 45 + <h3 class="text-lg font-semibold text-surface-900 dark:text-white mb-2">Discover & Recommendations</h3> 46 + <p class="text-surface-700 dark:text-surface-300 mb-4"> 47 + To power the Discover page and personalized recommendations, we generate mathematical representations (embeddings) of: 48 + </p> 49 + <ul class="list-disc pl-5 mb-4 text-surface-700 dark:text-surface-300 space-y-1"> 50 + <li>Your annotations, highlights, and their associated tags</li> 51 + <li>Publicly published documents from the AT Protocol network</li> 52 + </ul> 53 + <p class="text-surface-700 dark:text-surface-300 mb-4"> 54 + These embeddings are used to build an interest profile that helps us suggest relevant content. Your interest profile is stored on our server and is not shared with other users. 55 + </p> 56 + 45 57 <h3 class="text-lg font-semibold text-surface-900 dark:text-white mb-2">Authentication</h3> 46 58 <p class="text-surface-700 dark:text-surface-300 mb-4"> 47 59 We store OAuth session tokens locally in your browser to keep you logged in. These tokens are used solely for authenticating API requests. 
··· 56 68 <li>Sync your content across devices</li> 57 69 <li>Show your public annotations to other users</li> 58 70 <li>Enable social features like replies and likes</li> 71 + <li>Generate personalized content recommendations on the Discover page</li> 59 72 </ul> 73 + 74 + <h3 class="text-lg font-semibold text-surface-900 dark:text-white mb-2">Third-Party Services</h3> 75 + <p class="text-surface-700 dark:text-surface-300 mb-4"> 76 + We use <strong>OpenAI</strong> to generate text embeddings for powering recommendations. When generating embeddings, the text content of your annotations and public documents is sent to OpenAI's API. OpenAI processes this data according to their <a href="https://openai.com/policies/api-data-usage-policies" class="text-primary-600 dark:text-primary-400 hover:text-primary-700 dark:hover:text-primary-300 hover:underline" target="_blank" rel="noopener noreferrer">API data usage policy</a>, which states that API inputs are not used to train their models. No other third-party services receive your data. 77 + </p> 60 78 </section> 61 79 62 80 <section class="mb-8">
+24 -3
web/src/views/collections/CollectionDetail.tsx
··· 7 7 removeCollectionItem, 8 8 resolveHandle, 9 9 } from "../../api/client"; 10 - import { Loader2, ArrowLeft, Trash2, Plus } from "lucide-react"; 10 + import { Loader2, ArrowLeft, Trash2, Plus, ExternalLink } from "lucide-react"; 11 11 import CollectionIcon from "../../components/common/CollectionIcon"; 12 12 import ShareMenu from "../../components/modals/ShareMenu"; 13 13 import Card from "../../components/common/Card"; ··· 114 114 } 115 115 116 116 const isOwner = user?.did === collection.creator?.did; 117 + const isSemble = collection.uri?.includes("network.cosmik"); 118 + 119 + const sembleUrl = (() => { 120 + if (!isSemble) return ""; 121 + const parts = collection.uri.split("/"); 122 + const rk = parts[parts.length - 1]; 123 + const h = collection.creator?.handle || ""; 124 + return `https://semble.so/profile/${h}/collections/${rk}`; 125 + })(); 117 126 118 127 return ( 119 128 <div className="animate-fade-in max-w-2xl mx-auto"> ··· 162 171 type="Collection" 163 172 text={collection.name} 164 173 /> 165 - {isOwner && ( 174 + {isOwner && !isSemble && ( 166 175 <> 167 176 <button 168 177 onClick={() => setIsEditModalOpen(true)} ··· 180 189 </button> 181 190 </> 182 191 )} 192 + {isSemble && ( 193 + <a 194 + href={sembleUrl} 195 + target="_blank" 196 + rel="noopener noreferrer" 197 + className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium rounded-lg bg-surface-100 dark:bg-surface-800 text-surface-600 dark:text-surface-300 hover:bg-surface-200 dark:hover:bg-surface-700 transition-colors" 198 + > 199 + <img src="/semble-logo.svg" alt="" className="w-3.5 h-3.5" /> 200 + View in Semble 201 + <ExternalLink size={12} /> 202 + </a> 203 + )} 183 204 </div> 184 205 </div> 185 206 </div> ··· 209 230 items.map((item) => ( 210 231 <div key={item.uri} className="relative group"> 211 232 <Card item={item} hideShare /> 212 - {isOwner && item.collectionItemUri && ( 233 + {isOwner && !isSemble && item.collectionItemUri && ( 213 234 <button 214 235 
className="absolute top-3 right-3 p-1.5 bg-white/90 dark:bg-surface-800/90 backdrop-blur text-surface-400 dark:text-surface-500 hover:text-red-500 dark:hover:text-red-400 rounded-lg shadow-sm transition-all" 215 236 onClick={() => handleRemoveItem(item)}
+251
web/src/views/core/Discover.tsx
import React, { useState, useEffect, useCallback, useRef } from "react";
import { Loader2, ExternalLink, Compass, Tag } from "lucide-react";
import { useStore } from "@nanostores/react";
import { clsx } from "clsx";
import { getDocuments, getRecommendations } from "../../api/client";
import type { DocumentItem } from "../../api/client";
import { Tabs, EmptyState } from "../../components/ui";
import LayoutToggle from "../../components/ui/LayoutToggle";
import { $user } from "../../store/auth";
import { $feedLayout } from "../../store/feedLayout";
import { formatDistanceToNow } from "date-fns";

/**
 * Discover page: lists documents under "New" and "Popular" tabs, plus a
 * "For You" recommendations tab when a user is signed in.
 */
export default function Discover() {
  const user = useStore($user);
  const layout = useStore($feedLayout);
  const [activeTab, setActiveTab] = useState("new");
  const [items, setItems] = useState<DocumentItem[]>([]);
  const [loading, setLoading] = useState(true);
  const [hasMore, setHasMore] = useState(false);
  const [offset, setOffset] = useState(0);
  const [recommendationsUnavailable, setRecommendationsUnavailable] =
    useState(false);
  // Monotonic id of the most recent fetch; responses carrying an older id are
  // discarded so a slow request cannot overwrite a newer one.
  const fetchIdRef = useRef(0);
  const limit = 30;

  const tabs = [
    { id: "new", label: "New" },
    { id: "popular", label: "Popular" },
    ...(user ? [{ id: "recommended", label: "For You" }] : []),
  ];

  /**
   * Loads one page of items for `tab`.
   *
   * @param tab       Tab id; "recommended" uses the recommendations endpoint,
   *                  anything else is passed to the documents endpoint as `sort`.
   * @param newOffset Offset of the first item to request (documents only).
   * @param append    When true, append to the current list instead of replacing it.
   */
  const fetchItems = useCallback(
    async (tab: string, newOffset = 0, append = false) => {
      const id = ++fetchIdRef.current;
      setLoading(true);

      let data: { items: DocumentItem[]; totalItems: number };
      if (tab === "recommended") {
        const res = await getRecommendations(limit);
        // Check staleness before touching any state, including the
        // "unavailable" branch, so an outdated response cannot clear the
        // spinner of a newer in-flight request.
        if (id !== fetchIdRef.current) return;
        if ("unavailable" in res && res.unavailable) {
          setRecommendationsUnavailable(true);
          setLoading(false);
          return;
        }
        setRecommendationsUnavailable(false);
        data = res;
      } else {
        data = await getDocuments({ sort: tab, limit, offset: newOffset });
        if (id !== fetchIdRef.current) return;
      }

      setItems((prev) => (append ? [...prev, ...data.items] : data.items));
      // Recommendations are fetched as a single page; only document lists paginate.
      setHasMore(
        tab !== "recommended" &&
          newOffset + data.items.length < data.totalItems,
      );
      setOffset(newOffset + data.items.length);
      setLoading(false);
    },
    [limit],
  );

  useEffect(() => {
    setItems([]);
    setOffset(0);
    fetchItems(activeTab, 0);
    return () => {
      // Invalidate whatever is still in flight when the tab changes or the
      // page unmounts.
      fetchIdRef.current++;
    };
  }, [activeTab, fetchItems]);

  const handleTabChange = (id: string) => {
    if (id === activeTab) return;
    setActiveTab(id);
    window.scrollTo({ top: 0, behavior: "smooth" });
  };

  const loadMore = () => {
    fetchItems(activeTab, offset, true);
  };

  return (
    <div className="mx-auto max-w-2xl xl:max-w-none">
      <div className="sticky top-0 z-10 bg-white/95 dark:bg-surface-800/95 backdrop-blur-sm pb-3 mb-2 -mx-1 px-1 pt-1 space-y-2">
        <div className="flex items-center gap-2">
          <Tabs tabs={tabs} activeTab={activeTab} onChange={handleTabChange} />
          <LayoutToggle className="hidden sm:inline-flex ml-auto" />
        </div>
      </div>

      {loading && items.length === 0 ? (
        <div className="flex justify-center py-20">
          <Loader2 className="w-6 h-6 animate-spin text-surface-400" />
        </div>
      ) : activeTab === "recommended" && recommendationsUnavailable ? (
        <EmptyState
          icon={<Compass size={40} />}
          title="Coming soon"
          message="Personalized recommendations aren't available on this server yet."
        />
      ) : items.length === 0 ? (
        <EmptyState
          icon={<Compass size={40} />}
          title="Nothing here yet"
          message={
            activeTab === "recommended"
              ? "Start annotating and highlighting to get personalized recommendations."
              : "No documents have been discovered yet. Check back soon!"
          }
        />
      ) : (
        <div
          className={clsx(
            layout === "mosaic"
              ? "columns-1 sm:columns-2 xl:columns-3 2xl:columns-4 gap-4"
              : "space-y-3",
            "animate-fade-in",
          )}
        >
          {items.map((doc) => (
            <div
              key={doc.uri}
              className={
                layout === "mosaic" ? "break-inside-avoid mb-4" : undefined
              }
            >
              <DocumentCard doc={doc} layout={layout} />
            </div>
          ))}

          {loading && (
            <div className="flex justify-center py-6">
              <Loader2 className="w-5 h-5 animate-spin text-surface-400" />
            </div>
          )}

          {hasMore && !loading && (
            <button
              onClick={loadMore}
              className="w-full py-3 text-sm font-medium text-surface-500 hover:text-surface-700 dark:text-surface-400 dark:hover:text-surface-200 hover:bg-surface-100 dark:hover:bg-surface-800 rounded-lg transition-colors"
            >
              Load more
            </button>
          )}
        </div>
      )}
    </div>
  );
}

/**
 * Card linking to a single document. Shows the document's title, description,
 * host, relative publish time and up to five tags, and decorates itself with
 * an image and icon fetched from `/api/url-metadata` when available.
 */
function DocumentCard({
  doc,
  layout = "list",
}: {
  doc: DocumentItem;
  layout?: "list" | "mosaic";
}) {
  const [ogData, setOgData] = useState<{
    title?: string;
    description?: string;
    image?: string;
    icon?: string;
  } | null>(null);

  useEffect(() => {
    if (!doc.canonicalUrl) return;
    // Abort the request when the card unmounts or its URL changes so a late
    // response never updates state for a different or removed card.
    const controller = new AbortController();
    fetch(`/api/url-metadata?url=${encodeURIComponent(doc.canonicalUrl)}`, {
      signal: controller.signal,
    })
      .then((res) => (res.ok ? res.json() : null))
      .then((data) => {
        if (data && !controller.signal.aborted) setOgData(data);
      })
      // Metadata is decorative; failures (including aborts) are ignored.
      .catch(() => {});
    return () => controller.abort();
  }, [doc.canonicalUrl]);

  // URL without scheme or trailing slash, used when no title/hostname exists.
  const displayUrl = doc.canonicalUrl
    .replace(/^https?:\/\//, "")
    .replace(/\/$/, "");

  const hostname = (() => {
    try {
      return new URL(doc.canonicalUrl).hostname;
    } catch {
      return null;
    }
  })();

  return (
    <a
      href={doc.canonicalUrl}
      target="_blank"
      rel="noopener noreferrer"
      className="card block hover:ring-1 hover:ring-black/10 dark:hover:ring-white/10 transition-all group overflow-hidden"
    >
      {ogData?.image && (
        <div className="w-full h-40 bg-surface-100 dark:bg-surface-800 overflow-hidden">
          <img
            src={ogData.image}
            alt=""
            className="w-full h-full object-cover"
            onError={(e) => (e.currentTarget.style.display = "none")}
          />
        </div>
      )}
      <div className="p-4">
        <div className="flex items-start justify-between gap-3">
          <div className="min-w-0 flex-1">
            <h3 className="font-display font-semibold text-surface-900 dark:text-white group-hover:text-primary-600 dark:group-hover:text-primary-400 transition-colors line-clamp-2">
              {doc.title || displayUrl}
            </h3>
            {doc.description && (
              <p className="mt-1 text-sm text-surface-500 dark:text-surface-400 line-clamp-2">
                {doc.description}
              </p>
            )}
            <div className="mt-2 flex items-center gap-3 text-xs text-surface-400 dark:text-surface-500">
              <span className="flex items-center gap-1 truncate">
                {ogData?.icon ? (
                  <img
                    src={ogData.icon}
                    alt=""
                    className="w-3 h-3 rounded-sm"
                    onError={(e) => (e.currentTarget.style.display = "none")}
                  />
                ) : (
                  <ExternalLink size={12} />
                )}
                {hostname || displayUrl}
              </span>
              {doc.publishedAt && (
                <span>
                  {formatDistanceToNow(new Date(doc.publishedAt), {
                    addSuffix: true,
                  })}
                </span>
              )}
            </div>
            {doc.tags && doc.tags.length > 0 && (
              <div className="mt-2 flex flex-wrap gap-1.5">
                {doc.tags.slice(0, 5).map((tag) => (
                  <span
                    key={tag}
                    className="inline-flex items-center gap-1 px-2 py-1 text-xs font-medium rounded-md bg-surface-100 dark:bg-surface-800 text-surface-600 dark:text-surface-400"
                  >
                    <Tag size={10} />
                    {tag}
                  </span>
                ))}
              </div>
            )}
          </div>
        </div>
      </div>
    </a>
  );
}