this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add SQLite FTS5 full-text search

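Replace LIKE '%term%' table scans with FTS5 indexed search for
SQLite. Uses external content FTS5 tables with database triggers
for automatic sync. MySQL retains existing LIKE-based fallback.
Note: on SQLite, title/url and quote/author searches now match whole
words rather than arbitrary substrings; tag matching still uses LIKE.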

- Create FTS5 virtual tables for ircLink (title, url) and quote
(quote, author) with triggers for INSERT/UPDATE/DELETE sync
- Auto-rebuild FTS index from existing data on first run
- Sanitize user input with quoted word tokenization for safe
FTS5 queries (implicit AND, special character escaping)
- Switch test SQLite driver from CGO-based gorm.io/driver/sqlite
to pure-Go glebarez/sqlite to match production driver

+372 -18
+1 -1
internal/data/archive_test.go
··· 5 5 "testing" 6 6 "time" 7 7 8 - "gorm.io/driver/sqlite" 8 + "github.com/glebarez/sqlite" 9 9 "gorm.io/gorm" 10 10 ) 11 11
+113
internal/data/fts.go
··· 1 + package data 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "strings" 7 + ) 8 + 9 + // bootstrapFTS creates FTS5 virtual tables and triggers for SQLite. 10 + // All statements are idempotent (IF NOT EXISTS). 11 + func (s *GormStore) bootstrapFTS(ctx context.Context) error { 12 + // Check which FTS tables need to be created (and thus need initial population). 13 + // We check before creating because CREATE VIRTUAL TABLE IF NOT EXISTS 14 + // doesn't tell us whether it actually created the table. 15 + ftsTables := []struct { 16 + name string 17 + content string 18 + }{ 19 + {"ircLink_fts", "ircLink"}, 20 + {"quote_fts", "quote"}, 21 + } 22 + needsRebuild := make(map[string]bool) 23 + for _, t := range ftsTables { 24 + var exists int64 25 + if err := s.db.WithContext(ctx).Raw( 26 + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?", t.name, 27 + ).Scan(&exists).Error; err != nil { 28 + return fmt.Errorf("checking FTS table %s: %w", t.name, err) 29 + } 30 + needsRebuild[t.name] = (exists == 0) 31 + } 32 + 33 + statements := []string{ 34 + // FTS5 virtual tables (external content — no data duplication) 35 + `CREATE VIRTUAL TABLE IF NOT EXISTS ircLink_fts USING fts5( 36 + title, url, 37 + content='ircLink', 38 + content_rowid='ircLinkID' 39 + )`, 40 + `CREATE VIRTUAL TABLE IF NOT EXISTS quote_fts USING fts5( 41 + quote, author, 42 + content='quote', 43 + content_rowid='quoteID' 44 + )`, 45 + 46 + // ircLink triggers 47 + `CREATE TRIGGER IF NOT EXISTS ircLink_fts_ai AFTER INSERT ON ircLink BEGIN 48 + INSERT INTO ircLink_fts(rowid, title, url) VALUES (new.ircLinkID, new.title, new.url); 49 + END`, 50 + `CREATE TRIGGER IF NOT EXISTS ircLink_fts_ad AFTER DELETE ON ircLink BEGIN 51 + INSERT INTO ircLink_fts(ircLink_fts, rowid, title, url) VALUES('delete', old.ircLinkID, old.title, old.url); 52 + END`, 53 + `CREATE TRIGGER IF NOT EXISTS ircLink_fts_au AFTER UPDATE ON ircLink BEGIN 54 + INSERT INTO ircLink_fts(ircLink_fts, rowid, title, url) 
VALUES('delete', old.ircLinkID, old.title, old.url); 55 + INSERT INTO ircLink_fts(rowid, title, url) VALUES (new.ircLinkID, new.title, new.url); 56 + END`, 57 + 58 + // quote triggers 59 + `CREATE TRIGGER IF NOT EXISTS quote_fts_ai AFTER INSERT ON quote BEGIN 60 + INSERT INTO quote_fts(rowid, quote, author) VALUES (new.quoteID, new.quote, new.author); 61 + END`, 62 + `CREATE TRIGGER IF NOT EXISTS quote_fts_ad AFTER DELETE ON quote BEGIN 63 + INSERT INTO quote_fts(quote_fts, rowid, quote, author) VALUES('delete', old.quoteID, old.quote, old.author); 64 + END`, 65 + `CREATE TRIGGER IF NOT EXISTS quote_fts_au AFTER UPDATE ON quote BEGIN 66 + INSERT INTO quote_fts(quote_fts, rowid, quote, author) VALUES('delete', old.quoteID, old.quote, old.author); 67 + INSERT INTO quote_fts(rowid, quote, author) VALUES (new.quoteID, new.quote, new.author); 68 + END`, 69 + } 70 + 71 + for _, stmt := range statements { 72 + if err := s.db.WithContext(ctx).Exec(stmt).Error; err != nil { 73 + return fmt.Errorf("FTS5 setup failed: %w", err) 74 + } 75 + } 76 + 77 + // Rebuild FTS indexes for any newly created tables that have existing content data. 78 + for _, t := range ftsTables { 79 + if !needsRebuild[t.name] { 80 + continue 81 + } 82 + var contentCount int64 83 + if err := s.db.WithContext(ctx).Raw("SELECT COUNT(*) FROM " + t.content).Scan(&contentCount).Error; err != nil { 84 + return fmt.Errorf("counting %s: %w", t.content, err) 85 + } 86 + if contentCount == 0 { 87 + continue 88 + } 89 + if err := s.db.WithContext(ctx).Exec( 90 + "INSERT INTO " + t.name + "(" + t.name + ") VALUES('rebuild')", 91 + ).Error; err != nil { 92 + return fmt.Errorf("rebuilding %s: %w", t.name, err) 93 + } 94 + } 95 + 96 + return nil 97 + } 98 + 99 + // buildFTSQuery converts user input into a safe FTS5 query string. 100 + // Each word is double-quoted to escape FTS5 special characters. 101 + // Multiple words use implicit AND semantics. 
// NUL bytes in the input are treated as word separators: FTS5 reads the MATCH
// expression as a NUL-terminated string, so a NUL left inside a quoted word
// would cut the phrase short and make the whole query fail to parse.
// Input that contains no words at all yields "".
func buildFTSQuery(input string) string {
	// Fields splits on any run of Unicode whitespace and drops empty tokens,
	// so leading, trailing and repeated separators need no special handling.
	words := strings.Fields(strings.ReplaceAll(input, "\x00", " "))
	if len(words) == 0 {
		return ""
	}

	var b strings.Builder
	// Two quote characters plus one separator per word, on top of the text.
	b.Grow(len(input) + 3*len(words))
	for i, w := range words {
		if i > 0 {
			b.WriteByte(' ')
		}
		b.WriteByte('"')
		// Inside an FTS5 string, a literal double quote is written as two.
		b.WriteString(strings.ReplaceAll(w, `"`, `""`))
		b.WriteByte('"')
	}
	return b.String()
}
+205
internal/data/fts_test.go
··· 1 + package data 2 + 3 + import ( 4 + "context" 5 + "testing" 6 + ) 7 + 8 + func TestBuildFTSQuery(t *testing.T) { 9 + tests := []struct { 10 + input string 11 + expected string 12 + }{ 13 + {"hello world", `"hello" "world"`}, 14 + {"", ""}, 15 + {"single", `"single"`}, 16 + {`has "quotes" inside`, `"has" """quotes""" "inside"`}, 17 + {"special*chars(here)", `"special*chars(here)"`}, 18 + {" extra spaces ", `"extra" "spaces"`}, 19 + {"UPPER lower MiXeD", `"UPPER" "lower" "MiXeD"`}, 20 + } 21 + 22 + for _, tt := range tests { 23 + result := buildFTSQuery(tt.input) 24 + if result != tt.expected { 25 + t.Errorf("buildFTSQuery(%q) = %q, want %q", tt.input, result, tt.expected) 26 + } 27 + } 28 + } 29 + 30 + func TestFTS5_BootstrapCreatesTables(t *testing.T) { 31 + store := newTestStore(t) 32 + ctx := context.Background() 33 + 34 + // Verify FTS tables exist by querying them 35 + var count int64 36 + err := store.db.WithContext(ctx).Raw(`SELECT COUNT(*) FROM ircLink_fts`).Scan(&count).Error 37 + if err != nil { 38 + t.Fatalf("ircLink_fts table should exist after Bootstrap: %v", err) 39 + } 40 + 41 + err = store.db.WithContext(ctx).Raw(`SELECT COUNT(*) FROM quote_fts`).Scan(&count).Error 42 + if err != nil { 43 + t.Fatalf("quote_fts table should exist after Bootstrap: %v", err) 44 + } 45 + } 46 + 47 + func TestFTS5_TriggersPopulateOnInsert(t *testing.T) { 48 + store := newTestStore(t) 49 + ctx := context.Background() 50 + 51 + _, err := store.InsertIRCLink(ctx, &IRCLink{ 52 + User: "alice", Title: "FTS Trigger Test", URL: "http://fts.example.com", ContentType: "text/html", 53 + }) 54 + if err != nil { 55 + t.Fatalf("InsertIRCLink failed: %v", err) 56 + } 57 + 58 + var count int64 59 + err = store.db.WithContext(ctx).Raw(`SELECT COUNT(*) FROM ircLink_fts WHERE ircLink_fts MATCH '"Trigger"'`).Scan(&count).Error 60 + if err != nil { 61 + t.Fatalf("FTS query failed: %v", err) 62 + } 63 + if count != 1 { 64 + t.Errorf("expected 1 FTS match after insert, got %d", count) 65 
+ } 66 + } 67 + 68 + func TestFTS5_TriggersRemoveOnDelete(t *testing.T) { 69 + store := newTestStore(t) 70 + ctx := context.Background() 71 + 72 + id, err := store.InsertIRCLink(ctx, &IRCLink{ 73 + User: "alice", Title: "Deletable Link", URL: "http://delete.example.com", ContentType: "text/html", 74 + }) 75 + if err != nil { 76 + t.Fatalf("InsertIRCLink failed: %v", err) 77 + } 78 + 79 + if err := store.DeleteIRCLink(ctx, id); err != nil { 80 + t.Fatalf("DeleteIRCLink failed: %v", err) 81 + } 82 + 83 + var count int64 84 + err = store.db.WithContext(ctx).Raw(`SELECT COUNT(*) FROM ircLink_fts WHERE ircLink_fts MATCH '"Deletable"'`).Scan(&count).Error 85 + if err != nil { 86 + t.Fatalf("FTS query failed: %v", err) 87 + } 88 + if count != 0 { 89 + t.Errorf("expected 0 FTS matches after delete, got %d", count) 90 + } 91 + } 92 + 93 + func TestFTS5_QuoteTriggersPopulateOnInsert(t *testing.T) { 94 + store := newTestStore(t) 95 + ctx := context.Background() 96 + 97 + _, err := store.InsertQuote(ctx, &Quote{Quote: "FTS quote trigger test", Author: "tester", Poster: "poster"}) 98 + if err != nil { 99 + t.Fatalf("InsertQuote failed: %v", err) 100 + } 101 + 102 + var count int64 103 + err = store.db.WithContext(ctx).Raw(`SELECT COUNT(*) FROM quote_fts WHERE quote_fts MATCH '"trigger"'`).Scan(&count).Error 104 + if err != nil { 105 + t.Fatalf("FTS query failed: %v", err) 106 + } 107 + if count != 1 { 108 + t.Errorf("expected 1 FTS match after insert, got %d", count) 109 + } 110 + } 111 + 112 + func TestFTS5_WordBoundaryMatching(t *testing.T) { 113 + store := newTestStore(t) 114 + ctx := context.Background() 115 + 116 + _, err := store.InsertIRCLink(ctx, &IRCLink{ 117 + User: "alice", Title: "Golang Tutorial Guide", URL: "http://example.com/golang", ContentType: "text/html", 118 + }) 119 + if err != nil { 120 + t.Fatalf("InsertIRCLink failed: %v", err) 121 + } 122 + 123 + // Whole word matches 124 + links, err := store.SearchIRCLinks(ctx, "Golang", ClientFilter{}) 125 + if err 
!= nil { 126 + t.Fatalf("SearchIRCLinks failed: %v", err) 127 + } 128 + if len(links) != 1 { 129 + t.Errorf("expected 1 result for whole word 'Golang', got %d", len(links)) 130 + } 131 + 132 + // Substring should NOT match with FTS5 133 + links, err = store.SearchIRCLinks(ctx, "olan", ClientFilter{}) 134 + if err != nil { 135 + t.Fatalf("SearchIRCLinks failed: %v", err) 136 + } 137 + if len(links) != 0 { 138 + t.Errorf("expected 0 results for substring 'olan' with FTS5, got %d", len(links)) 139 + } 140 + } 141 + 142 + func TestFTS5_MultiWordSearch(t *testing.T) { 143 + store := newTestStore(t) 144 + ctx := context.Background() 145 + 146 + _, err := store.InsertIRCLink(ctx, &IRCLink{ 147 + User: "alice", Title: "Golang Tutorial Guide", URL: "http://example.com/go", ContentType: "text/html", 148 + }) 149 + if err != nil { 150 + t.Fatalf("InsertIRCLink failed: %v", err) 151 + } 152 + _, err = store.InsertIRCLink(ctx, &IRCLink{ 153 + User: "bob", Title: "Rust Tutorial", URL: "http://example.com/rust", ContentType: "text/html", 154 + }) 155 + if err != nil { 156 + t.Fatalf("InsertIRCLink failed: %v", err) 157 + } 158 + 159 + // Multi-word AND: both words must appear 160 + links, err := store.SearchIRCLinks(ctx, "Golang Tutorial", ClientFilter{}) 161 + if err != nil { 162 + t.Fatalf("SearchIRCLinks failed: %v", err) 163 + } 164 + if len(links) != 1 { 165 + t.Errorf("expected 1 result for 'Golang Tutorial', got %d", len(links)) 166 + } 167 + if len(links) > 0 && links[0].Title != "Golang Tutorial Guide" { 168 + t.Errorf("expected 'Golang Tutorial Guide', got %q", links[0].Title) 169 + } 170 + } 171 + 172 + func TestFTS5_RebuildFromExistingData(t *testing.T) { 173 + store := newTestStore(t) 174 + ctx := context.Background() 175 + 176 + // Insert data (triggers will populate FTS) 177 + _, err := store.InsertIRCLink(ctx, &IRCLink{ 178 + User: "alice", Title: "Pre-existing Link", URL: "http://example.com/old", ContentType: "text/html", 179 + }) 180 + if err != nil { 181 + 
t.Fatalf("InsertIRCLink failed: %v", err) 182 + } 183 + 184 + // Drop FTS tables and triggers to simulate a pre-FTS database 185 + store.db.Exec("DROP TABLE IF EXISTS ircLink_fts") 186 + store.db.Exec("DROP TABLE IF EXISTS quote_fts") 187 + store.db.Exec("DROP TRIGGER IF EXISTS ircLink_fts_ai") 188 + store.db.Exec("DROP TRIGGER IF EXISTS ircLink_fts_ad") 189 + store.db.Exec("DROP TRIGGER IF EXISTS ircLink_fts_au") 190 + store.db.Exec("DROP TRIGGER IF EXISTS quote_fts_ai") 191 + store.db.Exec("DROP TRIGGER IF EXISTS quote_fts_ad") 192 + store.db.Exec("DROP TRIGGER IF EXISTS quote_fts_au") 193 + 194 + // Re-run bootstrapFTS — should detect missing tables and rebuild from existing data 195 + if err := store.bootstrapFTS(ctx); err != nil { 196 + t.Fatalf("bootstrapFTS failed: %v", err) 197 + } 198 + 199 + // Verify FTS index was populated from existing content 200 + var count int64 201 + store.db.WithContext(ctx).Raw(`SELECT COUNT(*) FROM ircLink_fts WHERE ircLink_fts MATCH '"Pre-existing"'`).Scan(&count) 202 + if count != 1 { 203 + t.Errorf("expected 1 FTS match after rebuild, got %d", count) 204 + } 205 + }
+51 -15
internal/data/gorm_store.go
··· 36 36 } 37 37 38 38 func (s *GormStore) Bootstrap(ctx context.Context) error { 39 - return s.db.AutoMigrate(&IRCLink{}, &Image{}, &Quote{}, &LinkPreview{}, &Tag{}, &ArchiveLookup{}) 39 + if err := s.db.AutoMigrate(&IRCLink{}, &Image{}, &Quote{}, &LinkPreview{}, &Tag{}, &ArchiveLookup{}); err != nil { 40 + return err 41 + } 42 + 43 + if s.db.Dialector.Name() == "sqlite" { 44 + if err := s.bootstrapFTS(ctx); err != nil { 45 + return fmt.Errorf("FTS5 bootstrap failed: %w", err) 46 + } 47 + } 48 + return nil 40 49 } 41 50 42 51 func applyClientFilter(query *gorm.DB, filter ClientFilter) *gorm.DB { ··· 132 141 133 142 func (s *GormStore) SearchIRCLinks(ctx context.Context, query string, filter ClientFilter) ([]IRCLink, error) { 134 143 var links []IRCLink 135 - // Simple LIKE search for cross-db compatibility 136 - term := "%" + escapeLike(query) + "%" 144 + tagTerm := "%" + escapeLike(query) + "%" 145 + 137 146 // Exclude links with cached error previews using tiered TTLs: 138 147 // - Recent links (< 10 days old): error cache expires after 24h 139 148 // - Old links (>= 10 days old): error cache expires after 60 days 140 - // CAST is required because glebarez/sqlite stores []byte as BLOB, 141 - // and SQLite's LIKE doesn't match text patterns against BLOBs. 142 - // MySQL doesn't support CAST(... AS TEXT), so use CHAR instead. 143 149 castType := "TEXT" 144 150 if s.db.Dialector.Name() == "mysql" { 145 151 castType = "CHAR" ··· 147 153 recentCutoff := time.Now().Add(-24 * time.Hour) 148 154 oldCutoff := time.Now().Add(-60 * 24 * time.Hour) 149 155 linkAgeCutoff := time.Now().Add(-10 * 24 * time.Hour) 150 - q := s.db.WithContext(ctx). 151 - Where(`(title LIKE ? OR url LIKE ? 
OR ircLinkID IN (SELECT resource_id FROM tags WHERE resource_type = 'link' AND tag LIKE ?)) 152 - AND url NOT IN ( 156 + 157 + errorExclusion := `AND url NOT IN ( 153 158 SELECT lp.url FROM link_previews lp 154 - WHERE CAST(lp.data AS `+castType+`) LIKE '%"error":%' 159 + WHERE CAST(lp.data AS ` + castType + `) LIKE '%"error":%' 155 160 AND ( 156 161 (EXISTS (SELECT 1 FROM ircLink il WHERE il.url = lp.url AND il.timestamp > ?) AND lp.updated_at > ?) 157 162 OR 158 163 (NOT EXISTS (SELECT 1 FROM ircLink il WHERE il.url = lp.url AND il.timestamp > ?) AND lp.updated_at > ?) 159 164 ) 160 - )`, term, term, term, linkAgeCutoff, recentCutoff, linkAgeCutoff, oldCutoff) 165 + )` 166 + 167 + var q *gorm.DB 168 + if s.db.Dialector.Name() == "sqlite" { 169 + ftsQuery := buildFTSQuery(query) 170 + q = s.db.WithContext(ctx). 171 + Where(`(ircLinkID IN (SELECT rowid FROM ircLink_fts WHERE ircLink_fts MATCH ?) 172 + OR ircLinkID IN (SELECT resource_id FROM tags WHERE resource_type = 'link' AND tag LIKE ?)) 173 + `+errorExclusion, 174 + ftsQuery, tagTerm, 175 + linkAgeCutoff, recentCutoff, linkAgeCutoff, oldCutoff) 176 + } else { 177 + term := "%" + escapeLike(query) + "%" 178 + q = s.db.WithContext(ctx). 179 + Where(`(title LIKE ? OR url LIKE ? OR ircLinkID IN (SELECT resource_id FROM tags WHERE resource_type = 'link' AND tag LIKE ?)) 180 + `+errorExclusion, 181 + term, term, tagTerm, 182 + linkAgeCutoff, recentCutoff, linkAgeCutoff, oldCutoff) 183 + } 184 + 161 185 q = applyClientFilter(q, filter) 162 186 err := q.Order("clicks DESC"). 163 187 Limit(50). ··· 167 191 168 192 func (s *GormStore) SearchQuotes(ctx context.Context, query string, filter ClientFilter) ([]Quote, error) { 169 193 var quotes []Quote 170 - // Simple LIKE search for cross-db compatibility 171 - term := "%" + escapeLike(query) + "%" 172 - q := s.db.WithContext(ctx). 173 - Where("quote LIKE ? OR author LIKE ? 
OR quoteID IN (SELECT resource_id FROM tags WHERE resource_type = 'quote' AND tag LIKE ?)", term, term, term) 194 + tagTerm := "%" + escapeLike(query) + "%" 195 + 196 + var q *gorm.DB 197 + if s.db.Dialector.Name() == "sqlite" { 198 + ftsQuery := buildFTSQuery(query) 199 + q = s.db.WithContext(ctx). 200 + Where(`quoteID IN (SELECT rowid FROM quote_fts WHERE quote_fts MATCH ?) 201 + OR quoteID IN (SELECT resource_id FROM tags WHERE resource_type = 'quote' AND tag LIKE ?)`, 202 + ftsQuery, tagTerm) 203 + } else { 204 + term := "%" + escapeLike(query) + "%" 205 + q = s.db.WithContext(ctx). 206 + Where("quote LIKE ? OR author LIKE ? OR quoteID IN (SELECT resource_id FROM tags WHERE resource_type = 'quote' AND tag LIKE ?)", 207 + term, term, tagTerm) 208 + } 209 + 174 210 q = applyClientFilter(q, filter) 175 211 err := q.Order("timestamp DESC"). 176 212 Limit(50).
+1 -1
internal/data/hot_test.go
··· 5 5 "testing" 6 6 "time" 7 7 8 - "gorm.io/driver/sqlite" 8 + "github.com/glebarez/sqlite" 9 9 "gorm.io/gorm" 10 10 ) 11 11
+1 -1
internal/data/preview_test.go
··· 5 5 "testing" 6 6 "time" 7 7 8 - "gorm.io/driver/sqlite" 8 + "github.com/glebarez/sqlite" 9 9 "gorm.io/gorm" 10 10 ) 11 11