cli + tui to publish to leaflet (wip) & manage tasks, notes & watch/read lists 🍃
charm leaflet readability golang
29
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 380 lines 11 kB view raw
1package documents 2 3import ( 4 "testing" 5 "time" 6) 7 8func TestTokenizer(t *testing.T) { 9 tokenizer := NewTokenizer() 10 11 t.Run("Basic tokenization", func(t *testing.T) { 12 t.Run("tokenizes simple text", func(t *testing.T) { 13 tokens := tokenizer.Tokenize("Hello World") 14 if len(tokens) != 2 { 15 t.Fatalf("expected 2 tokens, got %d", len(tokens)) 16 } 17 if tokens[0] != "hello" || tokens[1] != "world" { 18 t.Errorf("expected [hello world], got %v", tokens) 19 } 20 }) 21 22 t.Run("lowercases all tokens", func(t *testing.T) { 23 tokens := tokenizer.Tokenize("UPPERCASE MiXeD lowercase") 24 if len(tokens) != 3 { 25 t.Fatalf("expected 3 tokens, got %d", len(tokens)) 26 } 27 for _, token := range tokens { 28 if token != "uppercase" && token != "mixed" && token != "lowercase" { 29 t.Errorf("unexpected token: %s", token) 30 } 31 } 32 }) 33 34 t.Run("handles punctuation", func(t *testing.T) { 35 tokens := tokenizer.Tokenize("Hello, world! How are you?") 36 expected := []string{"hello", "world", "how", "are", "you"} 37 if len(tokens) != len(expected) { 38 t.Fatalf("expected %d tokens, got %d", len(expected), len(tokens)) 39 } 40 for i, token := range tokens { 41 if token != expected[i] { 42 t.Errorf("token %d: expected %s, got %s", i, expected[i], token) 43 } 44 } 45 }) 46 }) 47 48 t.Run("Unicode support", func(t *testing.T) { 49 t.Run("tokenizes unicode characters", func(t *testing.T) { 50 tokens := tokenizer.Tokenize("caf茅 r茅sum茅 na茂ve") 51 if len(tokens) != 3 { 52 t.Fatalf("expected 3 tokens, got %d", len(tokens)) 53 } 54 }) 55 56 t.Run("handles emoji and special characters", func(t *testing.T) { 57 tokens := tokenizer.Tokenize("hello 馃榾 world") 58 if len(tokens) != 2 { 59 t.Fatalf("expected 2 tokens (emoji excluded), got %d", len(tokens)) 60 } 61 if tokens[0] != "hello" || tokens[1] != "world" { 62 t.Errorf("expected [hello world], got %v", tokens) 63 } 64 }) 65 66 t.Run("tokenizes CJK characters", func(t *testing.T) { 67 tokens := tokenizer.Tokenize("浣犲ソ 涓栫晫") 
68 if len(tokens) != 2 { 69 t.Fatalf("expected 2 tokens, got %d", len(tokens)) 70 } 71 }) 72 }) 73 74 t.Run("Numbers", func(t *testing.T) { 75 t.Run("tokenizes numbers", func(t *testing.T) { 76 tokens := tokenizer.Tokenize("test 123 456") 77 if len(tokens) != 3 { 78 t.Fatalf("expected 3 tokens, got %d", len(tokens)) 79 } 80 if tokens[1] != "123" || tokens[2] != "456" { 81 t.Errorf("expected numbers to be tokenized, got %v", tokens) 82 } 83 }) 84 85 t.Run("handles mixed alphanumeric", func(t *testing.T) { 86 tokens := tokenizer.Tokenize("version 2 released") 87 if len(tokens) != 3 { 88 t.Fatalf("expected 3 tokens, got %d", len(tokens)) 89 } 90 }) 91 }) 92 93 t.Run("Edge cases", func(t *testing.T) { 94 t.Run("handles empty string", func(t *testing.T) { 95 tokens := tokenizer.Tokenize("") 96 if len(tokens) != 0 { 97 t.Errorf("expected 0 tokens for empty string, got %d", len(tokens)) 98 } 99 }) 100 101 t.Run("handles whitespace only", func(t *testing.T) { 102 tokens := tokenizer.Tokenize(" \t\n ") 103 if len(tokens) != 0 { 104 t.Errorf("expected 0 tokens for whitespace, got %d", len(tokens)) 105 } 106 }) 107 108 t.Run("handles punctuation only", func(t *testing.T) { 109 tokens := tokenizer.Tokenize("!@#$%^&*()") 110 if len(tokens) != 0 { 111 t.Errorf("expected 0 tokens for punctuation only, got %d", len(tokens)) 112 } 113 }) 114 }) 115} 116 117func TestTokenFrequency(t *testing.T) { 118 t.Run("counts term frequencies", func(t *testing.T) { 119 tokens := []string{"hello", "world", "hello", "test"} 120 freq := TokenFrequency(tokens) 121 122 if freq["hello"] != 2 { 123 t.Errorf("expected hello frequency 2, got %d", freq["hello"]) 124 } 125 if freq["world"] != 1 { 126 t.Errorf("expected world frequency 1, got %d", freq["world"]) 127 } 128 if freq["test"] != 1 { 129 t.Errorf("expected test frequency 1, got %d", freq["test"]) 130 } 131 }) 132 133 t.Run("handles empty token list", func(t *testing.T) { 134 freq := TokenFrequency([]string{}) 135 if len(freq) != 0 { 136 
t.Errorf("expected empty frequency map, got %d entries", len(freq)) 137 } 138 }) 139 140 t.Run("handles single token", func(t *testing.T) { 141 freq := TokenFrequency([]string{"single"}) 142 if freq["single"] != 1 { 143 t.Errorf("expected frequency 1, got %d", freq["single"]) 144 } 145 }) 146} 147 148func TestBuildIndex(t *testing.T) { 149 now := time.Now() 150 151 t.Run("builds index from documents", func(t *testing.T) { 152 docs := []Document{ 153 {ID: 1, Title: "Go Programming", Body: "Go is a great language", CreatedAt: now, DocKind: int64(NoteDoc)}, 154 {ID: 2, Title: "Python Guide", Body: "Python is versatile", CreatedAt: now, DocKind: int64(ArticleDoc)}, 155 } 156 157 idx := BuildIndex(docs) 158 159 if idx.NumDocs != 2 { 160 t.Errorf("expected NumDocs 2, got %d", idx.NumDocs) 161 } 162 163 if len(idx.DocLengths) != 2 { 164 t.Errorf("expected 2 document lengths, got %d", len(idx.DocLengths)) 165 } 166 167 if idx.DocLengths[1] <= 0 || idx.DocLengths[2] <= 0 { 168 t.Error("document lengths should be positive") 169 } 170 171 if _, exists := idx.Postings["go"]; !exists { 172 t.Error("expected 'go' to be in postings") 173 } 174 if _, exists := idx.Postings["python"]; !exists { 175 t.Error("expected 'python' to be in postings") 176 } 177 }) 178 179 t.Run("handles empty document list", func(t *testing.T) { 180 idx := BuildIndex([]Document{}) 181 if idx.NumDocs != 0 { 182 t.Errorf("expected NumDocs 0, got %d", idx.NumDocs) 183 } 184 if len(idx.Postings) != 0 { 185 t.Errorf("expected empty postings, got %d entries", len(idx.Postings)) 186 } 187 }) 188 189 t.Run("calculates term frequencies correctly", func(t *testing.T) { 190 docs := []Document{ 191 {ID: 1, Title: "test", Body: "test test test", CreatedAt: now, DocKind: int64(NoteDoc)}, 192 } 193 194 idx := BuildIndex(docs) 195 196 postings := idx.Postings["test"] 197 if len(postings) != 1 { 198 t.Fatalf("expected 1 posting for 'test', got %d", len(postings)) 199 } 200 201 if postings[0].TF != 4 { 202 
t.Errorf("expected TF 4 (title + 3 in body), got %d", postings[0].TF) 203 } 204 }) 205 206 t.Run("builds postings for multiple documents with same term", func(t *testing.T) { 207 docs := []Document{ 208 {ID: 1, Title: "Go", Body: "Go is great", CreatedAt: now, DocKind: int64(NoteDoc)}, 209 {ID: 2, Title: "Go Tutorial", Body: "Learn Go", CreatedAt: now, DocKind: int64(NoteDoc)}, 210 } 211 212 idx := BuildIndex(docs) 213 214 postings := idx.Postings["go"] 215 if len(postings) != 2 { 216 t.Fatalf("expected 2 postings for 'go', got %d", len(postings)) 217 } 218 }) 219} 220 221func TestIndexSearch(t *testing.T) { 222 now := time.Now() 223 224 t.Run("Search functionality", func(t *testing.T) { 225 t.Run("returns empty results for empty query", func(t *testing.T) { 226 docs := []Document{ 227 {ID: 1, Title: "Test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 228 } 229 idx := BuildIndex(docs) 230 231 results, err := idx.Search("", 10) 232 if err != nil { 233 t.Fatalf("unexpected error: %v", err) 234 } 235 if len(results) != 0 { 236 t.Errorf("expected 0 results for empty query, got %d", len(results)) 237 } 238 }) 239 240 t.Run("finds matching documents", func(t *testing.T) { 241 docs := []Document{ 242 {ID: 1, Title: "Go Programming", Body: "Learn Go language", CreatedAt: now, DocKind: int64(NoteDoc)}, 243 {ID: 2, Title: "Python Guide", Body: "Python is versatile", CreatedAt: now, DocKind: int64(ArticleDoc)}, 244 } 245 idx := BuildIndex(docs) 246 247 results, err := idx.Search("go", 10) 248 if err != nil { 249 t.Fatalf("unexpected error: %v", err) 250 } 251 252 if len(results) != 1 { 253 t.Fatalf("expected 1 result, got %d", len(results)) 254 } 255 256 if results[0].DocID != 1 { 257 t.Errorf("expected DocID 1, got %d", results[0].DocID) 258 } 259 260 if results[0].Score <= 0 { 261 t.Error("expected positive score") 262 } 263 }) 264 265 t.Run("ranks documents by relevance", func(t *testing.T) { 266 docs := []Document{ 267 {ID: 1, Title: "Go", Body: "tutorial 
python rust", CreatedAt: now, DocKind: int64(NoteDoc)}, 268 {ID: 2, Title: "Go Programming", Body: "advanced go tutorial", CreatedAt: now, DocKind: int64(NoteDoc)}, 269 {ID: 3, Title: "Python", Body: "different language", CreatedAt: now, DocKind: int64(NoteDoc)}, 270 } 271 idx := BuildIndex(docs) 272 273 results, err := idx.Search("go", 10) 274 if err != nil { 275 t.Fatalf("unexpected error: %v", err) 276 } 277 278 if len(results) != 2 { 279 t.Fatalf("expected 2 results, got %d", len(results)) 280 } 281 282 if results[0].DocID != 2 { 283 t.Errorf("expected document 2 to rank higher (has more 'go' terms)") 284 } 285 286 if results[0].Score <= results[1].Score { 287 t.Errorf("expected first result to have higher score, got %f <= %f", results[0].Score, results[1].Score) 288 } 289 }) 290 291 t.Run("respects limit parameter", func(t *testing.T) { 292 docs := []Document{ 293 {ID: 1, Title: "test one", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 294 {ID: 2, Title: "test two", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 295 {ID: 3, Title: "test three", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 296 } 297 idx := BuildIndex(docs) 298 299 results, err := idx.Search("test", 2) 300 if err != nil { 301 t.Fatalf("unexpected error: %v", err) 302 } 303 304 if len(results) != 2 { 305 t.Errorf("expected 2 results with limit=2, got %d", len(results)) 306 } 307 }) 308 309 t.Run("handles multi-term queries", func(t *testing.T) { 310 docs := []Document{ 311 {ID: 1, Title: "Go Programming", Body: "advanced tutorial", CreatedAt: now, DocKind: int64(NoteDoc)}, 312 {ID: 2, Title: "Go Basics", Body: "beginner tutorial", CreatedAt: now, DocKind: int64(NoteDoc)}, 313 {ID: 3, Title: "Python", Body: "different language", CreatedAt: now, DocKind: int64(NoteDoc)}, 314 } 315 idx := BuildIndex(docs) 316 317 results, err := idx.Search("go tutorial", 10) 318 if err != nil { 319 t.Fatalf("unexpected error: %v", err) 320 } 321 322 if len(results) != 2 { 323 
t.Errorf("expected 2 results, got %d", len(results)) 324 } 325 }) 326 327 t.Run("returns no results for non-matching query", func(t *testing.T) { 328 docs := []Document{ 329 {ID: 1, Title: "Go", Body: "programming", CreatedAt: now, DocKind: int64(NoteDoc)}, 330 } 331 idx := BuildIndex(docs) 332 333 results, err := idx.Search("rust", 10) 334 if err != nil { 335 t.Fatalf("unexpected error: %v", err) 336 } 337 338 if len(results) != 0 { 339 t.Errorf("expected 0 results for non-matching query, got %d", len(results)) 340 } 341 }) 342 343 t.Run("handles zero limit", func(t *testing.T) { 344 docs := []Document{ 345 {ID: 1, Title: "test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 346 } 347 idx := BuildIndex(docs) 348 349 results, err := idx.Search("test", 0) 350 if err != nil { 351 t.Fatalf("unexpected error: %v", err) 352 } 353 354 if len(results) != 1 { 355 t.Errorf("expected all results with limit=0, got %d", len(results)) 356 } 357 }) 358 359 t.Run("tie-breaking uses DocID", func(t *testing.T) { 360 docs := []Document{ 361 {ID: 1, Title: "test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 362 {ID: 2, Title: "test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 363 } 364 idx := BuildIndex(docs) 365 366 results, err := idx.Search("test", 10) 367 if err != nil { 368 t.Fatalf("unexpected error: %v", err) 369 } 370 371 if len(results) != 2 { 372 t.Fatalf("expected 2 results, got %d", len(results)) 373 } 374 375 if results[0].DocID <= results[1].DocID { 376 t.Error("expected higher DocID first when scores are equal") 377 } 378 }) 379 }) 380}