Monorepo for Tangled tangled.org
772
fork

Configure Feed

Select the types of activity you want to include in your feed.

appview/indexer: add numeric fields to repo index for sorting

To sort by number of stars/issues, we have to store these counts in the bleve
index. As a result, we also need to reindex repos when new
issues/stars/pulls land on a repo.

Signed-off-by: oppiliappan <me@oppi.li>

+113 -6
+113 -6
appview/indexer/repos/indexer.go
··· 6 6 "errors" 7 7 "log" 8 8 "os" 9 + "time" 9 10 10 11 "github.com/blevesearch/bleve/v2" 11 12 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" ··· 16 17 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 17 18 "github.com/blevesearch/bleve/v2/index/upsidedown" 18 19 "github.com/blevesearch/bleve/v2/mapping" 20 + "github.com/blevesearch/bleve/v2/search" 19 21 "github.com/blevesearch/bleve/v2/search/query" 20 22 "tangled.org/core/appview/db" 21 23 "tangled.org/core/appview/indexer/base36" ··· 32 34 unicodeNormalizeName = "unicodeNormalize" 33 35 34 36 // Bump this when the index mapping changes to trigger a rebuild. 35 - repoIndexerVersion = 5 37 + repoIndexerVersion = 6 36 38 ) 37 39 38 40 type Indexer struct { ··· 89 91 trigramFieldMapping.IncludeInAll = false 90 92 trigramFieldMapping.Analyzer = "trigram" 91 93 94 + // numeric field mapping for sorting by counts 95 + numericFieldMapping := bleve.NewNumericFieldMapping() 96 + numericFieldMapping.Store = false 97 + numericFieldMapping.IncludeInAll = false 98 + numericFieldMapping.DocValues = true // required for sorting 99 + 100 + // datetime field mapping for sorting by creation date 101 + dateFieldMapping := bleve.NewDateTimeFieldMapping() 102 + dateFieldMapping.Store = false 103 + dateFieldMapping.IncludeInAll = false 104 + dateFieldMapping.DocValues = true // required for sorting 105 + 106 + // boolean field mapping for fork detection 107 + booleanFieldMapping := bleve.NewBooleanFieldMapping() 108 + booleanFieldMapping.Store = false 109 + booleanFieldMapping.IncludeInAll = false 110 + 92 111 // text fields 93 112 docMapping.AddFieldMappingsAt("name", textFieldMapping) 94 113 docMapping.AddFieldMappingsAt("name_trigram", trigramFieldMapping) ··· 103 122 docMapping.AddFieldMappingsAt("knot", keywordFieldMapping) 104 123 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 105 124 125 + // fork indicator for down-ranking 126 + docMapping.AddFieldMappingsAt("is_fork", 
booleanFieldMapping) 127 + 128 + // sortable numeric fields 129 + docMapping.AddFieldMappingsAt("star_count", numericFieldMapping) 130 + docMapping.AddFieldMappingsAt("issue_count", numericFieldMapping) 131 + docMapping.AddFieldMappingsAt("pull_count", numericFieldMapping) 132 + 133 + // sortable date field 134 + docMapping.AddFieldMappingsAt("created", dateFieldMapping) 135 + 106 136 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 107 137 "type": unicodenorm.Name, 108 138 "form": unicodenorm.NFC, ··· 238 268 TopicsExact []string `json:"topics_exact"` 239 269 Knot string `json:"knot"` 240 270 Language string `json:"language"` 271 + IsFork bool `json:"is_fork"` 272 + 273 + // sortable fields 274 + StarCount int `json:"star_count"` 275 + IssueCount int `json:"issue_count"` 276 + PullCount int `json:"pull_count"` 277 + Created time.Time `json:"created"` 241 278 } 242 279 243 280 func makeRepoData(repo *models.Repo) *repoData { 244 281 var language string 282 + var starCount, issueCount, pullCount int 283 + 245 284 if repo.RepoStats != nil { 246 285 language = repo.RepoStats.Language 286 + starCount = repo.RepoStats.StarCount 287 + issueCount = repo.RepoStats.IssueCount.Open + repo.RepoStats.IssueCount.Closed 288 + pullCount = repo.RepoStats.PullCount.Open + 289 + repo.RepoStats.PullCount.Merged + 290 + repo.RepoStats.PullCount.Closed 247 291 } 292 + 293 + isFork := repo.Source != "" 294 + 248 295 return &repoData{ 249 296 ID: repo.Id, 250 297 RepoAt: repo.RepoAt().String(), ··· 257 304 TopicsExact: repo.Topics, 258 305 Knot: repo.Knot, 259 306 Language: language, 307 + IsFork: isFork, 308 + StarCount: starCount, 309 + IssueCount: issueCount, 310 + PullCount: pullCount, 311 + Created: repo.Created, 260 312 } 261 313 } 262 314 ··· 266 318 } 267 319 268 320 type SearchResult struct { 269 - Hits []int64 270 - Total uint64 321 + Hits []int64 322 + Total uint64 323 + Duration time.Duration 271 324 } 272 325 273 326 const maxBatchSize = 20 ··· 287 
340 return ix.indexer.Delete(base36.Encode(repoID)) 288 341 } 289 342 343 + func (ix *Indexer) TotalDocCount() (uint64, error) { 344 + return ix.indexer.DocCount() 345 + } 346 + 290 347 func (ix *Indexer) Search(ctx context.Context, opts models.RepoSearchOptions) (*SearchResult, error) { 291 348 var musts []query.Query 292 349 var mustNots []query.Query ··· 355 412 } 356 413 indexerQuery.AddMust(musts...) 357 414 indexerQuery.AddMustNot(mustNots...) 358 - searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false) 415 + 416 + // use a disjunction where: 417 + // - Non-forks get normal relevance score 418 + // - Forks match but get penalized with lower boost 419 + finalQuery := bleve.NewDisjunctionQuery() 420 + 421 + // add the main query 422 + finalQuery.AddQuery(indexerQuery) 423 + 424 + // add a boosted query for non-forks 425 + notForkQuery := bleve.NewBooleanQuery() 426 + notForkQuery.AddMust(indexerQuery) 427 + isForkQuery := bleve.NewBoolFieldQuery(true) 428 + isForkQuery.SetField("is_fork") 429 + notForkQuery.AddMustNot(isForkQuery) 430 + notForkQuery.SetBoost(2.0) 431 + finalQuery.AddQuery(notForkQuery) 432 + 433 + // use minimum of 1 to ensure all results match at least one clause 434 + finalQuery.SetMin(1) 435 + 436 + searchReq := bleve.NewSearchRequestOptions(finalQuery, opts.Page.Limit, opts.Page.Offset, false) 437 + 438 + if opts.SortField != "" && opts.SortField != "relevance" { 439 + var sortField string 440 + 441 + switch opts.SortField { 442 + case "created": 443 + sortField = "created" 444 + case "stars": 445 + sortField = "star_count" 446 + case "issues": 447 + sortField = "issue_count" 448 + case "pulls": 449 + sortField = "pull_count" 450 + default: 451 + // invalid field, fall back to relevance 452 + sortField = "" 453 + } 454 + 455 + if sortField != "" { 456 + searchReq.SortByCustom(search.SortOrder{ 457 + &search.SortField{ 458 + Field: sortField, 459 + Desc: opts.SortDesc, 460 + }, 461 + }) 462 + } 463 
+ } 464 + 359 465 res, err := ix.indexer.SearchInContext(ctx, searchReq) 360 466 if err != nil { 361 467 return nil, nil 362 468 } 363 469 ret := &SearchResult{ 364 - Total: res.Total, 365 - Hits: make([]int64, len(res.Hits)), 470 + Total: res.Total, 471 + Duration: res.Took, 472 + Hits: make([]int64, len(res.Hits)), 366 473 } 367 474 for i, hit := range res.Hits { 368 475 id, err := base36.Decode(hit.ID)