Monorepo for Tangled
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

appview/indexer/repos: boost popular repos in search

repos with more stars go higher up in the search results.

the final result order is still somewhat ad hoc, since the boost values are
hand-picked. generally speaking: forks rank lower and repos with more stars rank higher.

some further areas for improvement:

- boosting for prefix matches
- boosting for exact matches
- boosting for matches in the repo name over matches in the repo description, topics, website, etc.

Signed-off-by: oppiliappan <me@oppi.li>

+135 -2
+6
appview/indexer/bleve/query.go
··· 31 31 q.FieldVal = field 32 32 return q 33 33 } 34 + 35 + func NumericRangeQuery(field string, min, max *float64) query.Query { 36 + q := bleve.NewNumericRangeQuery(min, max) 37 + q.FieldVal = field 38 + return q 39 + }
+33 -2
appview/indexer/repos/indexer.go
··· 414 414 indexerQuery.AddMustNot(mustNots...) 415 415 416 416 // use a disjunction where: 417 - // - Non-forks get normal relevance score 418 - // - Forks match but get penalized with lower boost 417 + // - repos with more stars get higher boost 418 + // - non-forks get a boost 419 + // - boosts stack 419 420 finalQuery := bleve.NewDisjunctionQuery() 420 421 421 422 // add the main query ··· 429 430 notForkQuery.AddMustNot(isForkQuery) 430 431 notForkQuery.SetBoost(2.0) 431 432 finalQuery.AddQuery(notForkQuery) 433 + 434 + // add boosted queries for repos with more stars 435 + // 10-99 stars 436 + tier2Query := bleve.NewBooleanQuery() 437 + tier2Query.AddMust(indexerQuery) 438 + min10 := float64(10) 439 + max99 := float64(99) 440 + starRange2 := bleveutil.NumericRangeQuery("star_count", &min10, &max99) 441 + tier2Query.AddMust(starRange2) 442 + tier2Query.SetBoost(1.5) 443 + finalQuery.AddQuery(tier2Query) 444 + 445 + // 100-999 stars 446 + tier3Query := bleve.NewBooleanQuery() 447 + tier3Query.AddMust(indexerQuery) 448 + min100 := float64(100) 449 + max999 := float64(999) 450 + starRange3 := bleveutil.NumericRangeQuery("star_count", &min100, &max999) 451 + tier3Query.AddMust(starRange3) 452 + tier3Query.SetBoost(2.5) 453 + finalQuery.AddQuery(tier3Query) 454 + 455 + // 1000+ stars 456 + tier4Query := bleve.NewBooleanQuery() 457 + tier4Query.AddMust(indexerQuery) 458 + min1000 := float64(1000) 459 + starRange4 := bleveutil.NumericRangeQuery("star_count", &min1000, nil) 460 + tier4Query.AddMust(starRange4) 461 + tier4Query.SetBoost(4.0) 462 + finalQuery.AddQuery(tier4Query) 432 463 433 464 // use minimum of 1 to ensure all results match at least one clause 434 465 finalQuery.SetMin(1)
+96
appview/indexer/repos/indexer_test.go
··· 637 637 assert.Equal(t, uint64(1), result.Total) 638 638 assert.Contains(t, result.Hits, int64(2)) 639 639 } 640 + 641 + func TestStarCountBoosting(t *testing.T) { 642 + ix, cleanup := setupTestIndexer(t) 643 + defer cleanup() 644 + 645 + ctx := context.Background() 646 + 647 + err := ix.Index(ctx, 648 + models.Repo{ 649 + Id: 1, 650 + Did: "did:plc:alice", 651 + Name: "repo", 652 + Description: "testing", 653 + RepoStats: &models.RepoStats{StarCount: 5000}, 654 + }, 655 + models.Repo{ 656 + Id: 2, 657 + Did: "did:plc:bob", 658 + Name: "repo", 659 + Description: "testing", 660 + RepoStats: &models.RepoStats{StarCount: 150}, 661 + }, 662 + models.Repo{ 663 + Id: 3, 664 + Did: "did:plc:charlie", 665 + Name: "repo", 666 + Description: "testing", 667 + RepoStats: &models.RepoStats{StarCount: 5}, 668 + }, 669 + models.Repo{ 670 + Id: 4, 671 + Did: "did:plc:dana", 672 + Name: "repo", 673 + Description: "testing", 674 + RepoStats: &models.RepoStats{StarCount: 25}, 675 + }, 676 + ) 677 + require.NoError(t, err) 678 + 679 + // search for "testing" - should rank by star count when all else equal 680 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 681 + Keywords: []string{"testing"}, 682 + Page: pagination.Page{Limit: 10}, 683 + }) 684 + require.NoError(t, err) 685 + assert.Equal(t, uint64(4), result.Total) 686 + 687 + // verify that repos with more stars rank higher than those with fewer 688 + popularIdx := -1 689 + smallIdx := -1 690 + for i, hit := range result.Hits { 691 + if hit == 1 { // 5000 stars 692 + popularIdx = i 693 + } 694 + if hit == 3 { // 5 stars 695 + smallIdx = i 696 + } 697 + } 698 + assert.True(t, popularIdx < smallIdx, "repo with 5000 stars should rank above repo with 5 stars") 699 + } 700 + 701 + func TestStarBoostingWithForkPenalty(t *testing.T) { 702 + ix, cleanup := setupTestIndexer(t) 703 + defer cleanup() 704 + 705 + ctx := context.Background() 706 + 707 + err := ix.Index(ctx, 708 + models.Repo{ 709 + Id: 1, 710 + Did: 
"did:plc:alice", 711 + Name: "original-popular", 712 + Description: "test project", 713 + Source: "", 714 + RepoStats: &models.RepoStats{StarCount: 100}, 715 + }, 716 + models.Repo{ 717 + Id: 2, 718 + Did: "did:plc:bob", 719 + Name: "fork-very-popular", 720 + Description: "test project", 721 + Source: "did:plc:someone/original", 722 + RepoStats: &models.RepoStats{StarCount: 1000}, 723 + }, 724 + ) 725 + require.NoError(t, err) 726 + 727 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 728 + Keywords: []string{"project"}, 729 + Page: pagination.Page{Limit: 10}, 730 + }) 731 + require.NoError(t, err) 732 + 733 + // fork with 1000 stars (4.0x) vs non-fork with 100 stars (2.0 * 2.5 = 5.0x) 734 + assert.Equal(t, int64(1), result.Hits[0], "non-fork with fewer stars can still rank higher due to combined boost") 735 + }