Loading up the Forgejo repo on Tangled to test page performance.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: include a default robots.txt to reduce the impact of crawlers (#7387)

- Add a strong strict default robots.txt, if one is not provided by the instance administrators.
- Remove code for the legacy public asset path, the error has been logged for a few releases already (existed since v1.21).
- Resolves forgejo/forgejo#923

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7387
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: 0ko <0ko@noreply.codeberg.org>
Co-authored-by: Gusted <postmaster@gusted.xyz>
Co-committed-by: Gusted <postmaster@gusted.xyz>

authored by Gusted and committed by Gusted
bb4e1f42 51caba69

+87 -11
-3
cmd/web.go
··· 198 198 for fn := range publicFilesSet.Seq() { 199 199 log.Error("Found legacy public asset %q in CustomPath. Please move it to %s/public/assets/%s", fn, setting.CustomPath, fn) 200 200 } 201 - if _, err := os.Stat(filepath.Join(setting.CustomPath, "robots.txt")); err == nil { 202 - log.Error(`Found legacy public asset "robots.txt" in CustomPath. Please move it to %s/public/robots.txt`, setting.CustomPath) 203 - } 204 201 205 202 routers.InitWebInstalled(graceful.GetManager().HammerContext()) 206 203
+87 -8
routers/web/misc/misc.go
··· 33 33 w.WriteHeader(http.StatusOK) 34 34 } 35 35 36 + func StaticRedirect(target string) func(w http.ResponseWriter, req *http.Request) { 37 + return func(w http.ResponseWriter, req *http.Request) { 38 + http.Redirect(w, req, path.Join(setting.StaticURLPrefix, target), http.StatusMovedPermanently) 39 + } 40 + } 41 + 42 + var defaultRobotsTxt = []byte(`# The default Forgejo robots.txt 43 + # For more information: https://forgejo.org/docs/latest/admin/search-engines-indexation/ 44 + 45 + User-agent: * 46 + Disallow: /api/ 47 + Disallow: /avatars/ 48 + Disallow: /user/ 49 + Disallow: /swagger.*.json 50 + Disallow: /explore/*?* 51 + 52 + Disallow: /repo/create 53 + Disallow: /repo/migrate 54 + Disallow: /org/create 55 + Disallow: /*/*/fork 56 + 57 + Disallow: /*/*/watchers 58 + Disallow: /*/*/stargazers 59 + Disallow: /*/*/forks 60 + 61 + Disallow: /*/*/src/ 62 + Disallow: /*/*/blame/ 63 + Disallow: /*/*/commit/ 64 + Disallow: /*/*/commits/ 65 + Disallow: /*/*/raw/ 66 + Disallow: /*/*/media/ 67 + Disallow: /*/*/tags 68 + Disallow: /*/*/graph 69 + Disallow: /*/*/branches 70 + Disallow: /*/*/compare 71 + Disallow: /*/*/lastcommit/ 72 + Disallow: /*/*/rss/branch/ 73 + Disallow: /*/*/atom/branch/ 74 + 75 + Disallow: /*/*/activity 76 + Disallow: /*/*/activity_author_data 77 + 78 + Disallow: /*/*/actions 79 + Disallow: /*/*/projects 80 + Disallow: /*/*/labels 81 + Disallow: /*/*/milestones 82 + 83 + Disallow: /*/*/find/ 84 + Disallow: /*/*/tree-list/ 85 + Disallow: /*/*/search/ 86 + Disallow: /*/-/code 87 + 88 + Disallow: /*/*/issues/new 89 + Disallow: /*/*/pulls/*/files 90 + Disallow: /*/*/pulls/*/commits 91 + 92 + Disallow: /attachments/ 93 + Disallow: /*/*/attachments/ 94 + Disallow: /*/*/issues/*/attachments/ 95 + Disallow: /*/*/pulls/*/attachments/ 96 + Disallow: /*/*/releases/attachments 97 + Disallow: /*/*/releases/download 98 + 99 + Disallow: /*/*/archive/ 100 + Disallow: /*.bundle$ 101 + Disallow: /*.patch$ 102 + Disallow: /*.diff$ 103 + Disallow: /*.atom$ 104 + Disallow: /*.rss$ 105 + 106 + Disallow: /*lang=* 107 + Disallow: /*redirect_to=* 108 + Disallow: /*tab=* 109 + Disallow: /*q=* 110 + Disallow: /*sort=* 111 + Disallow: /*repo-search-archived=* 112 + `) 113 + 36 114 func RobotsTxt(w http.ResponseWriter, req *http.Request) { 115 + httpcache.SetCacheControlInHeader(w.Header(), setting.StaticCacheTime) 116 + w.Header().Set("Content-Type", "text/plain") 117 + 37 118 robotsTxt := util.FilePathJoinAbs(setting.CustomPath, "public/robots.txt") 38 - if ok, _ := util.IsExist(robotsTxt); !ok { 39 - robotsTxt = util.FilePathJoinAbs(setting.CustomPath, "robots.txt") // the legacy "robots.txt" 119 + if ok, _ := util.IsExist(robotsTxt); ok { 120 + http.ServeFile(w, req, robotsTxt) 121 + return 40 122 } 41 - httpcache.SetCacheControlInHeader(w.Header(), setting.StaticCacheTime) 42 - http.ServeFile(w, req, robotsTxt) 43 - } 44 123 45 - func StaticRedirect(target string) func(w http.ResponseWriter, req *http.Request) { 46 - return func(w http.ResponseWriter, req *http.Request) { 47 - http.Redirect(w, req, path.Join(setting.StaticURLPrefix, target), http.StatusMovedPermanently) 124 + _, err := w.Write(defaultRobotsTxt) 125 + if err != nil { 126 + log.Error("failed to write robots.txt: %v", err) 48 127 } 49 128 }