Monorepo for Tangled
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

appview,knotmirror: prefer knotmirror to load blobs

also use octet-stream for blobs larger than 1MB to avoid OOM

Signed-off-by: Seongmin Lee <git@boltless.me>

authored by

Seongmin Lee and committed by
Tangled
8dcd952d a9d2f94d

+262 -89
+2 -1
api/tangled/repoblob.go
··· 30 30 // content: File content (base64 encoded for binary files) 31 31 Content *string `json:"content,omitempty" cborgen:"content,omitempty"` 32 32 // encoding: Content encoding 33 - Encoding *string `json:"encoding,omitempty" cborgen:"encoding,omitempty"` 33 + Encoding *string `json:"encoding,omitempty" cborgen:"encoding,omitempty"` 34 + FileTooLarge *bool `json:"fileTooLarge,omitempty" cborgen:"fileTooLarge,omitempty"` 34 35 // isBinary: Whether the file is binary 35 36 IsBinary *bool `json:"isBinary,omitempty" cborgen:"isBinary,omitempty"` 36 37 LastCommit *RepoBlob_LastCommit `json:"lastCommit,omitempty" cborgen:"lastCommit,omitempty"`
+4
appview/config/config.go
··· 73 73 SharedSecret string `env:"SHARED_SECRET"` 74 74 } 75 75 76 + func (c *CamoConfig) Enabled() bool { 77 + return c.SharedSecret != "" 78 + } 79 + 76 80 type AvatarConfig struct { 77 81 Host string `env:"HOST, default=https://avatar.tangled.sh"` 78 82 SharedSecret string `env:"SHARED_SECRET"`
+1
appview/models/repo.go
··· 144 144 HasTextView bool // can show as code/text 145 145 HasRenderedView bool // can show rendered (markup/image/video/submodule) 146 146 HasRawView bool // can download raw (everything except submodule) 147 + FileTooLarge bool // file too large (ignored for image files) 147 148 148 149 // current display mode 149 150 ShowingRendered bool // currently in rendered mode
+8 -33
appview/pages/markup/markdown.go
··· 25 25 "go.abhg.dev/goldmark/mermaid" 26 26 htmlparse "golang.org/x/net/html" 27 27 28 - "tangled.org/core/api/tangled" 29 28 textension "tangled.org/core/appview/pages/markup/extension" 30 29 "tangled.org/core/appview/pages/repoinfo" 31 30 ) ··· 177 176 switch node.Type { 178 177 case htmlparse.ElementNode: 179 178 switch node.Data { 179 + case "a": 180 + // TODO: transform `./` or `/` links to tree link 180 181 case "img", "source": 181 182 for i, attr := range node.Attr { 182 183 if attr.Key != "src" { ··· 185 186 186 187 camoUrl, _ := url.Parse(ctx.CamoUrl) 187 188 dstUrl, _ := url.Parse(attr.Val) 188 - if dstUrl.Host != camoUrl.Host { 189 - attr.Val = ctx.imageFromKnotTransformer(attr.Val) 189 + if camoUrl != nil && dstUrl != nil && dstUrl.Host != camoUrl.Host { 190 + attr.Val = ctx.imageToRawTransformer(attr.Val) 190 191 attr.Val = ctx.camoImageLinkTransformer(attr.Val) 191 192 node.Attr[i] = attr 192 193 } ··· 224 225 case *ast.Heading: 225 226 a.rctx.anchorHeadingTransformer(n) 226 227 case *ast.Link: 228 + // TODO: run this on HTML transformation instead 227 229 a.rctx.relativeLinkTransformer(n) 228 - case *ast.Image: 229 - a.rctx.imageFromKnotAstTransformer(n) 230 - a.rctx.camoImageLinkAstTransformer(n) 231 230 } 232 231 case RendererTypeDefault: 233 232 switch n := n.(type) { 234 233 case *ast.Heading: 235 234 a.rctx.anchorHeadingTransformer(n) 236 - case *ast.Image: 237 - a.rctx.imageFromKnotAstTransformer(n) 238 - a.rctx.camoImageLinkAstTransformer(n) 239 235 } 240 236 } 241 237 ··· 257 253 link.Destination = []byte(newPath) 258 254 } 259 255 260 - func (rctx *RenderContext) imageFromKnotTransformer(dst string) string { 256 + func (rctx *RenderContext) imageToRawTransformer(dst string) string { 261 257 if isAbsoluteUrl(dst) { 262 258 return dst 263 - } 264 - 265 - scheme := "https" 266 - if rctx.IsDev { 267 - scheme = "http" 268 259 } 269 260 270 261 actualPath := rctx.actualPath(dst) 271 262 272 - repoName := fmt.Sprintf("%s/%s", 
rctx.RepoInfo.OwnerDid, rctx.RepoInfo.Name) 273 - 274 - query := fmt.Sprintf("repo=%s&ref=%s&path=%s&raw=true", 275 - url.QueryEscape(repoName), url.QueryEscape(rctx.RepoInfo.Ref), actualPath) 276 - 277 - parsedURL := &url.URL{ 278 - Scheme: scheme, 279 - Host: rctx.Knot, 280 - Path: path.Join("/xrpc", tangled.RepoBlobNSID), 281 - RawQuery: query, 282 - } 283 - newPath := parsedURL.String() 284 - return newPath 285 - } 286 - 287 - func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) { 288 - dst := string(img.Destination) 289 - img.Destination = []byte(rctx.imageFromKnotTransformer(dst)) 263 + newDest := path.Join("/", rctx.RepoInfo.FullName(), "raw", rctx.RepoInfo.Ref, actualPath) 264 + return newDest 290 265 } 291 266 292 267 func (rctx *RenderContext) anchorHeadingTransformer(h *ast.Heading) {
+4
appview/pages/templates/repo/blob.html
··· 107 107 <div id="blob-contents" class="whitespace-pre peer-target:bg-yellow-200 dark:peer-target:bg-yellow-900">{{ code .BlobView.Contents .Path | escapeHtml }}</div> 108 108 {{ end }} 109 109 </div> 110 + {{ else if .BlobView.FileTooLarge }} 111 + <p class="text-center text-gray-400 dark:text-gray-500"> 112 + This file is too large to render. <a href="/{{ .RepoInfo.FullName }}/raw/{{ pathEscape .Ref }}/{{ .Path }}">View raw</a>. 113 + </p> 110 114 {{ else if .BlobView.ContentType.IsMarkup }} 111 115 <div class="overflow-auto relative {{ $wrapContentClasses }}"> 112 116 {{ if .BlobView.ShowingRendered }}
+17 -52
appview/repo/blob.go
··· 51 51 filePath := chi.URLParam(r, "*") 52 52 filePath, _ = url.PathUnescape(filePath) 53 53 54 - scheme := "http" 55 - if !rp.config.Core.Dev { 56 - scheme = "https" 57 - } 58 - host := fmt.Sprintf("%s://%s", scheme, f.Knot) 59 - xrpcc := &indigoxrpc.Client{ 60 - Host: host, 61 - } 62 - resp, err := tangled.RepoBlob(r.Context(), xrpcc, filePath, false, ref, f.RepoIdentifier()) 54 + xrpcc := &indigoxrpc.Client{Host: rp.config.KnotMirror.Url} 55 + resp, err := tangled.RepoBlob(r.Context(), xrpcc, filePath, false, ref, f.RepoAt().String()) 63 56 if xrpcerr := xrpcclient.HandleXrpcErr(err); xrpcerr != nil { 64 57 l.Error("failed to call XRPC repo.blob", "xrpcerr", xrpcerr, "err", err) 65 58 rp.pages.Error503(w) ··· 135 128 filePath := chi.URLParam(r, "*") 136 129 filePath, _ = url.PathUnescape(filePath) 137 130 138 - scheme := "http" 139 - if !rp.config.Core.Dev { 140 - scheme = "https" 141 - } 142 - repo := f.RepoIdentifier() 143 - baseURL := &url.URL{ 144 - Scheme: scheme, 145 - Host: f.Knot, 146 - Path: "/xrpc/sh.tangled.repo.blob", 147 - } 148 - query := baseURL.Query() 149 - query.Set("repo", repo) 150 - query.Set("ref", ref) 151 - query.Set("path", filePath) 152 - query.Set("raw", "true") 153 - baseURL.RawQuery = query.Encode() 154 - blobURL := baseURL.String() 131 + blobURL := generateBlobURL(rp.config, f, ref, filePath) 132 + 155 133 req, err := http.NewRequest("GET", blobURL, nil) 156 134 if err != nil { 157 135 l.Error("failed to create request", "err", err) ··· 187 165 } 188 166 189 167 contentType := resp.Header.Get("Content-Type") 190 - body, err := io.ReadAll(resp.Body) 191 - if err != nil { 192 - l.Error("error reading response body from knotserver", "err", err) 193 - w.WriteHeader(http.StatusInternalServerError) 194 - return 195 - } 196 168 197 169 // Normalize to bare media type before classification; strips parameters 198 170 // (e.g. 
"; charset=utf-8") and prevents bypass attempts like ··· 208 180 // Serve all textual content as plain text so the browser never 209 181 // interprets knot-supplied markup or scripts. 210 182 w.Header().Set("Content-Type", "text/plain; charset=utf-8") 211 - w.Write(body) 212 - case safeBinaryMIMEType(mediaType): 183 + case safeBinaryMIMEType(mediaType) || contentType == "application/octet-stream": 213 184 // Use the normalized type, never the raw knot-supplied string. 214 185 w.Header().Set("Content-Type", mediaType) 215 - w.Write(body) 216 186 default: 217 187 w.WriteHeader(http.StatusUnsupportedMediaType) 218 188 w.Write([]byte("unsupported content type")) 189 + return 190 + } 191 + if _, err := io.Copy(w, resp.Body); err != nil { 192 + l.Error("error streaming knotmirror response", "err", err) 193 + w.WriteHeader(http.StatusInternalServerError) 194 + return 219 195 } 220 196 } 221 197 ··· 241 217 } 242 218 243 219 // Determine if binary 244 - if resp.IsBinary != nil && *resp.IsBinary { 220 + if (resp.IsBinary != nil && *resp.IsBinary) || (resp.FileTooLarge != nil && *resp.FileTooLarge) { 245 221 view.ContentSrc = generateBlobURL(config, repo, ref, filePath) 246 222 ext := strings.ToLower(filepath.Ext(resp.Path)) 247 223 ··· 295 271 } 296 272 297 273 func generateBlobURL(config *config.Config, repo *models.Repo, ref, filePath string) string { 298 - scheme := "http" 299 - if !config.Core.Dev { 300 - scheme = "https" 301 - } 302 - 303 - repoName := repo.RepoIdentifier() 304 - baseURL := &url.URL{ 305 - Scheme: scheme, 306 - Host: repo.Knot, 307 - Path: "/xrpc/sh.tangled.repo.blob", 308 - } 309 - query := baseURL.Query() 310 - query.Set("repo", repoName) 274 + query := url.Values{} 275 + query.Set("repo", string(repo.RepoAt())) 311 276 query.Set("ref", ref) 312 277 query.Set("path", filePath) 313 278 query.Set("raw", "true") 314 - baseURL.RawQuery = query.Encode() 315 - blobURL := baseURL.String() 316 279 317 - if !config.Core.Dev { 280 + blobURL := 
fmt.Sprintf("%s/xrpc/%s?%s", config.KnotMirror.Url, tangled.GitTempGetBlobNSID, query.Encode()) 281 + 282 + if config.Camo.Enabled() { 318 283 return markup.GenerateCamoURL(config.Camo.Host, config.Camo.SharedSecret, blobURL) 319 284 } 320 285 return blobURL
+52 -3
knotmirror/xrpc/git_get_blob.go
··· 2 2 3 3 import ( 4 4 "context" 5 + "crypto/sha256" 5 6 "fmt" 6 7 "io" 7 8 "net/http" 9 + "path/filepath" 8 10 "slices" 11 + "strings" 9 12 10 13 "github.com/bluesky-social/indigo/atproto/atclient" 11 14 "github.com/bluesky-social/indigo/atproto/syntax" ··· 51 54 } 52 55 defer reader.Close() 53 56 54 - w.Header().Set("Content-Type", "application/octet-stream") 55 - if _, err := io.Copy(w, reader); err != nil { 56 - l.Error("failed to serve the blob", "err", err) 57 + // default to octet-stream for large blobs 58 + if file.Size > 1000*1000 { // 1MB 59 + w.Header().Set("Content-Type", "application/octet-stream") 60 + if _, err := io.Copy(w, reader); err != nil { 61 + l.Error("failed to serve the blob", "err", err) 62 + } 63 + return 64 + } 65 + 66 + contents, err := io.ReadAll(reader) 67 + if err != nil { 68 + l.Error("failed to read blob content", "err", err) 69 + writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to read the blob"}) 70 + return 71 + } 72 + 73 + mimeType := http.DetectContentType(contents) 74 + // override MIME types for formats that http.DetectContentType does not recognize 75 + switch filepath.Ext(path) { 76 + case ".svg": 77 + mimeType = "image/svg+xml" 78 + case ".avif": 79 + mimeType = "image/avif" 80 + case ".jxl": 81 + mimeType = "image/jxl" 82 + case ".heic", ".heif": 83 + mimeType = "image/heif" 84 + } 85 + 86 + switch { 87 + case strings.HasPrefix(mimeType, "image/"), strings.HasPrefix(mimeType, "video/"): 88 + eTag := fmt.Sprintf("\"%x\"", sha256.Sum256(contents)) 89 + if clientETag := r.Header.Get("If-None-Match"); clientETag == eTag { 90 + w.WriteHeader(http.StatusNotModified) 91 + return 92 + } 93 + w.Header().Set("ETag", eTag) 94 + w.Header().Set("Content-Type", mimeType) 95 + 96 + case strings.HasPrefix(mimeType, "text/") || isTextualMimeType(mimeType): 97 + w.Header().Set("Cache-Control", "public, no-cache") 98 + // seve all text content as text/plain 99 + 
w.Header().Set("Content-Type", "text/plain; charset=utf-8") 100 + 101 + default: 102 + l.Error("attempted to serve disallowed file type", "mimetype", mimeType) 103 + writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InvalidRequest", Message: "only image, video, and text files can be accessed directly"}) 104 + return 57 105 } 106 + w.Write(contents) 58 107 } 59 108 60 109 func (x *Xrpc) getFile(ctx context.Context, repo syntax.ATURI, ref, path string) (*object.File, error) {
+170
knotmirror/xrpc/repo_blob.go
··· 1 + package xrpc 2 + 3 + import ( 4 + "context" 5 + "encoding/base64" 6 + "fmt" 7 + "io" 8 + "net/http" 9 + "path/filepath" 10 + "strings" 11 + "time" 12 + 13 + "github.com/bluesky-social/indigo/atproto/atclient" 14 + "github.com/bluesky-social/indigo/atproto/syntax" 15 + "tangled.org/core/api/tangled" 16 + "tangled.org/core/knotserver/git" 17 + ) 18 + 19 + // TODO(boltless): rewrite lexicon in new NSID 20 + func (x *Xrpc) RepoBlob(w http.ResponseWriter, r *http.Request) { 21 + var ( 22 + repoQuery = r.URL.Query().Get("repo") 23 + ref = r.URL.Query().Get("ref") // ref can be empty (git.Open handles this) 24 + path = r.URL.Query().Get("path") 25 + ) 26 + 27 + repo, err := syntax.ParseATURI(repoQuery) 28 + if err != nil || repo.RecordKey() == "" { 29 + writeJson(w, http.StatusBadRequest, atclient.ErrorBody{Name: "BadRequest", Message: fmt.Sprintf("repo parameter invalid: %s", repoQuery)}) 30 + return 31 + } 32 + 33 + l := x.logger.With("repo", repo, "ref", ref, "path", path) 34 + 35 + if path == "" { 36 + writeJson(w, http.StatusBadRequest, atclient.ErrorBody{Name: "BadRequest", Message: "missing path parameter"}) 37 + return 38 + } 39 + 40 + gr, err := x.getRepo(r.Context(), repo, ref) 41 + if err != nil { 42 + l.Warn("local mirror failed, trying proxy", "err", err) 43 + if x.proxyToKnot(w, r, repo) { 44 + return 45 + } 46 + writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to get blob"}) 47 + return 48 + } 49 + 50 + // first check if this path is a submodule 51 + submodule, err := gr.Submodule(path) 52 + if err != nil { 53 + // this is okay, continue and try to treat it as a regular file 54 + } else { 55 + writeJson(w, http.StatusOK, tangled.RepoBlob_Output{ 56 + Ref: ref, 57 + Path: path, 58 + Submodule: &tangled.RepoBlob_Submodule{ 59 + Name: submodule.Name, 60 + Url: submodule.URL, 61 + Branch: &submodule.Branch, 62 + }, 63 + }) 64 + return 65 + } 66 + 67 + file, err := x.getFile(r.Context(), repo, 
ref, path) 68 + if err != nil { 69 + l.Warn("local mirror failed, trying proxy", "err", err) 70 + if x.proxyToKnot(w, r, repo) { 71 + return 72 + } 73 + writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to get blob"}) 74 + return 75 + } 76 + 77 + if file.Size > 1000*1000 { // 1MB 78 + fileTooLarge := true 79 + writeJson(w, http.StatusOK, tangled.RepoBlob_Output{ 80 + Ref: ref, 81 + Path: path, 82 + Size: &file.Size, 83 + FileTooLarge: &fileTooLarge, 84 + }) 85 + return 86 + } 87 + 88 + reader, err := file.Reader() 89 + if err != nil { 90 + l.Error("failed to read blob", "err", err) 91 + writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to read the blob"}) 92 + return 93 + } 94 + contents, err := io.ReadAll(reader) 95 + if err != nil { 96 + l.Error("failed to read blob content", "err", err) 97 + writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to read the blob"}) 98 + return 99 + } 100 + 101 + mimeType := http.DetectContentType(contents) 102 + // override MIME types for formats that http.DetectContentType does not recognize 103 + switch filepath.Ext(path) { 104 + case ".svg": 105 + mimeType = "image/svg+xml" 106 + case ".avif": 107 + mimeType = "image/avif" 108 + case ".jxl": 109 + mimeType = "image/jxl" 110 + case ".heic", ".heif": 111 + mimeType = "image/heif" 112 + } 113 + 114 + isBinary := !(strings.HasPrefix(mimeType, "text/") || isTextualMimeType(mimeType)) 115 + 116 + // include content for text blob or svg 117 + var content *string 118 + if !isBinary { 119 + content = new(string) 120 + *content = string(contents) 121 + } else if filepath.Ext(path) == ".svg" { 122 + content = new(string) 123 + *content = base64.StdEncoding.EncodeToString(contents) 124 + } 125 + 126 + response := tangled.RepoBlob_Output{ 127 + Ref: ref, 128 + Path: path, 129 + Size: &file.Size, 130 + IsBinary: 
&isBinary, 131 + Content: content, 132 + } 133 + 134 + ctx, cancel := context.WithTimeout(r.Context(), 2*time.Second) 135 + defer cancel() 136 + 137 + lastCommit, err := gr.LastCommitFile(ctx, path) 138 + if err == nil && lastCommit != nil { 139 + response.LastCommit = &tangled.RepoBlob_LastCommit{ 140 + Hash: lastCommit.Hash.String(), 141 + Message: lastCommit.Message, 142 + When: lastCommit.When.Format(time.RFC3339), 143 + } 144 + 145 + // try to get author information 146 + commit, err := gr.Commit(lastCommit.Hash) 147 + if err == nil { 148 + response.LastCommit.Author = &tangled.RepoBlob_Signature{ 149 + Name: commit.Author.Name, 150 + Email: commit.Author.Email, 151 + } 152 + } 153 + } 154 + 155 + writeJson(w, http.StatusOK, response) 156 + } 157 + 158 + func (x *Xrpc) getRepo(ctx context.Context, repo syntax.ATURI, ref string) (*git.GitRepo, error) { 159 + repoPath, err := x.makeRepoPath(ctx, repo) 160 + if err != nil { 161 + return nil, fmt.Errorf("resolving repo at-uri: %w", err) 162 + } 163 + 164 + gr, err := git.Open(repoPath, ref) 165 + if err != nil { 166 + return nil, fmt.Errorf("opening git repo: %w", err) 167 + } 168 + 169 + return gr, nil 170 + }
+1
knotmirror/xrpc/xrpc.go
··· 55 55 r.Get("/"+tangled.GitTempListCommitsNSID, x.ListCommits) 56 56 r.Get("/"+tangled.GitTempListLanguagesNSID, x.ListLanguages) 57 57 r.Get("/"+tangled.GitTempListTagsNSID, x.ListTags) 58 + r.Get("/"+tangled.RepoBlobNSID, x.RepoBlob) 58 59 r.Post("/"+tangled.SyncRequestCrawlNSID, x.RequestCrawl) 59 60 60 61 return r
+3
lexicons/repo/blob.json
··· 80 80 "lastCommit": { 81 81 "type": "ref", 82 82 "ref": "#lastCommit" 83 + }, 84 + "fileTooLarge": { 85 + "type": "boolean" 83 86 } 84 87 } 85 88 }