A fork of Iceshrimp.NET, but with a frontend tweaked to my personal liking. waow
fediverse social-media social iceshrimp fedi
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

[backend/parsing] Drastically improve MFM parser performance

This commit improves MFM parser performance by up to 22x, depending on input and platform.

+70 -50
+70 -50
Iceshrimp.Parsing/Mfm.fs
··· 197 197 } 198 198 199 199 // Patterns 200 - let italicPattern = ((notFollowedBy <| str "**") >>. skipChar '*') 201 - let italicPatternAlt = ((notFollowedBy <| str "__") >>. skipChar '_') 202 - let codePattern = (notFollowedBy <| str "```") >>. skipChar '`' 200 + let italicPatternAsterisk = notFollowedByString "**" >>. skipChar '*' 201 + let italicPatternUnderscore = notFollowedByString "__" >>. skipChar '_' 202 + let codePattern = notFollowedByString "```" >>. skipChar '`' 203 203 204 204 // Matchers 205 205 let hashtagMatcher = letter <|> digit <|> anyOf "-_" 206 206 let hashtagSatisfier = attempt hashtagMatcher 207 207 208 208 // Node parsers 209 - 210 - let italicNode1 = 209 + let italicAsteriskNode = 211 210 previousCharSatisfiesNot isNotWhitespace 212 - >>. italicPattern 211 + >>. italicPatternAsterisk 213 212 >>. pushLine 214 - >>. manyTill inlineNode italicPattern 213 + >>. manyTill inlineNode italicPatternAsterisk 215 214 .>> assertLine 215 + |>> fun c -> MfmItalicNode(aggregateTextInline c) :> MfmNode 216 216 217 - let italicNode2 = 217 + let italicUnderscoreNode = 218 218 previousCharSatisfiesNot isNotWhitespace 219 - >>. italicPatternAlt 219 + >>. italicPatternUnderscore 220 220 >>. pushLine 221 - >>. manyTill inlineNode italicPatternAlt 221 + >>. manyTill inlineNode italicPatternUnderscore 222 222 .>> assertLine 223 + |>> fun c -> MfmItalicNode(aggregateTextInline c) :> MfmNode 223 224 224 - let italicNode3 = 225 - skipString "<i>" >>. pushLine >>. manyTill inlineNode (skipString "</i>") 225 + let italicTagNode = 226 + skipString "<i>" >>. manyTill inlineNode (skipString "</i>") 227 + |>> fun c -> MfmItalicNode(aggregateTextInline c) :> MfmNode 228 + 229 + let boldAsteriskNode = 230 + previousCharSatisfiesNot isNotWhitespace 231 + >>. skipString "**" 232 + >>. pushLine 233 + >>. 
manyTill inlineNode (skipString "**") 226 234 .>> assertLine 235 + |>> fun c -> MfmBoldNode(aggregateTextInline c) :> MfmNode 227 236 228 - let italicNode = 229 - italicNode1 <|> italicNode2 <|> italicNode3 230 - |>> fun c -> MfmItalicNode(aggregateTextInline c) :> MfmNode 237 + let boldUnderscoreNode = 238 + previousCharSatisfiesNot isNotWhitespace 239 + >>. skipString "__" 240 + >>. pushLine 241 + >>. manyTill inlineNode (skipString "__") 242 + .>> assertLine 243 + |>> fun c -> MfmBoldNode(aggregateTextInline c) :> MfmNode 231 244 232 - let boldNode = 233 - (skipString "**" >>. pushLine >>. manyTill inlineNode (skipString "**") 234 - .>> assertLine) 235 - <|> (skipString "__" >>. pushLine >>. manyTill inlineNode (skipString "__") 236 - .>> assertLine) 237 - <|> (skipString "<b>" >>. pushLine >>. manyTill inlineNode (skipString "</b>") 238 - .>> assertLine) 245 + let boldTagNode = 246 + skipString "<b>" >>. manyTill inlineNode (skipString "</b>") 239 247 |>> fun c -> MfmBoldNode(aggregateTextInline c) :> MfmNode 240 248 241 249 let strikeNode = ··· 250 258 let codeBlockNode = 251 259 opt skipNewline 252 260 >>. opt skipNewline 261 + >>. followedByString "```" 253 262 >>. previousCharSatisfiesNot isNotNewline 254 263 >>. skipString "```" 255 264 >>. opt (many1CharsTill asciiLetter (lookAhead newline)) ··· 317 326 >>. many1CharsTill hashtagMatcher (notFollowedBy hashtagSatisfier) 318 327 |>> fun h -> MfmHashtagNode(h) :> MfmNode 319 328 320 - let urlNodePlain = 329 + let urlNode = 321 330 lookAhead (skipString "https://" <|> skipString "http://") 322 331 >>. 
manyCharsTill anyChar (nextCharSatisfies isWhitespace <|> nextCharSatisfies (isAnyOf "()") <|> eof) //FIXME: this needs significant improvements 323 332 >>= fun uri -> ··· 341 350 | "https" -> preturn (MfmUrlNode(uri, true) :> MfmNode) 342 351 | _ -> fail "invalid scheme" 343 352 | _ -> fail "invalid url" 344 - 345 - let urlNode = urlNodePlain <|> urlNodeBrackets 346 353 347 354 let linkNode = 348 355 (opt (pchar '?')) ··· 380 387 381 388 let charNode = anyChar |>> fun v -> MfmCharNode(v) :> MfmNode 382 389 383 - // Node collection 384 - let inlineNodeSeq = 385 - [ plainNode 386 - smallNode 387 - italicNode 388 - boldNode 389 - strikeNode 390 - hashtagNode 391 - mentionNode 392 - codeNode 393 - urlNode 394 - linkNode 395 - mathNode 396 - emojiCodeNode 397 - fnNode 398 - charNode ] 399 - 400 - let simpleNodeSeq = [ plainNode; emojiCodeNode; charNode ] 390 + // Custom parser for higher throughput 391 + type ParseMode = 392 + | Full 393 + | Inline 394 + | Simple 401 395 402 - let blockNodeSeq = [ centerNode; codeBlockNode; mathBlockNode; quoteNode ] 396 + let parseNode (m: ParseMode) = 397 + let prefixedNode (m: ParseMode) : Parser<MfmNode, int64> = 398 + fun (stream: CharStream<_>) -> 399 + match (stream.Peek(), m) with 400 + // Block nodes, ordered by expected frequency 401 + | '`', Full -> codeBlockNode <|> codeNode 402 + | '\n', Full when stream.Match("\n```") -> codeBlockNode 403 + | '\n', Full when stream.Match("\n\n```") -> codeBlockNode 404 + | '>', Full -> quoteNode 405 + | '<', Full when stream.Match "<center>" -> centerNode 406 + | '\\', Full when stream.Match "\\[" -> mathBlockNode 407 + // Inline nodes, ordered by expected frequency 408 + | '*', (Full | Inline) -> italicAsteriskNode <|> boldAsteriskNode 409 + | '_', (Full | Inline) -> italicUnderscoreNode <|> boldUnderscoreNode 410 + | '@', (Full | Inline) -> mentionNode 411 + | '#', (Full | Inline) -> hashtagNode 412 + | '`', Inline -> codeNode 413 + | 'h', (Full | Inline) when stream.Match "http" -> 
urlNode 414 + | ':', (Full | Inline | Simple) -> emojiCodeNode 415 + | '~', (Full | Inline) when stream.Match "~~" -> strikeNode 416 + | '[', (Full | Inline) -> linkNode 417 + | '<', (Full | Inline) -> choice [ plainNode; smallNode; italicTagNode; boldTagNode; urlNodeBrackets ] 418 + | '<', Simple when stream.Match "<plain>" -> plainNode 419 + | '\\', (Full | Inline) when stream.Match "\\(" -> mathNode 420 + | '$', (Full | Inline) when stream.Match "$[" -> fnNode 421 + | '?', (Full | Inline) when stream.Match "[" -> linkNode 422 + // Fallback to char node 423 + | _ -> charNode 424 + <| stream 403 425 404 - let nodeSeq = [ blockNodeSeq; inlineNodeSeq ] 426 + attempt <| prefixedNode m <|> charNode 405 427 406 428 // Populate references 407 - do nodeRef.Value <- choice <| seqAttempt (seqFlatten <| nodeSeq) 408 - 409 - do inlineNodeRef.Value <- choice <| (seqAttempt inlineNodeSeq) |>> fun v -> v :?> MfmInlineNode 410 - 411 - do simpleRef.Value <- choice <| seqAttempt simpleNodeSeq 429 + do nodeRef.Value <- parseNode Full 430 + do inlineNodeRef.Value <- parseNode Inline |>> fun v -> v :?> MfmInlineNode 431 + do simpleRef.Value <- parseNode Simple 412 432 413 433 // Final parse command 414 434 let parse = spaces >>. manyTill node eof .>> spaces