Transpiler for HTML-in-PowerShell, PSX (like JSX)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

tokenizer mostly finished, all tests passing

+522 -18
+271 -5
Compiler.ps1
# Context from the parts of Compiler.ps1 that are only partly visible in this
# excerpt (and therefore not reproduced as code here):
#   - enum TokenType keeps EQUAL (=) and OPEN_ELEMENT_END (</) and, with this
#     change, gains TAG_NAME, ATTR_NAME, ATTR_VALUE, ATTR_VALUE_SCRIPTBLOCK,
#     EOF and ILLEGAL; PS_BRACE_START, PS_BRACE_END and IDENTIFIER are removed.
#   - class Token is declared between the enums and the Lexer.

# Where the lexer currently is relative to the element being read.
enum ElementState {
    UNOPENED               = 0   # before <
    OPENED                 = 1   # <
    AFTER_TAG              = 2   # <div
    AFTER_ATTRIBUTE_NAME   = 3   # <div attr
    AFTER_ATTRIBUTE_EQUALS = 4   # <div attr=
    AFTER_ATTRIBUTE_VALUE  = 5   # <div attr=val, <div
    CLOSED                 = 6   # <div ...>
    CLOSED_VOID            = 7   # <div .../>

    END_OPENED             = 8   # <div ...></
    END_AFTER_TAG          = 9   # <div ...></div
    END_CLOSED             = 10  # <div ...></div>, <div .../>
}

# Character-at-a-time tokenizer for a single PSX element.
# Call NextToken() repeatedly; it returns an EOF token once the input is used up.
class Lexer {
    [String] $LexInput   # full text being tokenized
    [Int] $Pos           # index of $Char within $LexInput
    [Int] $NextPos       # index PopChar() will move to next
    [String] $Char       # current character, '' once past the end

    [String] $CurrentTag = ''                         # name read from the start tag
    [ElementState] $State = [ElementState]::UNOPENED

    Lexer($LexInput) {
        $this.LexInput = $LexInput
        $this.Pos = 0
        $this.NextPos = 1
        # Index the coerced [String] field, not the raw argument, so a $null or
        # empty input yields '' instead of failing on the index operation.
        $this.Char = ''
        if ($this.LexInput.Length -gt 0) {
            $this.Char = [String]($this.LexInput[0])
        }
    }

    # Throws unless the lexer is in exactly the given state.
    hidden [Void] AssertElementState([ElementState] $Expected) {
        if ($Expected -ne $this.State) {
            throw "Bad lexer state: expected $Expected, got $($this.State)"
        }
    }

    # True while the lexer is inside a start tag, after the tag name and not
    # in the middle of an attr=value pair.
    hidden [Boolean] InStartTag() {
        return $this.State -in
            [ElementState]::AFTER_TAG,
            [ElementState]::AFTER_ATTRIBUTE_NAME,
            [ElementState]::AFTER_ATTRIBUTE_VALUE
    }

    # Advances one character and returns the new current character
    # ('' when the end of the input has been reached).
    hidden [String] PopChar() {
        if ($this.NextPos -ge $this.LexInput.Length) {
            $this.Char = ''
        } else {
            $this.Char = $this.LexInput[$this.NextPos]
        }

        $this.Pos = $this.NextPos
        $this.NextPos++

        return $this.Char
    }

    # Returns the character after the current one without advancing.
    hidden [String] PeekChar() {
        if ($this.NextPos -ge $this.LexInput.Length) {
            return ''
        } else {
            return $this.LexInput[$this.NextPos]
        }
    }

    # True when the current character may appear in a tag or attribute name.
    hidden [Boolean] CharIsIdentifier() {
        return $this.Char -match '[a-zA-Z0-9_-]'
    }

    # Consumes a run of identifier characters starting at the current position
    # and returns it.
    hidden [String] PopIdentifier() {
        $Start = $this.Pos

        while ($this.CharIsIdentifier()) {
            $this.PopChar()
        }
        # js string.substring is (indexStart, indexEnd)
        # dotnet String.SubString is (startIndex, length)
        return $this.LexInput.SubString($Start, $this.Pos - $Start)
    }

    # Same as PopIdentifier(), but restores the cursor afterwards.
    hidden [String] PeekIdentifier() {
        $Start = $this.Pos
        $OldNext = $this.NextPos
        $OldChar = $this.Char

        while ($this.CharIsIdentifier()) {
            $this.PopChar()
        }

        $ret = $this.LexInput.SubString($Start, $this.Pos - $Start)

        # there's a better way to do this but that is for later
        $this.Pos = $Start
        $this.NextPos = $OldNext
        $this.Char = $OldChar

        return $ret
    }

    # Consumes a delimited attribute value ("...", '...' or {...}) and returns
    # the text between the delimiters. The cursor ends just past the closer.
    # Throws when the input ends before the closing delimiter.
    # NOTE: the value ends at the FIRST closing delimiter, so a script block
    # containing a nested '}' is cut short.
    hidden [String] PopQuotedAttrValue() {
        if ($this.Char -notin '"', "'", '{') {
            $this.Die($this.Char)
        }
        $ExpectingQuote = $this.Char -eq '{' ? '}' : $this.Char
        $OpenedAt = $this.Pos
        $this.PopChar() # step over the opening delimiter
        $Start = $this.Pos

        # Test the current character BEFORE advancing so an empty value
        # (attr="") stops on its own closing quote, and bail out at end of
        # input instead of spinning forever on ''.
        while ($this.Char -ne $ExpectingQuote) {
            if ($this.Pos -ge $this.LexInput.Length) {
                throw "Unterminated attribute value starting at char $OpenedAt (expected $ExpectingQuote)"
            }
            $this.PopChar()
        }

        $End = $this.Pos
        $this.PopChar() # step over the closing delimiter

        return $this.LexInput.SubString($Start, $End - $Start)
    }

    # Skips over any whitespace at the current position.
    hidden [Void] ConsumeWhitespace() {
        while ($this.Char -match '\s') {
            $this.PopChar()
        }
    }

    # Aborts lexing, reporting the offending character, position and state.
    hidden [Void] Die([String] $Char) {
        throw "Unexpected token $char (char $($this.Pos)) at state $($this.State)"
    }

    # Returns the next token and advances the state machine.
    # Whitespace between tokens is skipped. Characters that are not valid in
    # the current state throw via Die(); characters the lexer does not know at
    # all come back as a single ILLEGAL token holding the rest of the input.
    [Token] NextToken() {
        # -ge (not -eq): never index past the end even if the cursor overshot.
        if ($this.Pos -ge $this.LexInput.Length) {
            return [Token]::new( [TokenType]::EOF, '' )
        }

        $Token = $null

        switch -Regex ($this.Char) {
            '<' {
                if ($this.State -eq [ElementState]::UNOPENED) {
                    $Token = [Token]::new( [TokenType]::OPEN_ELEMENT_START, $this.Char )
                    $this.State = [ElementState]::OPENED
                } elseif ($this.State -eq [ElementState]::CLOSED -and $this.PeekChar() -eq '/') {
                    # '</' is one token; the trailing PopChar() below steps past the '/'.
                    $Token = [Token]::new( [TokenType]::OPEN_ELEMENT_END, $this.Char + $this.PopChar() )
                    $this.State = [ElementState]::END_OPENED
                } else {
                    # '<<', or '<' after '>' that does not begin an end tag:
                    # fail loudly rather than hand back a $null token.
                    $this.Die('<')
                }
            }

            '/' {
                if ($this.InStartTag() -and $this.PeekChar() -eq '>') {
                    $Token = [Token]::new( [TokenType]::VOID_ELEMENT_CLOSE, $this.Char + $this.PopChar() )
                    $this.State = [ElementState]::CLOSED_VOID
                } else {
                    $this.Die('/')
                }
            }

            '>' {
                # check for nested psx here!!!
                if ($this.InStartTag()) {
                    $Token = [Token]::new( [TokenType]::CLOSE_ELEMENT, $this.Char )
                    $this.State = [ElementState]::CLOSED
                } elseif ($this.State -eq [ElementState]::END_AFTER_TAG) {
                    $Token = [Token]::new( [TokenType]::CLOSE_ELEMENT, $this.Char )
                    $this.State = [ElementState]::END_CLOSED
                } else {
                    $this.Die('>')
                }
            }

            {$_ -in '"', "'"} {
                # TODO: check for nested PowerShell
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
                    $this.Die($_)
                }

                $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                return [Token]::new( [TokenType]::ATTR_VALUE, $this.PopQuotedAttrValue() )
            }

            '{' {
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
                    $this.Die($_)
                }

                $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                return [Token]::new( [TokenType]::ATTR_VALUE_SCRIPTBLOCK, $this.PopQuotedAttrValue() )
            }

            '[a-zA-Z0-9_-]' {
                # The meaning of an identifier depends entirely on the state.
                $TokType = $null
                switch ($this.State) {
                    {$_ -eq [ElementState]::OPENED} {
                        $TokType = [TokenType]::TAG_NAME
                        $this.CurrentTag = $this.PeekIdentifier()

                        $this.State = [ElementState]::AFTER_TAG
                    }
                    {$_ -in
                        [ElementState]::AFTER_TAG,
                        [ElementState]::AFTER_ATTRIBUTE_NAME,
                        [ElementState]::AFTER_ATTRIBUTE_VALUE
                    } {
                        $TokType = [TokenType]::ATTR_NAME

                        $this.State = [ElementState]::AFTER_ATTRIBUTE_NAME
                    }
                    {$_ -eq [ElementState]::AFTER_ATTRIBUTE_EQUALS} {
                        # unquoted attribute value
                        $TokType = [TokenType]::ATTR_VALUE

                        $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                    }
                    {$_ -eq [ElementState]::END_OPENED} {
                        # The end tag must name the element that was opened.
                        $Cur = $this.CurrentTag
                        $Peek = $this.PeekIdentifier()
                        if ($Cur -ne $Peek) {
                            throw "Start tag name ($Cur) and end tag name ($Peek) doesn't match"
                        }

                        $TokType = [TokenType]::TAG_NAME

                        $this.State = [ElementState]::END_AFTER_TAG
                    }
                    default {
                        $this.Die( $this.Char )
                    }
                }

                return [Token]::new($TokType, $this.PopIdentifier())
            }

            '=' {
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_NAME) {
                    $this.Die('=')
                }

                $Token = [Token]::new([TokenType]::EQUAL, $this.Char)

                $this.State = [ElementState]::AFTER_ATTRIBUTE_EQUALS
            }

            '\s' {
                $this.ConsumeWhitespace()
                return $this.NextToken()
            }

            default {
                $Token = [Token]::new([TokenType]::ILLEGAL, $this.LexInput.Substring($this.Pos))
            }
        }

        # Branches that did not return consumed exactly one more character.
        $this.PopChar()
        return $Token
    }
}
+251 -13
Tests.ps1
# Lexer test runner.
#   -DumpTest <n[]>  print a per-token trace for the tests with these indexes
#   -DumpAll         print the trace for every test
param(
    [Int[]] $DumpTest,
    [Switch] $DumpAll
)

. ./Compiler.ps1

# Shorthand for building an expected token.
$Tok = { param($Kind, $Text) [Token]::new([TokenType] $Kind, $Text) }

$EOF = & $Tok EOF ''

# Each test: the input text and the exact token sequence NextToken() must yield.
$Tests = @(
    @{
        Name   = "OPEN_ELEMENT_START"
        # NOTE(review): the Input line is elided in the diff view; '<' is
        # reconstructed from the expected tokens below -- confirm.
        Input  = "<"
        Expect = @(
            (& $Tok OPEN_ELEMENT_START '<'),
            $EOF
        )
    },
    @{
        Name   = "Opener and tag name"
        Input  = "<div"
        Expect = @(
            # NOTE(review): the token type of this first entry is elided in the
            # diff view; OPEN_ELEMENT_START is assumed -- confirm.
            (& $Tok OPEN_ELEMENT_START '<'),
            (& $Tok TAG_NAME 'div'),
            $EOF
        )
    },
    @{
        Name   = "Empty element"
        Input  = "<div></div>"
        Expect = @(
            (& $Tok OPEN_ELEMENT_START '<'),
            (& $Tok TAG_NAME 'div'),
            (& $Tok CLOSE_ELEMENT '>'),
            (& $Tok OPEN_ELEMENT_END '</'),
            (& $Tok TAG_NAME 'div'),
            (& $Tok CLOSE_ELEMENT '>'),
            $EOF
        )
    },
    @{
        Name   = "Empty void element"
        Input  = "<img/>"
        Expect = @(
            (& $Tok OPEN_ELEMENT_START '<'),
            (& $Tok TAG_NAME 'img'),
            (& $Tok VOID_ELEMENT_CLOSE '/>'),
            $EOF
        )
    },
    @{
        Name   = "Ignore extra whitespace (empty element)"
        Input  = "<`n`n div `t`n ></ div `n `t`r >"
        Expect = @(
            (& $Tok OPEN_ELEMENT_START '<'),
            (& $Tok TAG_NAME 'div'),
            (& $Tok CLOSE_ELEMENT '>'),
            (& $Tok OPEN_ELEMENT_END '</'),
            (& $Tok TAG_NAME 'div'),
            (& $Tok CLOSE_ELEMENT '>'),
            $EOF
        )
    },
    @{
        Name   = "Empty element with attributes"
        Input  = "<span attr1=`"bau bau`" attr2=awa></span>"
        Expect = @(
            (& $Tok OPEN_ELEMENT_START '<'),
            (& $Tok TAG_NAME 'span'),
            (& $Tok ATTR_NAME 'attr1'),
            (& $Tok EQUAL '='),
            (& $Tok ATTR_VALUE 'bau bau'),
            (& $Tok ATTR_NAME 'attr2'),
            (& $Tok EQUAL '='),
            (& $Tok ATTR_VALUE 'awa'),
            (& $Tok CLOSE_ELEMENT '>'),
            (& $Tok OPEN_ELEMENT_END '</'),
            (& $Tok TAG_NAME 'span'),
            (& $Tok CLOSE_ELEMENT '>'),
            $EOF
        )
    },
    @{
        Name   = "Attribute with pure scriptblock"
        Input  = '<img attrx={ $_ ; ''hello'' "there" """wao"""; return "<baubau knrs>" } />'
        Expect = @(
            (& $Tok OPEN_ELEMENT_START '<'),
            (& $Tok TAG_NAME 'img'),
            (& $Tok ATTR_NAME 'attrx'),
            (& $Tok EQUAL '='),
            (& $Tok ATTR_VALUE_SCRIPTBLOCK ' $_ ; ''hello'' "there" """wao"""; return "<baubau knrs>" '),
            (& $Tok VOID_ELEMENT_CLOSE '/>'),
            $EOF
        )
    },
    @{
        Name   = "Attribute with embedded scriptblock"
        Input  = '<img attrx="bau $( $_ )$(gci Function: )uab" />'
        Expect = @(
            (& $Tok OPEN_ELEMENT_START '<'),
            (& $Tok TAG_NAME 'img'),
            (& $Tok ATTR_NAME 'attrx'),
            (& $Tok EQUAL '='),
            (& $Tok ATTR_VALUE 'bau $( $_ )$(gci Function: )uab'),
            (& $Tok VOID_ELEMENT_CLOSE '/>'),
            $EOF
        )
    }
)

# Run every test, comparing the lexer output token by token; the first
# mismatch aborts the run with a descriptive error.
for ($t = 0; $t -lt $Tests.Count; $t++) {
    $Test = $Tests[$t]
    $Lexer = [Lexer]::new($Test.Input)
    $Dump = $t -in $DumpTest -or $DumpAll

    if ($Dump) {
        Write-Host "Dumping for test $t ""$($Test.Name)"""
        Write-Host "Input: $($Test.Input)"
    }

    $i = 0
    foreach ($Case in $Test.Expect) {
        # Capture the state before the call so the trace shows the transition.
        $PreState = $Lexer.State
        $Next = $Lexer.NextToken()

        if ($Dump) {
            Write-Host (@"
{0}
[ pre-state: {1} pos:{2} next:{3} post-state: {4}]
-> {5}
<- {6}
"@ -f (
                $i,
                $PreState,
                $Lexer.Pos,
                $Lexer.NextPos,
                $Lexer.State,
                $Case,
                $Next
            ))
        }

        if ($Case -ne $Next) {
            throw ("Test ""{0}"" case {1} failed: expected {2}, got {3}" -f `
                $Test.Name,
                $i,
                "[ $Case ]",
                "[ $Next ]"
            )
        }

        $i++
    }
}

Write-Host "All $($Tests.Count) test(s) passed"