this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

cue/literal: allow escaped newlines in multiline strings

Signed-off-by: Roger Peppe <rogpeppe@gmail.com>
Change-Id: I678ecd5c6c2e8ea15f7bedb17dad995b53c20497
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/537298
Reviewed-by: Marcel van Lohuizen <mpvl@gmail.com>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Unity-Result: CUEcueckoo <cueckoo@cuelang.org>
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>

Authored by Roger Peppe and committed by Daniel Martí
618e5301 bd9ae537

+81 -23
+71 -22
cue/literal/string.go
··· 29 29 errUnmatchedQuote = errors.New("invalid string: unmatched quote") 30 30 // TODO: making this an error is optional according to RFC 4627. But we 31 31 // could make it not an error if this ever results in an issue. 32 - errSurrogate = errors.New("unmatched surrogate pair") 32 + errSurrogate = errors.New("unmatched surrogate pair") 33 + errEscapedLastNewline = errors.New("last newline of multiline string cannot be escaped") 33 34 ) 34 35 35 36 // Unquote interprets s as a single- or double-quoted, single- or multi-line ··· 135 136 return q, nStart, int(q.numChar) + q.numHash, nil 136 137 } 137 138 138 - // Unquote unquotes the given string. It must be terminated with a quote or an 139 + // Unquote unquotes the given string, which should not contain 140 + // the initial quote character(s). It must be terminated with a quote or an 139 141 // interpolation start. Escape sequences are expanded and surrogates 140 142 // are replaced with the corresponding non-surrogate code points. 141 143 func (q QuoteInfo) Unquote(s string) (string, error) { ··· 155 157 var runeTmp [utf8.UTFMax]byte 156 158 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 
157 159 stripNL := false 160 + wasEscapedNewline := false 158 161 for len(s) > 0 { 159 162 switch s[0] { 160 163 case '\r': 161 164 s = s[1:] 165 + wasEscapedNewline = false 162 166 continue 163 167 case '\n': 164 - switch { 165 - case !q.multiline: 166 - fallthrough 167 - default: 168 - return "", errInvalidWhitespace 169 - case strings.HasPrefix(s[1:], q.whitespace): 170 - s = s[1+len(q.whitespace):] 171 - case strings.HasPrefix(s[1:], "\n"): 172 - s = s[1:] 168 + var err error 169 + s, err = skipWhitespaceAfterNewline(s[1:], q) 170 + if err != nil { 171 + return "", err 173 172 } 174 173 stripNL = true 174 + wasEscapedNewline = false 175 175 buf = append(buf, '\n') 176 176 continue 177 177 } ··· 194 194 195 195 s = ss 196 196 if c < 0 { 197 - if c == -2 { 198 - stripNL = false 199 - } 200 - if stripNL { 201 - // Strip the last newline, but only if it came from a closing 202 - // quote. 203 - buf = buf[:len(buf)-1] 197 + switch c { 198 + case escapedNewline: 199 + var err error 200 + s, err = skipWhitespaceAfterNewline(s, q) 201 + if err != nil { 202 + return "", err 203 + } 204 + wasEscapedNewline = true 205 + continue 206 + case terminatedByQuote: 207 + if wasEscapedNewline { 208 + return "", errEscapedLastNewline 209 + } 210 + if stripNL { 211 + // Strip the last newline, but only if it came from a closing 212 + // quote. 213 + buf = buf[:len(buf)-1] 214 + } 215 + case terminatedByExpr: 216 + default: 217 + panic("unreachable") 204 218 } 205 219 return string(buf), nil 206 220 } 207 221 stripNL = false 222 + wasEscapedNewline = false 208 223 if c < utf8.RuneSelf || !multibyte { 209 224 buf = append(buf, byte(c)) 210 225 } else { ··· 216 231 return "", errUnmatchedQuote 217 232 } 218 233 234 + func skipWhitespaceAfterNewline(s string, q QuoteInfo) (string, error) { 235 + switch { 236 + case !q.multiline: 237 + // Can't happen because Unquote does an initial check for literal newlines 238 + // in the non-multiline case, but be defensive. 
239 + fallthrough 240 + default: 241 + return "", errInvalidWhitespace 242 + case strings.HasPrefix(s, q.whitespace): 243 + s = s[len(q.whitespace):] 244 + case strings.HasPrefix(s, "\n"): 245 + case strings.HasPrefix(s, "\r\n"): 246 + } 247 + return s, nil 248 + } 249 + 219 250 const ( 220 251 surHigh = 0xD800 221 252 surLow = 0xDC00 ··· 247 278 return false 248 279 } 249 280 281 + const ( 282 + terminatedByQuote = rune(-1) 283 + terminatedByExpr = rune(-2) 284 + escapedNewline = rune(-3) 285 + ) 286 + 250 287 // unquoteChar decodes the first character or byte in the escaped string. 251 288 // It returns four values: 252 289 // 253 - // 1) value, the decoded Unicode code point or byte value; the special value 254 - // of -1 indicates terminated by quotes and -2 means terminated by \(. 290 + // 1) value, the decoded Unicode code point or byte value if non-negative, or 291 + // one of the following special values: 292 + // - terminatedByQuote indicates terminated by quotes 293 + // - terminatedByExpr means terminated by \( 294 + // - escapedNewline means that the line-termination character was quoted and should be omitted 255 295 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 256 296 // 3) tail, the remainder of the string after the character; and 257 297 // 4) an error that will be nil if the character is syntactically valid. ··· 281 321 // TODO: terminating quote in middle of string 282 322 return 0, false, s[ln:], errSyntax 283 323 } 284 - return -1, false, "", nil 324 + return terminatedByQuote, false, "", nil 285 325 case c >= utf8.RuneSelf: 286 326 // TODO: consider handling surrogate values. These are discarded by 287 327 // DecodeRuneInString. 
It is technically correct to disallow it, but ··· 398 438 // TODO: terminating quote in middle of string 399 439 return 0, false, s, errSyntax 400 440 } 401 - value = -2 441 + value = terminatedByExpr 442 + case '\r': 443 + if len(s) == 0 || s[0] != '\n' { 444 + err = errSyntax 445 + return 446 + } 447 + s = s[1:] 448 + value = escapedNewline 449 + case '\n': 450 + value = escapedNewline 402 451 default: 403 452 err = errSyntax 404 453 return
+9 -1
cue/literal/string_test.go
··· 28 28 {`'Hello'`, "Hello", nil}, 29 29 {`'Hellø'`, "Hellø", nil}, 30 30 {`"""` + "\n\t\tHello\n\t\t" + `"""`, "Hello", nil}, 31 + {`"""` + "\r\n\tHello\r\n\n\t" + `"""`, "Hello\n", nil}, 31 32 {"'''\n\t\tHello\n\t\t'''", "Hello", nil}, 33 + {`"""` + "\n\tHello \\\n\tworld\n\t" + `"""`, "Hello world", nil}, 34 + {`"""` + "\r\n\tHello \\\r\n\tworld\r\n\t" + `"""`, "Hello world", nil}, 35 + {`"""` + "\none \\\ntwo \\\nthree\n" + `"""`, "one two three", nil}, 36 + {`"""` + "\nHello\\\n" + `"""`, "", errEscapedLastNewline}, 37 + {`"""` + "\n\tHello\\\n\t" + `"""`, "", errEscapedLastNewline}, 38 + {`"""` + "\r\n\tHello\\\r\n\t" + `"""`, "", errEscapedLastNewline}, 32 39 {"'''\n\t\tHello\n\n\t\t'''", "Hello\n", nil}, 33 40 {"'''\n\n\t\tHello\n\t\t'''", "\nHello", nil}, 34 41 {"'''\n\n\n\n\t\t'''", "\n\n", nil}, ··· 103 110 {`#"Hello"`, "", errUnmatchedQuote}, 104 111 {`#"Hello'#`, "", errUnmatchedQuote}, 105 112 {`#""" """#`, "", errMissingNewline}, 113 + {`"""` + "\r\n\tHello \\\r", "", errUnmatchedQuote}, 106 114 107 115 // TODO: should this be legal? 108 116 {`#"""#`, "", errMissingNewline}, ··· 110 118 for i, tc := range testCases { 111 119 t.Run(fmt.Sprintf("%d/%s", i, tc.in), func(t *testing.T) { 112 120 if got, err := Unquote(tc.in); err != tc.err { 113 - t.Errorf("error: got %q; want %q", err, tc.err) 121 + t.Errorf("error: got %#v; want %#v", err, tc.err) 114 122 } else if got != tc.out { 115 123 t.Errorf("value: got %q; want %q", got, tc.out) 116 124 }
+1
cue/scanner/scanner_test.go
··· 132 132 {token.STRING, "##\"\"\"\n\"\"\"#\n\"\"\"##", literal}, 133 133 {token.STRING, `##"####"##`, literal}, 134 134 {token.STRING, `#"foobar"#`, literal}, 135 + {token.STRING, `#" """#`, literal}, 135 136 {token.STRING, `#"\r"#`, literal}, 136 137 {token.STRING, `#"\("#`, literal}, 137 138 {token.STRING, `#"\q"#`, literal},