this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

encoding/xml/koala: introduce the first experimental XML encoder

This is the initial implementation of the Koala XML encoding
as described at https://cuelang.org/discussion/3776.

In short, this encoding is inspired by BadgerFish, an XML to JSON
translation where XML element attributes and bodies become JSON object
key-value pairs with "$" prefixes. We do something very similar
in the XML "Koala" encoding, but adapting the logic to fit CUE better.

Here we introduce the decoding side only; the encoding side will come
as a separate change at a later time.

See #3776.

Signed-off-by: Matthew Sladescu <msladescu@microsoft.com>
Change-Id: If2292c95289fc0229321b7737249dd6057556b03
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1211362
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
Reviewed-by: Chief Cueckoo <chief.cueckoo@cue.works>
Unity-Result: CUE porcuepine <cue.porcuepine@gmail.com>

authored by

Matthew Sladescu and committed by
Daniel Martí
ebf42e56 c565c1d3

+881
+331
encoding/xml/koala/decode.go
··· 1 + // Copyright 2025 The CUE Authors 2 + // 3 + // Licensed under the Apache License, Version 2.0 (the "License"); 4 + // you may not use this file except in compliance with the License. 5 + // You may obtain a copy of the License at 6 + // 7 + // http://www.apache.org/licenses/LICENSE-2.0 8 + // 9 + // Unless required by applicable law or agreed to in writing, software 10 + // distributed under the License is distributed on an "AS IS" BASIS, 11 + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 + // See the License for the specific language governing permissions and 13 + // limitations under the License. 14 + 15 + // Package koala converts XML to and from CUE, as proposed in https://cuelang.org/discussion/3776. 16 + // 17 + // This encoding is inspired by the [BadgerFish] convention for translating XML to JSON. 18 + // XML elements are modeled as CUE structs, their attributes are modeled as struct fields 19 + // prefixed with "$", and their inner text content is modeleed as a field named "$$". 20 + // 21 + // WARNING: THIS PACKAGE IS EXPERIMENTAL. 22 + // ITS API MAY CHANGE AT ANY TIME. 23 + // 24 + // [BadgerFish]: http://www.sklar.com/badgerfish/ 25 + package koala 26 + 27 + import ( 28 + "bytes" 29 + "encoding/xml" 30 + "fmt" 31 + "io" 32 + "strings" 33 + "unicode" 34 + 35 + "cuelang.org/go/cue/ast" 36 + "cuelang.org/go/cue/token" 37 + ) 38 + 39 + // Decoder implements the decoding state. 40 + type Decoder struct { 41 + reader io.Reader 42 + fileName string 43 + tokenFile *token.File 44 + 45 + decoderRan bool 46 + 47 + // current XML element being processed. 48 + currXmlElement *xmlElement 49 + 50 + // The top-level CUE struct. 51 + astRoot *ast.StructLit 52 + // CUE model of ancestors of current XML element being processed. 53 + ancestors []currFieldInfo 54 + // CUE model of current XML element being processed. 55 + currField currFieldInfo 56 + // CUE model of current XML element's inner content ($$ attribute). 57 + currInnerText *ast.Field 58 + } 59 + 60 + // currFieldInfo encapsulates details of the CUE field for the current XML element being processed. 61 + type currFieldInfo struct { 62 + // CUE model of current XML element. 63 + field *ast.Field 64 + // Running map of the current field's children. 65 + currFieldChildren map[string]*ast.Field 66 + } 67 + 68 + // xmlElement models an XML Element hierarchy. 69 + // It is used for tracking namespace prefixes. 70 + type xmlElement struct { 71 + xmlName xml.Name 72 + attr []xml.Attr 73 + parent *xmlElement 74 + children []*xmlElement 75 + textContentIsWhiteSpace bool 76 + } 77 + 78 + // The prefix used to model the inner text content within an XML element. 79 + const contentAttribute string = "$$" 80 + 81 + // The prefix used to model each attribute of an XML element. 82 + const attributeSymbol string = "$" 83 + 84 + // NewDecoder creates a decoder from a stream of XML input. 85 + func NewDecoder(fileName string, r io.Reader) *Decoder { 86 + return &Decoder{reader: r, fileName: fileName} 87 + } 88 + 89 + // Decode parses the input stream as XML and converts it to a CUE [ast.Expr]. 90 + // The input stream is taken from the [Decoder] and consumed. 91 + func (dec *Decoder) Decode() (ast.Expr, error) { 92 + if dec.decoderRan { 93 + return nil, io.EOF 94 + } 95 + dec.decoderRan = true 96 + xmlText, err := io.ReadAll(dec.reader) 97 + if err != nil { 98 + return nil, err 99 + } 100 + reader := bytes.NewReader(xmlText) 101 + xmlDec := xml.NewDecoder(reader) 102 + 103 + // Create a token file to track the position of the XML content in the CUE file. 104 + dec.tokenFile = token.NewFile(dec.fileName, 0, len(xmlText)) 105 + dec.tokenFile.SetLinesForContent(xmlText) 106 + 107 + for { 108 + startOffset := xmlDec.InputOffset() 109 + t, err := xmlDec.Token() 110 + if err == io.EOF { 111 + break 112 + } 113 + if err != nil { 114 + return nil, err 115 + } 116 + switch xmlToken := t.(type) { 117 + case xml.StartElement: 118 + err = dec.decodeStartElement(xmlToken, startOffset) 119 + case xml.CharData: 120 + err = dec.decoderInnerText(xmlToken, startOffset) 121 + case xml.EndElement: 122 + err = dec.decodeEndElement() 123 + } 124 + if err != nil { 125 + return nil, err 126 + } 127 + // If the XML document has ended, break out of the loop. 128 + if dec.astRoot != nil && dec.currXmlElement == nil { 129 + break 130 + } 131 + } 132 + return dec.astRoot, nil 133 + } 134 + 135 + func (dec *Decoder) decoderInnerText(xmlToken xml.CharData, contentOffset int64) error { 136 + // If this is text content within an XML element. 137 + textContent := string(xml.CharData(xmlToken)) 138 + if dec.currField.field == nil { 139 + if isWhiteSpace(textContent) { 140 + return nil 141 + } 142 + return fmt.Errorf("text content outside of an XML element is not supported") 143 + } 144 + pos := dec.tokenFile.Pos(int(contentOffset), token.NoRelPos) 145 + txtContentPosition := pos 146 + txtLabel := ast.NewString(contentAttribute) 147 + txtLabel.ValuePos = txtContentPosition 148 + val := toBasicLit(textContent) 149 + val.ValuePos = txtContentPosition 150 + textContentNode := &ast.Field{ 151 + Label: txtLabel, 152 + Value: val, 153 + TokenPos: pos, 154 + } 155 + dec.currInnerText = textContentNode 156 + dec.currXmlElement.textContentIsWhiteSpace = isWhiteSpace(textContent) 157 + return nil 158 + } 159 + 160 + func (dec *Decoder) decodeEndElement() error { 161 + // If there is text content within the element, add it to the element's value. 162 + if dec.currXmlElement != nil && dec.currInnerText != nil { 163 + // Only support text content within an element that has no sub-elements. 164 + if len(dec.currXmlElement.children) == 0 { 165 + dec.appendToCurrFieldStruct(dec.currInnerText) 166 + dec.currInnerText = nil 167 + } else if len(dec.currXmlElement.children) > 0 && !dec.currXmlElement.textContentIsWhiteSpace { 168 + // If there is text content within an element that has sub-elements, return an error. 169 + return mixedContentError() 170 + } 171 + } 172 + // For the xmlElement hierarchy: step back up the XML hierarchy. 173 + if dec.currXmlElement != nil { 174 + dec.currXmlElement = dec.currXmlElement.parent 175 + } 176 + // For the CUE ast: end current element, and step back up the XML hierarchy. 177 + if len(dec.ancestors) > 0 { 178 + dec.currField = dec.ancestors[len(dec.ancestors)-1] 179 + dec.ancestors = dec.ancestors[:len(dec.ancestors)-1] 180 + } 181 + return nil 182 + } 183 + 184 + func (dec *Decoder) decodeStartElement(xmlToken xml.StartElement, startOffset int64) error { 185 + // Covers the root node. 186 + if dec.currField.field == nil { 187 + dec.currXmlElement = &xmlElement{xmlName: xmlToken.Name, attr: xmlToken.Attr} 188 + cueElement, err := dec.cueFieldFromXmlElement(xmlToken, dec.currXmlElement, startOffset) 189 + if err != nil { 190 + return err 191 + } 192 + dec.currField.assignNewCurrField(cueElement) 193 + dec.astRoot = ast.NewStruct(dec.currField.field) 194 + ast.SetPos(dec.astRoot, dec.tokenFile.Pos(0, token.NoRelPos)) 195 + return nil 196 + } 197 + // If this is not the root node, check if there is text content within the element. 198 + if dec.currInnerText != nil && !dec.currXmlElement.textContentIsWhiteSpace { 199 + return mixedContentError() 200 + } 201 + // Clear any whitespace text content. 202 + dec.currInnerText = nil 203 + // For xmlElement hierarchy: step down the XML hierarchy. 204 + parentXmlNode := dec.currXmlElement 205 + dec.currXmlElement = &xmlElement{xmlName: xmlToken.Name, attr: xmlToken.Attr, parent: parentXmlNode} 206 + parentXmlNode.children = append(parentXmlNode.children, dec.currXmlElement) 207 + // For the CUE ast: step down the CUE hierarchy. 208 + dec.ancestors = append(dec.ancestors, dec.currField) 209 + newElement, err := dec.cueFieldFromXmlElement(xmlToken, dec.currXmlElement, startOffset) 210 + if err != nil { 211 + return err 212 + } 213 + // Check if this new XML element has a name that's been seen before at the current level. 214 + prefixedXmlElementName := prefixedElementName(xmlToken, dec.currXmlElement) 215 + sameNameElements := dec.currField.currFieldChildren[prefixedXmlElementName] 216 + if sameNameElements != nil { 217 + list, ok := sameNameElements.Value.(*ast.ListLit) 218 + // If the field's value is not a ListLit, create a new ListLit and append the existing field. 219 + if !ok { 220 + list = &ast.ListLit{Elts: []ast.Expr{sameNameElements.Value}} 221 + sameNameElements.Value = list 222 + } 223 + // Append the new element to the ListLit, which we now know exists. 224 + list.Elts = append(list.Elts, newElement.Value) 225 + dec.currField.assignNewCurrField(newElement) 226 + return nil 227 + } 228 + dec.currField.currFieldChildren[prefixedXmlElementName] = newElement 229 + dec.appendToCurrFieldStruct(newElement) 230 + dec.currField.assignNewCurrField(newElement) 231 + return nil 232 + } 233 + 234 + func (dec *Decoder) appendToCurrFieldStruct(field *ast.Field) { 235 + dec.currField.field.Value.(*ast.StructLit).Elts = append(dec.currField.field.Value.(*ast.StructLit).Elts, field) 236 + } 237 + 238 + func mixedContentError() error { 239 + return fmt.Errorf("text content within an XML element that has sub-elements is not supported") 240 + } 241 + 242 + func isWhiteSpace(s string) bool { 243 + for _, r := range s { 244 + if !unicode.IsSpace(r) { 245 + return false 246 + } 247 + } 248 + return true 249 + } 250 + 251 + // cueFieldFromXmlElement creates a new [ast.Field] to model the given xml element information 252 + // in [xml.StartElement] and [xmlElement]. The startOffset represents the offset 253 + // for the beginning of the start tag of the given XML element. 254 + func (dec *Decoder) cueFieldFromXmlElement(elem xml.StartElement, xmlNode *xmlElement, startOffset int64) (*ast.Field, error) { 255 + elementName := prefixedElementName(elem, xmlNode) 256 + resLabel := ast.NewString(elementName) 257 + pos := dec.tokenFile.Pos(int(startOffset), token.NoRelPos) 258 + resLabel.ValuePos = pos 259 + resultValue := &ast.StructLit{} 260 + result := &ast.Field{ 261 + Label: resLabel, 262 + Value: resultValue, 263 + TokenPos: pos, 264 + } 265 + // Extract attributes as children. 266 + for _, a := range elem.Attr { 267 + attrName := prefixedAttrName(a, elem, xmlNode) 268 + label := ast.NewString(attributeSymbol + attrName) 269 + value := toBasicLit(a.Value) 270 + label.ValuePos = pos 271 + value.ValuePos = pos 272 + attrExpr := &ast.Field{ 273 + Label: label, 274 + Value: value, 275 + TokenPos: pos, 276 + } 277 + resultValue.Elts = append(resultValue.Elts, attrExpr) 278 + } 279 + return result, nil 280 + } 281 + 282 + // prefixedElementName returns the full name of an element, 283 + // including its namespace prefix if it has one; but without namespace prefix if it is "xmlns". 284 + func prefixedElementName(elem xml.StartElement, xmlNode *xmlElement) string { 285 + elementName := elem.Name.Local 286 + if elem.Name.Space != "" { 287 + prefixNS := nsPrefix(elem.Name.Space, elem.Attr, xmlNode) 288 + if prefixNS != "xmlns" { 289 + elementName = prefixNS + ":" + elem.Name.Local 290 + } 291 + } 292 + return elementName 293 + } 294 + 295 + // prefixedAttrName returns the full name of an attribute, including its namespace prefix if it has one. 296 + func prefixedAttrName(a xml.Attr, elem xml.StartElement, xmlNode *xmlElement) string { 297 + attrName := a.Name.Local 298 + if a.Name.Space != "" { 299 + prefix := nsPrefix(a.Name.Space, elem.Attr, xmlNode) 300 + attrName = prefix + ":" + a.Name.Local 301 + } 302 + return attrName 303 + } 304 + 305 + func toBasicLit(s string) *ast.BasicLit { 306 + s = strings.ReplaceAll(s, "\r", "") 307 + return ast.NewString(s) 308 + } 309 + 310 + // nsPrefix finds the prefix label for a given namespace by looking at the current node's 311 + // attributes and then walking up the hierarchy of XML nodes. 312 + func nsPrefix(nameSpace string, attributes []xml.Attr, xmlNode *xmlElement) string { 313 + // When the prefix is xmlns, then the namespace is xmlns according to the golang XML parser. 314 + if nameSpace == "xmlns" { 315 + return "xmlns" 316 + } 317 + for _, attr := range attributes { 318 + if attr.Value == nameSpace { 319 + return attr.Name.Local 320 + } 321 + } 322 + if xmlNode.parent != nil { 323 + return nsPrefix(nameSpace, xmlNode.parent.attr, xmlNode.parent) 324 + } 325 + panic("could not find prefix for namespace " + nameSpace) 326 + } 327 + 328 + func (cf *currFieldInfo) assignNewCurrField(field *ast.Field) { 329 + cf.field = field 330 + cf.currFieldChildren = make(map[string]*ast.Field) 331 + }
+550
encoding/xml/koala/decode_test.go
··· 1 + // Copyright 2025 The CUE Authors 2 + // 3 + // Licensed under the Apache License, Version 2.0 (the "License"); 4 + // you may not use this file except in compliance with the License. 5 + // You may obtain a copy of the License at 6 + // 7 + // http://www.apache.org/licenses/LICENSE-2.0 8 + // 9 + // Unless required by applicable law or agreed to in writing, software 10 + // distributed under the License is distributed on an "AS IS" BASIS, 11 + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 + // See the License for the specific language governing permissions and 13 + // limitations under the License. 14 + 15 + package koala_test 16 + 17 + import ( 18 + "strings" 19 + "testing" 20 + 21 + "github.com/go-quicktest/qt" 22 + 23 + "cuelang.org/go/cue" 24 + "cuelang.org/go/cue/ast/astutil" 25 + "cuelang.org/go/cue/cuecontext" 26 + "cuelang.org/go/cue/errors" 27 + "cuelang.org/go/cue/format" 28 + "cuelang.org/go/encoding/xml/koala" 29 + ) 30 + 31 + func TestErrorReporting(t *testing.T) { 32 + t.Parallel() 33 + tests := []struct { 34 + name string 35 + inputXML string 36 + cueConstraints string 37 + expectedError string 38 + }{{ 39 + name: "Element Text Content Constraint Error", 40 + inputXML: `<?xml version="1.0" encoding="UTF-8"?> 41 + <test v="v2.1"> 42 + <edge n="2.65" o="3.65"/> 43 + <container id="555"/> 44 + <container id="777"/> 45 + <container id="888" > 46 + <l attr="x"/> 47 + <l attr="y"/> 48 + </container> 49 + <text>content</text> 50 + </test>`, 51 + cueConstraints: `test: { 52 + $v: string 53 + edge: { 54 + $n: string 55 + $o: string 56 + } 57 + container: [...{ 58 + $id: string 59 + l: [...{ 60 + $attr: string 61 + }] 62 + }] 63 + text: { 64 + $$: int 65 + } 66 + }`, 67 + expectedError: "test.text.$$: conflicting values int and \"content\" (mismatched types int and string):\n input.xml:10:10\n schema.cue:14:8\n", 68 + }, { 69 + name: "Attribute Constraint Error", 70 + inputXML: `<?xml version="1.0" encoding="UTF-8"?> 71 + <test v="v2.1"> 72 + <edge n="2.65" o="3.65"/> 73 + <container id="555"/> 74 + <container id="777"/> 75 + <container id="888" > 76 + <l attr="x"/> 77 + <l attr="y"/> 78 + </container> 79 + <text>content</text> 80 + </test>`, 81 + cueConstraints: `test: { 82 + $v: int 83 + edge: { 84 + $n: string 85 + $o: string 86 + } 87 + container: [...{ 88 + $id: string 89 + l: [...{ 90 + $attr: string 91 + }] 92 + }] 93 + text: { 94 + $$: string 95 + } 96 + }`, 97 + expectedError: "test.$v: conflicting values int and \"v2.1\" (mismatched types int and string):\n input.xml:2:3\n schema.cue:2:7\n", 98 + }, 99 + { 100 + name: "Attribute Constraint Error on self-closing element", 101 + inputXML: `<?xml version="1.0" encoding="UTF-8"?> 102 + <test v="v2.1"> 103 + <edge n="2.65" o="3.65"/> 104 + <container id="555"/> 105 + <container id="777"/> 106 + <container id="888" > 107 + <l attr="x"/> 108 + <l attr="y"/> 109 + </container> 110 + <text>content</text> 111 + </test>`, 112 + cueConstraints: `test: { 113 + $v: string 114 + edge: { 115 + $n: int 116 + $o: string 117 + } 118 + container: [...{ 119 + $id: string 120 + l: [...{ 121 + $attr: string 122 + }] 123 + }] 124 + text: { 125 + $$: string 126 + } 127 + }`, 128 + expectedError: "test.edge.$n: conflicting values int and \"2.65\" (mismatched types int and string):\n input.xml:3:4\n schema.cue:4:8\n", 129 + }, 130 + } 131 + 132 + for _, test := range tests { 133 + t.Run(test.name, func(t *testing.T) { 134 + t.Parallel() 135 + 136 + fileName := "input.xml" 137 + dec := koala.NewDecoder(fileName, strings.NewReader(test.inputXML)) 138 + 139 + cueExpr, err := dec.Decode() 140 + 141 + qt.Assert(t, qt.IsNil(err)) 142 + 143 + rootCueFile, err := astutil.ToFile(cueExpr) 144 + qt.Assert(t, qt.IsNil(err)) 145 + 146 + c := cuecontext.New() 147 + rootCueVal := c.BuildFile(rootCueFile, cue.Filename(fileName)) 148 + 149 + // compile some CUE into a Value 150 + compiledSchema := c.CompileString(test.cueConstraints, cue.Filename("schema.cue")) 151 + 152 + //unify the compiledSchema against the formattedConfig 153 + unified := compiledSchema.Unify(rootCueVal) 154 + 155 + actualError := "" 156 + if err := unified.Validate(cue.Concrete(true)); err != nil { 157 + actualError = errors.Details(err, nil) 158 + } 159 + 160 + qt.Assert(t, qt.Equals(actualError, test.expectedError)) 161 + }) 162 + } 163 + } 164 + 165 + func TestElementDecoding(t *testing.T) { 166 + t.Parallel() 167 + 168 + tests := []struct { 169 + name string 170 + inputXML string 171 + wantCUE string 172 + }{{ 173 + name: "Simple Elements", 174 + inputXML: `<note> 175 + <to> </to> 176 + <from>Jani</from> 177 + <heading>Reminder</heading> 178 + <body>Don't forget me this weekend!</body> 179 + </note>`, 180 + wantCUE: `note: { 181 + to: $$: " " 182 + from: $$: "Jani" 183 + heading: $$: "Reminder" 184 + body: $$: "Don't forget me this weekend!" 185 + } 186 + `, 187 + }, 188 + { 189 + name: "Simple self-closing element", 190 + inputXML: `<note> 191 + <to/> 192 + <from>Jani</from> 193 + <heading>Reminder</heading> 194 + <body>Don't forget me this weekend!</body> 195 + </note>`, 196 + wantCUE: `note: { 197 + to: {} 198 + from: $$: "Jani" 199 + heading: $$: "Reminder" 200 + body: $$: "Don't forget me this weekend!" 201 + } 202 + `, 203 + }, 204 + { 205 + name: "Attribute", 206 + inputXML: `<note alpha="abcd"> 207 + <to>Tove</to> 208 + <from>Jani</from> 209 + <heading>Reminder</heading> 210 + <body>Don't forget me this weekend!</body> 211 + </note>`, 212 + wantCUE: `note: { 213 + $alpha: "abcd" 214 + to: $$: "Tove" 215 + from: $$: "Jani" 216 + heading: $$: "Reminder" 217 + body: $$: "Don't forget me this weekend!" 218 + } 219 + `, 220 + }, 221 + { 222 + name: "Attribute and Element with the same name", 223 + inputXML: `<note alpha="abcd"> 224 + <to>Tove</to> 225 + <from>Jani</from> 226 + <heading>Reminder</heading> 227 + <body>Don't forget me this weekend!</body> 228 + <alpha>efgh</alpha> 229 + </note>`, 230 + wantCUE: `note: { 231 + $alpha: "abcd" 232 + to: $$: "Tove" 233 + from: $$: "Jani" 234 + heading: $$: "Reminder" 235 + body: $$: "Don't forget me this weekend!" 236 + alpha: $$: "efgh" 237 + } 238 + `, 239 + }, 240 + { 241 + name: "Mapping for content when an attribute exists", 242 + inputXML: `<note alpha="abcd"> 243 + hello 244 + </note>`, 245 + wantCUE: `note: { 246 + $alpha: "abcd" 247 + $$: "\n\thello\n" 248 + } 249 + `, 250 + }, 251 + { 252 + name: "Nested Element", 253 + inputXML: `<notes> 254 + <note alpha="abcd">hello</note> 255 + </notes>`, 256 + wantCUE: `notes: note: { 257 + $alpha: "abcd" 258 + $$: "hello" 259 + } 260 + `, 261 + }, 262 + { 263 + name: "Collections", 264 + inputXML: `<notes> 265 + <note alpha="abcd">hello</note> 266 + <note alpha="abcdef">goodbye</note> 267 + </notes>`, 268 + wantCUE: `notes: note: [{ 269 + $alpha: "abcd" 270 + $$: "hello" 271 + }, { 272 + $alpha: "abcdef" 273 + $$: "goodbye" 274 + }] 275 + `, 276 + }, 277 + { 278 + name: "Interleaving Element Types", 279 + inputXML: `<notes> 280 + <note alpha="abcd">hello</note> 281 + <note alpha="abcdef">goodbye</note> 282 + <book>mybook</book> 283 + <note alpha="ab">goodbye</note> 284 + <note>direct</note> 285 + </notes>`, 286 + wantCUE: `notes: { 287 + note: [{ 288 + $alpha: "abcd" 289 + $$: "hello" 290 + }, { 291 + $alpha: "abcdef" 292 + $$: "goodbye" 293 + }, { 294 + $alpha: "ab" 295 + $$: "goodbye" 296 + }, { 297 + $$: "direct" 298 + }] 299 + book: $$: "mybook" 300 + } 301 + `, 302 + }, 303 + { 304 + name: "Namespaces", 305 + inputXML: `<h:table xmlns:h="http://www.w3.org/TR/html4/"> 306 + <h:tr> 307 + <h:td>Apples</h:td> 308 + <h:td>Bananas</h:td> 309 + </h:tr> 310 + </h:table>`, 311 + wantCUE: `"h:table": { 312 + "$xmlns:h": "http://www.w3.org/TR/html4/" 313 + "h:tr": "h:td": [{ 314 + $$: "Apples" 315 + }, { 316 + $$: "Bananas" 317 + }] 318 + } 319 + `, 320 + }, 321 + { 322 + name: "Attribute namespace prefix", 323 + inputXML: `<h:table xmlns:h="http://www.w3.org/TR/html4/" xmlns:f="http://www.w3.org/TR/html5/"> 324 + <h:tr> 325 + <h:td f:type="fruit">Apples</h:td> 326 + <h:td>Bananas</h:td> 327 + </h:tr> 328 + </h:table>`, 329 + wantCUE: `"h:table": { 330 + "$xmlns:h": "http://www.w3.org/TR/html4/" 331 + "$xmlns:f": "http://www.w3.org/TR/html5/" 332 + "h:tr": "h:td": [{ 333 + "$f:type": "fruit" 334 + $$: "Apples" 335 + }, { 336 + $$: "Bananas" 337 + }] 338 + } 339 + `, 340 + }, 341 + { 342 + name: "Mixed Namespaces", 343 + inputXML: `<h:table xmlns:h="http://www.w3.org/TR/html4/" xmlns:r="d"> 344 + <h:tr> 345 + <h:td>Apples</h:td> 346 + <h:td>Bananas</h:td> 347 + <r:blah>e3r</r:blah> 348 + </h:tr> 349 + </h:table>`, 350 + wantCUE: `"h:table": { 351 + "$xmlns:h": "http://www.w3.org/TR/html4/" 352 + "$xmlns:r": "d" 353 + "h:tr": { 354 + "h:td": [{ 355 + $$: "Apples" 356 + }, { 357 + $$: "Bananas" 358 + }] 359 + "r:blah": $$: "e3r" 360 + } 361 + } 362 + `, 363 + }, 364 + { 365 + name: "Elements with same name but different namespaces", 366 + inputXML: `<h:table xmlns:h="http://www.w3.org/TR/html4/" xmlns:r="d"> 367 + <h:tr> 368 + <h:td>Apples</h:td> 369 + <h:td>Bananas</h:td> 370 + <r:td>e3r</r:td> 371 + </h:tr> 372 + </h:table>`, 373 + wantCUE: `"h:table": { 374 + "$xmlns:h": "http://www.w3.org/TR/html4/" 375 + "$xmlns:r": "d" 376 + "h:tr": { 377 + "h:td": [{ 378 + $$: "Apples" 379 + }, { 380 + $$: "Bananas" 381 + }] 382 + "r:td": $$: "e3r" 383 + } 384 + } 385 + `, 386 + }, 387 + { 388 + name: "Collection of elements, where elements have optional properties", 389 + inputXML: `<books> 390 + <book> 391 + <title>title</title> 392 + <author>John Doe</author> 393 + </book> 394 + <book> 395 + <title>title2</title> 396 + <author>Jane Doe</author> 397 + </book> 398 + <book> 399 + <title>Lord of the rings</title> 400 + <author>JRR Tolkien</author> 401 + <volume> 402 + <title>Fellowship</title> 403 + <author>JRR Tolkien</author> 404 + </volume> 405 + <volume> 406 + <title>Two Towers</title> 407 + <author>JRR Tolkien</author> 408 + </volume> 409 + <volume> 410 + <title>Return of the King</title> 411 + <author>JRR Tolkien</author> 412 + </volume> 413 + </book> 414 + </books>`, 415 + wantCUE: `books: book: [{ 416 + title: $$: "title" 417 + author: $$: "John Doe" 418 + }, { 419 + title: $$: "title2" 420 + author: $$: "Jane Doe" 421 + }, { 422 + title: $$: "Lord of the rings" 423 + author: $$: "JRR Tolkien" 424 + volume: [{ 425 + title: $$: "Fellowship" 426 + author: $$: "JRR Tolkien" 427 + }, { 428 + title: $$: "Two Towers" 429 + author: $$: "JRR Tolkien" 430 + }, { 431 + title: $$: "Return of the King" 432 + author: $$: "JRR Tolkien" 433 + }] 434 + }] 435 + `, 436 + }, 437 + { 438 + name: "Carriage Return Filter Test", 439 + inputXML: "<node>\r\nhello\r\n</node>", 440 + wantCUE: `node: $$: "\nhello\n" 441 + `, 442 + }, 443 + { 444 + name: "Spacing either side of xml (including new lines before and after root node)", 445 + inputXML: ` 446 + 447 + <root> 448 + <message>Hello World!</message> 449 + <nested> 450 + <a1>one level</a1> 451 + <a2> 452 + <b>two levels</b> 453 + </a2> 454 + </nested> 455 + </root> 456 + 457 + `, 458 + wantCUE: `root: { 459 + message: $$: "Hello World!" 460 + nested: { 461 + a1: $$: "one level" 462 + a2: b: $$: "two levels" 463 + } 464 + } 465 + `, 466 + }, 467 + } 468 + 469 + for _, test := range tests { 470 + t.Run(test.name, func(t *testing.T) { 471 + t.Parallel() 472 + 473 + dec := koala.NewDecoder("input.xml", strings.NewReader(test.inputXML)) 474 + cueExpr, err := dec.Decode() 475 + 476 + qt.Assert(t, qt.IsNil(err)) 477 + 478 + rootCueFile, err := astutil.ToFile(cueExpr) 479 + qt.Assert(t, qt.IsNil(err)) 480 + 481 + actualCue, err := format.Node(rootCueFile, format.Simplify()) 482 + 483 + qt.Assert(t, qt.IsNil(err)) 484 + qt.Assert(t, qt.Equals(string(actualCue), test.wantCUE)) 485 + }) 486 + } 487 + } 488 + 489 + func TestErrors(t *testing.T) { 490 + t.Parallel() 491 + 492 + tests := []struct { 493 + name string 494 + inputXML string 495 + expectedError string 496 + }{ 497 + { 498 + name: "Text after root node followed by subelements", 499 + inputXML: `<note> 500 + mixed 501 + <from>Jani</from> 502 + <heading>Reminder</heading> 503 + <body>Don't forget me this weekend!</body> 504 + </note>`, 505 + expectedError: `text content within an XML element that has sub-elements is not supported`, 506 + }, 507 + { 508 + name: "Text in middle of subelements", 509 + inputXML: `<note> 510 + <to/> 511 + mixed 512 + <from>Jani</from> 513 + <heading>Reminder</heading> 514 + <body>Don't forget me this weekend!</body> 515 + </note>`, 516 + expectedError: `text content within an XML element that has sub-elements is not supported`, 517 + }, 518 + { 519 + name: "Nested mixed content", 520 + inputXML: `<note> 521 + <to/> 522 + <from>Jani <subElement/></from> 523 + <heading>Reminder</heading> 524 + <body>Don't forget me this weekend!</body> 525 + </note>`, 526 + expectedError: `text content within an XML element that has sub-elements is not supported`, 527 + }, 528 + { 529 + name: "Text before end of root element", 530 + inputXML: `<note> 531 + <to/> 532 + <from></from> 533 + <heading>Reminder</heading> 534 + myText 535 + </note>`, 536 + expectedError: `text content within an XML element that has sub-elements is not supported`, 537 + }, 538 + } 539 + 540 + for _, test := range tests { 541 + t.Run(test.name, func(t *testing.T) { 542 + t.Parallel() 543 + 544 + dec := koala.NewDecoder("input.xml", strings.NewReader(test.inputXML)) 545 + _, err := dec.Decode() 546 + 547 + qt.Assert(t, qt.ErrorMatches(err, test.expectedError)) 548 + }) 549 + } 550 + }