A fork of https://github.com/crosspoint-reader/crosspoint-reader
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add expat and swap out EPUB HTML parser (#2)

* Add expat and swap out EPUB HTML parser

* Increase EpubHtmlParserSlim file buffer to 1024 bytes

* Cleanup TextBlock functions

* Do not break words when leaving spans

authored by

Dave Allie and committed by
GitHub
dd6e649d ad8cee12

+15969 -269
-181
lib/Epub/Epub/EpubHtmlParser.cpp
··· 1 - #include "EpubHtmlParser.h" 2 - 3 - #include <EpdRenderer.h> 4 - #include <HardwareSerial.h> 5 - 6 - #include "Page.h" 7 - #include "htmlEntities.h" 8 - 9 - const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"}; 10 - constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]); 11 - 12 - const char* BLOCK_TAGS[] = {"p", "li", "div", "br"}; 13 - constexpr int NUM_BLOCK_TAGS = sizeof(BLOCK_TAGS) / sizeof(BLOCK_TAGS[0]); 14 - 15 - const char* BOLD_TAGS[] = {"b"}; 16 - constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]); 17 - 18 - const char* ITALIC_TAGS[] = {"i"}; 19 - constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]); 20 - 21 - const char* IMAGE_TAGS[] = {"img"}; 22 - constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); 23 - 24 - const char* SKIP_TAGS[] = {"head", "table"}; 25 - constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); 26 - 27 - // given the start and end of a tag, check to see if it matches a known tag 28 - bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { 29 - for (int i = 0; i < possible_tag_count; i++) { 30 - if (strcmp(tag_name, possible_tags[i]) == 0) { 31 - return true; 32 - } 33 - } 34 - return false; 35 - } 36 - 37 - // start a new text block if needed 38 - void EpubHtmlParser::startNewTextBlock(const BLOCK_STYLE style) { 39 - if (currentTextBlock) { 40 - // already have a text block running and it is empty - just reuse it 41 - if (currentTextBlock->isEmpty()) { 42 - currentTextBlock->set_style(style); 43 - return; 44 - } 45 - 46 - currentTextBlock->finish(); 47 - makePages(); 48 - delete currentTextBlock; 49 - } 50 - currentTextBlock = new TextBlock(style); 51 - } 52 - 53 - bool EpubHtmlParser::VisitEnter(const tinyxml2::XMLElement& element, const tinyxml2::XMLAttribute* firstAttribute) { 54 - const char* tag_name = element.Name(); 55 - if (matches(tag_name, IMAGE_TAGS, 
NUM_IMAGE_TAGS)) { 56 - const char* src = element.Attribute("src"); 57 - if (src) { 58 - // don't leave an empty text block in the list 59 - // const BLOCK_STYLE style = currentTextBlock->get_style(); 60 - if (currentTextBlock->isEmpty()) { 61 - delete currentTextBlock; 62 - currentTextBlock = nullptr; 63 - } 64 - // TODO: Fix this 65 - // blocks.push_back(new ImageBlock(m_base_path + src)); 66 - // start a new text block - with the same style as before 67 - // startNewTextBlock(style); 68 - } else { 69 - // ESP_LOGE(TAG, "Could not find src attribute"); 70 - } 71 - return false; 72 - } 73 - 74 - if (matches(tag_name, SKIP_TAGS, NUM_SKIP_TAGS)) { 75 - return false; 76 - } 77 - 78 - // Serial.printf("Text: %s\n", element.GetText()); 79 - 80 - if (matches(tag_name, HEADER_TAGS, NUM_HEADER_TAGS)) { 81 - insideBoldTag = true; 82 - startNewTextBlock(CENTER_ALIGN); 83 - } else if (matches(tag_name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { 84 - if (strcmp(tag_name, "br") == 0) { 85 - startNewTextBlock(currentTextBlock->get_style()); 86 - } else { 87 - startNewTextBlock(JUSTIFIED); 88 - } 89 - } else if (matches(tag_name, BOLD_TAGS, NUM_BOLD_TAGS)) { 90 - insideBoldTag = true; 91 - } else if (matches(tag_name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { 92 - insideItalicTag = true; 93 - } 94 - return true; 95 - } 96 - /// Visit a text node. 
97 - bool EpubHtmlParser::Visit(const tinyxml2::XMLText& text) { 98 - const char* content = text.Value(); 99 - currentTextBlock->addSpan(replaceHtmlEntities(content), insideBoldTag, insideItalicTag); 100 - return true; 101 - } 102 - 103 - bool EpubHtmlParser::VisitExit(const tinyxml2::XMLElement& element) { 104 - const char* tag_name = element.Name(); 105 - if (matches(tag_name, HEADER_TAGS, NUM_HEADER_TAGS)) { 106 - insideBoldTag = false; 107 - } else if (matches(tag_name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { 108 - // nothing to do 109 - } else if (matches(tag_name, BOLD_TAGS, NUM_BOLD_TAGS)) { 110 - insideBoldTag = false; 111 - } else if (matches(tag_name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { 112 - insideItalicTag = false; 113 - } 114 - return true; 115 - } 116 - 117 - bool EpubHtmlParser::parseAndBuildPages() { 118 - startNewTextBlock(JUSTIFIED); 119 - tinyxml2::XMLDocument doc(false, tinyxml2::COLLAPSE_WHITESPACE); 120 - 121 - const tinyxml2::XMLError result = doc.LoadFile(filepath); 122 - if (result != tinyxml2::XML_SUCCESS) { 123 - Serial.printf("Failed to load file, Error: %s\n", tinyxml2::XMLDocument::ErrorIDToName(result)); 124 - return false; 125 - } 126 - 127 - doc.Accept(this); 128 - if (currentTextBlock) { 129 - makePages(); 130 - completePageFn(currentPage); 131 - currentPage = nullptr; 132 - delete currentTextBlock; 133 - currentTextBlock = nullptr; 134 - } 135 - 136 - return true; 137 - } 138 - 139 - void EpubHtmlParser::makePages() { 140 - if (!currentTextBlock) { 141 - Serial.println("!! 
No text block to make pages for !!"); 142 - return; 143 - } 144 - 145 - if (!currentPage) { 146 - currentPage = new Page(); 147 - } 148 - 149 - const int lineHeight = renderer.getLineHeight(); 150 - const int pageHeight = renderer.getPageHeight(); 151 - 152 - // Long running task, make sure to let other things happen 153 - vTaskDelay(1); 154 - 155 - if (currentTextBlock->getType() == TEXT_BLOCK) { 156 - const auto lines = currentTextBlock->splitIntoLines(renderer); 157 - 158 - for (const auto line : lines) { 159 - if (currentPage->nextY + lineHeight > pageHeight) { 160 - completePageFn(currentPage); 161 - currentPage = new Page(); 162 - } 163 - 164 - currentPage->elements.push_back(new PageLine(line, currentPage->nextY)); 165 - currentPage->nextY += lineHeight; 166 - } 167 - // TODO: Fix spacing between paras 168 - // add some extra line between blocks 169 - currentPage->nextY += lineHeight / 2; 170 - } 171 - // TODO: Image block support 172 - // if (block->getType() == BlockType::IMAGE_BLOCK) { 173 - // ImageBlock *imageBlock = (ImageBlock *)block; 174 - // if (y + imageBlock->height > page_height) { 175 - // pages.push_back(new Page()); 176 - // y = 0; 177 - // } 178 - // pages.back()->elements.push_back(new PageImage(imageBlock, y)); 179 - // y += imageBlock->height; 180 - // } 181 - }
-34
lib/Epub/Epub/EpubHtmlParser.h
··· 1 - #pragma once 2 - #include <tinyxml2.h> 3 - 4 - #include <functional> 5 - 6 - #include "blocks/TextBlock.h" 7 - 8 - class Page; 9 - class EpdRenderer; 10 - 11 - class EpubHtmlParser final : public tinyxml2::XMLVisitor { 12 - const char* filepath; 13 - EpdRenderer& renderer; 14 - std::function<void(Page*)> completePageFn; 15 - 16 - bool insideBoldTag = false; 17 - bool insideItalicTag = false; 18 - TextBlock* currentTextBlock = nullptr; 19 - Page* currentPage = nullptr; 20 - 21 - void startNewTextBlock(BLOCK_STYLE style); 22 - void makePages(); 23 - 24 - // xml parser callbacks 25 - bool VisitEnter(const tinyxml2::XMLElement& element, const tinyxml2::XMLAttribute* firstAttribute) override; 26 - bool Visit(const tinyxml2::XMLText& text) override; 27 - bool VisitExit(const tinyxml2::XMLElement& element) override; 28 - // xml parser callbacks 29 - public: 30 - explicit EpubHtmlParser(const char* filepath, EpdRenderer& renderer, const std::function<void(Page*)>& completePageFn) 31 - : filepath(filepath), renderer(renderer), completePageFn(completePageFn) {} 32 - ~EpubHtmlParser() override = default; 33 - bool parseAndBuildPages(); 34 - };
+291
lib/Epub/Epub/EpubHtmlParserSlim.cpp
··· 1 + #include "EpubHtmlParserSlim.h" 2 + 3 + #include <EpdRenderer.h> 4 + #include <HardwareSerial.h> 5 + 6 + #include "Page.h" 7 + #include "htmlEntities.h" 8 + 9 + const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"}; 10 + constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]); 11 + 12 + const char* BLOCK_TAGS[] = {"p", "li", "div", "br"}; 13 + constexpr int NUM_BLOCK_TAGS = sizeof(BLOCK_TAGS) / sizeof(BLOCK_TAGS[0]); 14 + 15 + const char* BOLD_TAGS[] = {"b"}; 16 + constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]); 17 + 18 + const char* ITALIC_TAGS[] = {"i"}; 19 + constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]); 20 + 21 + const char* IMAGE_TAGS[] = {"img"}; 22 + constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); 23 + 24 + const char* SKIP_TAGS[] = {"head", "table"}; 25 + constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); 26 + 27 + bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n'; } 28 + 29 + // given the start and end of a tag, check to see if it matches a known tag 30 + bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { 31 + for (int i = 0; i < possible_tag_count; i++) { 32 + if (strcmp(tag_name, possible_tags[i]) == 0) { 33 + return true; 34 + } 35 + } 36 + return false; 37 + } 38 + 39 + // start a new text block if needed 40 + void EpubHtmlParserSlim::startNewTextBlock(const BLOCK_STYLE style) { 41 + if (currentTextBlock) { 42 + // already have a text block running and it is empty - just reuse it 43 + if (currentTextBlock->isEmpty()) { 44 + currentTextBlock->setStyle(style); 45 + return; 46 + } 47 + 48 + currentTextBlock->finish(); 49 + makePages(); 50 + delete currentTextBlock; 51 + } 52 + currentTextBlock = new TextBlock(style); 53 + } 54 + 55 + #include <expat.h> 56 + 57 + void XMLCALL EpubHtmlParserSlim::startElement(void* userData, const XML_Char* name, 
const XML_Char** atts) { 58 + auto* self = static_cast<EpubHtmlParserSlim*>(userData); 59 + (void)atts; 60 + 61 + // Middle of skip 62 + if (self->skipUntilDepth < self->depth) { 63 + self->depth += 1; 64 + return; 65 + } 66 + 67 + if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) { 68 + // const char* src = element.Attribute("src"); 69 + // if (src) { 70 + // // don't leave an empty text block in the list 71 + // // const BLOCK_STYLE style = currentTextBlock->get_style(); 72 + // if (currentTextBlock->isEmpty()) { 73 + // delete currentTextBlock; 74 + // currentTextBlock = nullptr; 75 + // } 76 + // // TODO: Fix this 77 + // // blocks.push_back(new ImageBlock(m_base_path + src)); 78 + // // start a new text block - with the same style as before 79 + // // startNewTextBlock(style); 80 + // } else { 81 + // // ESP_LOGE(TAG, "Could not find src attribute"); 82 + // } 83 + 84 + // start skip 85 + self->skipUntilDepth = self->depth; 86 + self->depth += 1; 87 + return; 88 + } 89 + 90 + if (matches(name, SKIP_TAGS, NUM_SKIP_TAGS)) { 91 + // start skip 92 + self->skipUntilDepth = self->depth; 93 + self->depth += 1; 94 + return; 95 + } 96 + 97 + if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { 98 + self->startNewTextBlock(CENTER_ALIGN); 99 + self->boldUntilDepth = min(self->boldUntilDepth, self->depth); 100 + } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { 101 + if (strcmp(name, "br") == 0) { 102 + self->startNewTextBlock(self->currentTextBlock->getStyle()); 103 + } else { 104 + self->startNewTextBlock(JUSTIFIED); 105 + } 106 + } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { 107 + self->boldUntilDepth = min(self->boldUntilDepth, self->depth); 108 + } else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { 109 + self->italicUntilDepth = min(self->italicUntilDepth, self->depth); 110 + } 111 + 112 + self->depth += 1; 113 + } 114 + 115 + void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s, const int len) { 116 + auto* self = 
static_cast<EpubHtmlParserSlim*>(userData); 117 + 118 + // Middle of skip 119 + if (self->skipUntilDepth < self->depth) { 120 + return; 121 + } 122 + 123 + for (int i = 0; i < len; i++) { 124 + if (isWhitespace(s[i])) { 125 + // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it 126 + if (self->partWordBufferIndex > 0) { 127 + self->partWordBuffer[self->partWordBufferIndex] = '\0'; 128 + self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth, 129 + self->italicUntilDepth < self->depth); 130 + self->partWordBufferIndex = 0; 131 + } 132 + // Skip the whitespace char 133 + continue; 134 + } 135 + 136 + // If we're about to run out of space, then cut the word off and start a new one 137 + if (self->partWordBufferIndex >= PART_WORD_BUFFER_SIZE - 2) { 138 + self->partWordBuffer[self->partWordBufferIndex] = '\0'; 139 + self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth, 140 + self->italicUntilDepth < self->depth); 141 + self->partWordBufferIndex = 0; 142 + } 143 + 144 + self->partWordBuffer[self->partWordBufferIndex++] = s[i]; 145 + } 146 + } 147 + 148 + void XMLCALL EpubHtmlParserSlim::endElement(void* userData, const XML_Char* name) { 149 + auto* self = static_cast<EpubHtmlParserSlim*>(userData); 150 + (void)name; 151 + 152 + if (self->partWordBufferIndex > 0) { 153 + // Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file. 154 + // We don't want to flush out content when closing inline tags like <span>. 155 + // Currently this also flushes out on closing <b> and <i> tags, but they are line tags so that shouldn't happen, 156 + // text styling needs to be overhauled to fix it. 
157 + const bool shouldBreakText = matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || 158 + matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || 159 + matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || 160 + matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || 161 + self->depth == 1; 162 + 163 + if (shouldBreakText) { 164 + self->partWordBuffer[self->partWordBufferIndex] = '\0'; 165 + self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth, 166 + self->italicUntilDepth < self->depth); 167 + self->partWordBufferIndex = 0; 168 + } 169 + } 170 + 171 + self->depth -= 1; 172 + 173 + // Leaving skip 174 + if (self->skipUntilDepth == self->depth) { 175 + self->skipUntilDepth = INT_MAX; 176 + } 177 + 178 + // Leaving bold 179 + if (self->boldUntilDepth == self->depth) { 180 + self->boldUntilDepth = INT_MAX; 181 + } 182 + 183 + // Leaving italic 184 + if (self->italicUntilDepth == self->depth) { 185 + self->italicUntilDepth = INT_MAX; 186 + } 187 + } 188 + 189 + bool EpubHtmlParserSlim::parseAndBuildPages() { 190 + startNewTextBlock(JUSTIFIED); 191 + 192 + const XML_Parser parser = XML_ParserCreate(nullptr); 193 + int done; 194 + 195 + if (!parser) { 196 + Serial.println("Couldn't allocate memory for parser"); 197 + return false; 198 + } 199 + 200 + XML_SetUserData(parser, this); 201 + XML_SetElementHandler(parser, startElement, endElement); 202 + XML_SetCharacterDataHandler(parser, characterData); 203 + 204 + FILE* file = fopen(filepath, "r"); 205 + 206 + do { 207 + void* const buf = XML_GetBuffer(parser, 1024); 208 + if (!buf) { 209 + Serial.println("Couldn't allocate memory for buffer"); 210 + XML_ParserFree(parser); 211 + fclose(file); 212 + return false; 213 + } 214 + 215 + const size_t len = fread(buf, 1, 1024, file); 216 + 217 + if (ferror(file)) { 218 + Serial.println("Read error"); 219 + XML_ParserFree(parser); 220 + fclose(file); 221 + return false; 222 + } 223 + 224 + done = feof(file); 225 + 226 + if (XML_ParseBuffer(parser, 
static_cast<int>(len), done) == XML_STATUS_ERROR) { 227 + Serial.printf("Parse error at line %lu:\n%s\n", XML_GetCurrentLineNumber(parser), 228 + XML_ErrorString(XML_GetErrorCode(parser))); 229 + XML_ParserFree(parser); 230 + fclose(file); 231 + return false; 232 + } 233 + } while (!done); 234 + 235 + XML_ParserFree(parser); 236 + fclose(file); 237 + 238 + // Process last page if there is still text 239 + if (currentTextBlock) { 240 + makePages(); 241 + completePageFn(currentPage); 242 + currentPage = nullptr; 243 + delete currentTextBlock; 244 + currentTextBlock = nullptr; 245 + } 246 + 247 + return true; 248 + } 249 + 250 + void EpubHtmlParserSlim::makePages() { 251 + if (!currentTextBlock) { 252 + Serial.println("!! No text block to make pages for !!"); 253 + return; 254 + } 255 + 256 + if (!currentPage) { 257 + currentPage = new Page(); 258 + } 259 + 260 + const int lineHeight = renderer.getLineHeight(); 261 + const int pageHeight = renderer.getPageHeight(); 262 + 263 + // Long running task, make sure to let other things happen 264 + vTaskDelay(1); 265 + 266 + if (currentTextBlock->getType() == TEXT_BLOCK) { 267 + const auto lines = currentTextBlock->splitIntoLines(renderer); 268 + 269 + for (const auto line : lines) { 270 + if (currentPage->nextY + lineHeight > pageHeight) { 271 + completePageFn(currentPage); 272 + currentPage = new Page(); 273 + } 274 + 275 + currentPage->elements.push_back(new PageLine(line, currentPage->nextY)); 276 + currentPage->nextY += lineHeight; 277 + } 278 + // add some extra line between blocks 279 + currentPage->nextY += lineHeight / 2; 280 + } 281 + // TODO: Image block support 282 + // if (block->getType() == BlockType::IMAGE_BLOCK) { 283 + // ImageBlock *imageBlock = (ImageBlock *)block; 284 + // if (y + imageBlock->height > page_height) { 285 + // pages.push_back(new Page()); 286 + // y = 0; 287 + // } 288 + // pages.back()->elements.push_back(new PageImage(imageBlock, y)); 289 + // y += imageBlock->height; 290 + // } 291 + }
+42
lib/Epub/Epub/EpubHtmlParserSlim.h
··· 1 + #pragma once 2 + 3 + #include <expat.h> 4 + #include <limits.h> 5 + 6 + #include <functional> 7 + 8 + #include "blocks/TextBlock.h" 9 + 10 + class Page; 11 + class EpdRenderer; 12 + 13 + #define PART_WORD_BUFFER_SIZE 200 14 + 15 + class EpubHtmlParserSlim { 16 + const char* filepath; 17 + EpdRenderer& renderer; 18 + std::function<void(Page*)> completePageFn; 19 + int depth = 0; 20 + int skipUntilDepth = INT_MAX; 21 + int boldUntilDepth = INT_MAX; 22 + int italicUntilDepth = INT_MAX; 23 + // If we encounter words longer than this, but this is pretty large 24 + char partWordBuffer[PART_WORD_BUFFER_SIZE] = {}; 25 + int partWordBufferIndex = 0; 26 + TextBlock* currentTextBlock = nullptr; 27 + Page* currentPage = nullptr; 28 + 29 + void startNewTextBlock(BLOCK_STYLE style); 30 + void makePages(); 31 + // XML callbacks 32 + static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts); 33 + static void XMLCALL characterData(void* userData, const XML_Char* s, int len); 34 + static void XMLCALL endElement(void* userData, const XML_Char* name); 35 + 36 + public: 37 + explicit EpubHtmlParserSlim(const char* filepath, EpdRenderer& renderer, 38 + const std::function<void(Page*)>& completePageFn) 39 + : filepath(filepath), renderer(renderer), completePageFn(completePageFn) {} 40 + ~EpubHtmlParserSlim() = default; 41 + bool parseAndBuildPages(); 42 + };
+5 -5
lib/Epub/Epub/Section.cpp
··· 5 5 6 6 #include <fstream> 7 7 8 - #include "EpubHtmlParser.h" 8 + #include "EpubHtmlParserSlim.h" 9 9 #include "Page.h" 10 10 11 11 void Section::onPageComplete(const Page* page) { 12 - Serial.printf("Page %d complete\n", pageCount); 12 + Serial.printf("Page %d complete - free mem: %lu\n", pageCount, ESP.getFreeHeap()); 13 13 14 14 const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin"; 15 15 ··· 75 75 } 76 76 77 77 const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath; 78 - auto visitor = 79 - EpubHtmlParser(sdTmpHtmlPath.c_str(), renderer, [this](const Page* page) { this->onPageComplete(page); }); 80 78 81 - // TODO: Come back and see if mem used here can be lowered? 79 + auto visitor = 80 + EpubHtmlParserSlim(sdTmpHtmlPath.c_str(), renderer, [this](const Page* page) { this->onPageComplete(page); }); 82 81 const bool success = visitor.parseAndBuildPages(); 82 + 83 83 SD.remove(tmpHtmlPath.c_str()); 84 84 if (!success) { 85 85 Serial.println("Failed to parse and build pages");
+8 -46
lib/Epub/Epub/blocks/TextBlock.cpp
··· 3 3 #include <EpdRenderer.h> 4 4 #include <Serialization.h> 5 5 6 - static bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n'; } 6 + void TextBlock::addWord(const std::string& word, const bool is_bold, const bool is_italic) { 7 + if (word.length() == 0) return; 7 8 8 - // move past anything that should be considered part of a work 9 - static int skipWord(const std::string& text, int index, const int length) { 10 - while (index < length && !isWhitespace(text[index])) { 11 - index++; 12 - } 13 - return index; 14 - } 15 - 16 - // skip past any white space characters 17 - static int skipWhitespace(const std::string& html, int index, const int length) { 18 - while (index < length && isWhitespace(html[index])) { 19 - index++; 20 - } 21 - return index; 22 - } 23 - 24 - void TextBlock::addSpan(const std::string& span, const bool is_bold, const bool is_italic) { 25 - // adding a span to text block 26 - // make a copy of the text as we'll modify it 27 - const int length = span.length(); 28 - // const auto text = new char[length + 1]; 29 - // strcpy(text, span); 30 - // work out where each word is in the span 31 - int index = 0; 32 - while (index < length) { 33 - // skip past any whitespace to the start of a word 34 - index = skipWhitespace(span, index, length); 35 - const int wordStart = index; 36 - // find the end of the word 37 - index = skipWord(span, index, length); 38 - const int wordLength = index - wordStart; 39 - if (wordLength > 0) { 40 - words.push_back(span.substr(wordStart, wordLength)); 41 - wordStyles.push_back((is_bold ? BOLD_SPAN : 0) | (is_italic ? ITALIC_SPAN : 0)); 42 - } 43 - } 9 + words.push_back(word); 10 + wordStyles.push_back((is_bold ? BOLD_SPAN : 0) | (is_italic ? 
ITALIC_SPAN : 0)); 44 11 } 45 12 46 13 std::list<TextBlock*> TextBlock::splitIntoLines(const EpdRenderer& renderer) { ··· 189 156 190 157 void TextBlock::render(const EpdRenderer& renderer, const int x, const int y) const { 191 158 for (int i = 0; i < words.size(); i++) { 192 - // get the style 193 - const uint8_t wordStyle = wordStyles[i]; 194 159 // render the word 195 160 EpdFontStyle fontStyle = REGULAR; 196 - if (wordStyles[i] & BOLD_SPAN) { 197 - if (wordStyles[i] & ITALIC_SPAN) { 198 - fontStyle = BOLD_ITALIC; 199 - } else { 200 - fontStyle = BOLD; 201 - } 202 - 161 + if (wordStyles[i] & BOLD_SPAN && wordStyles[i] & ITALIC_SPAN) { 162 + fontStyle = BOLD_ITALIC; 163 + } else if (wordStyles[i] & BOLD_SPAN) { 164 + fontStyle = BOLD; 203 165 } else if (wordStyles[i] & ITALIC_SPAN) { 204 166 fontStyle = ITALIC; 205 167 }
+3 -3
lib/Epub/Epub/blocks/TextBlock.h
··· 30 30 BLOCK_STYLE style; 31 31 32 32 public: 33 - void addSpan(const std::string& span, bool is_bold, bool is_italic); 34 33 explicit TextBlock(const BLOCK_STYLE style) : style(style) {} 35 34 explicit TextBlock(const std::vector<std::string>& words, const std::vector<uint16_t>& word_xpos, 36 35 // the styles of each word 37 36 const std::vector<uint8_t>& word_styles, const BLOCK_STYLE style) 38 37 : words(words), wordXpos(word_xpos), wordStyles(word_styles), style(style) {} 39 38 ~TextBlock() override = default; 40 - void set_style(const BLOCK_STYLE style) { this->style = style; } 41 - BLOCK_STYLE get_style() const { return style; } 39 + void addWord(const std::string& word, bool is_bold, bool is_italic); 40 + void setStyle(const BLOCK_STYLE style) { this->style = style; } 41 + BLOCK_STYLE getStyle() const { return style; } 42 42 bool isEmpty() override { return words.empty(); } 43 43 void layout(EpdRenderer& renderer) override {}; 44 44 // given a renderer works out where to break the words into lines
+17
lib/expat/.gitignore
··· 1 + Makefile 2 + .libs 3 + *.lo 4 + Debug 5 + Debug-w 6 + Release 7 + Release-w 8 + expat.ncb 9 + expat.opt 10 + expat.plg 11 + Debug_static 12 + Debug-w_static 13 + Release_static 14 + Release-w_static 15 + expat_static.plg 16 + expatw.plg 17 + expatw_static.plg
+87
lib/expat/Makefile.am
··· 1 + # 2 + # __ __ _ 3 + # ___\ \/ /_ __ __ _| |_ 4 + # / _ \\ /| '_ \ / _` | __| 5 + # | __// \| |_) | (_| | |_ 6 + # \___/_/\_\ .__/ \__,_|\__| 7 + # |_| XML parser 8 + # 9 + # Copyright (c) 2017-2024 Sebastian Pipping <sebastian@pipping.org> 10 + # Copyright (c) 2017 Tomasz Kłoczko <kloczek@fedoraproject.org> 11 + # Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 12 + # Licensed under the MIT license: 13 + # 14 + # Permission is hereby granted, free of charge, to any person obtaining 15 + # a copy of this software and associated documentation files (the 16 + # "Software"), to deal in the Software without restriction, including 17 + # without limitation the rights to use, copy, modify, merge, publish, 18 + # distribute, sublicense, and/or sell copies of the Software, and to permit 19 + # persons to whom the Software is furnished to do so, subject to the 20 + # following conditions: 21 + # 22 + # The above copyright notice and this permission notice shall be included 23 + # in all copies or substantial portions of the Software. 24 + # 25 + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 + # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 + # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 28 + # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 29 + # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 30 + # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 31 + # USE OR OTHER DEALINGS IN THE SOFTWARE. 
32 + 33 + include_HEADERS = \ 34 + ../expat_config.h \ 35 + expat.h \ 36 + expat_external.h 37 + 38 + lib_LTLIBRARIES = libexpat.la 39 + if WITH_TESTS 40 + noinst_LTLIBRARIES = libtestpat.la 41 + endif 42 + 43 + libexpat_la_LDFLAGS = \ 44 + @AM_LDFLAGS@ \ 45 + @LIBM@ \ 46 + -no-undefined \ 47 + -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ 48 + 49 + libexpat_la_SOURCES = \ 50 + xmlparse.c \ 51 + xmltok.c \ 52 + xmlrole.c 53 + 54 + if WITH_TESTS 55 + libtestpat_la_CPPFLAGS = -DXML_TESTING 56 + 57 + libtestpat_la_SOURCES = $(libexpat_la_SOURCES) 58 + endif 59 + 60 + doc_DATA = \ 61 + ../AUTHORS \ 62 + ../Changes 63 + 64 + install-data-hook: 65 + cd "$(DESTDIR)$(docdir)" && $(am__mv) Changes changelog 66 + 67 + uninstall-local: 68 + $(RM) "$(DESTDIR)$(docdir)/changelog" 69 + 70 + EXTRA_DIST = \ 71 + ascii.h \ 72 + asciitab.h \ 73 + expat_external.h \ 74 + expat.h \ 75 + iasciitab.h \ 76 + internal.h \ 77 + latin1tab.h \ 78 + libexpat.def.cmake \ 79 + nametab.h \ 80 + siphash.h \ 81 + utf8tab.h \ 82 + winconfig.h \ 83 + xmlrole.h \ 84 + xmltok.h \ 85 + xmltok_impl.c \ 86 + xmltok_impl.h \ 87 + xmltok_ns.c
+123
lib/expat/ascii.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1999-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2007 Karl Waclawek <karl@waclawek.net> 13 + Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org> 14 + Licensed under the MIT license: 15 + 16 + Permission is hereby granted, free of charge, to any person obtaining 17 + a copy of this software and associated documentation files (the 18 + "Software"), to deal in the Software without restriction, including 19 + without limitation the rights to use, copy, modify, merge, publish, 20 + distribute, sublicense, and/or sell copies of the Software, and to permit 21 + persons to whom the Software is furnished to do so, subject to the 22 + following conditions: 23 + 24 + The above copyright notice and this permission notice shall be included 25 + in all copies or substantial portions of the Software. 26 + 27 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 30 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 31 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 32 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 33 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
34 + */ 35 + 36 + #define ASCII_A 0x41 37 + #define ASCII_B 0x42 38 + #define ASCII_C 0x43 39 + #define ASCII_D 0x44 40 + #define ASCII_E 0x45 41 + #define ASCII_F 0x46 42 + #define ASCII_G 0x47 43 + #define ASCII_H 0x48 44 + #define ASCII_I 0x49 45 + #define ASCII_J 0x4A 46 + #define ASCII_K 0x4B 47 + #define ASCII_L 0x4C 48 + #define ASCII_M 0x4D 49 + #define ASCII_N 0x4E 50 + #define ASCII_O 0x4F 51 + #define ASCII_P 0x50 52 + #define ASCII_Q 0x51 53 + #define ASCII_R 0x52 54 + #define ASCII_S 0x53 55 + #define ASCII_T 0x54 56 + #define ASCII_U 0x55 57 + #define ASCII_V 0x56 58 + #define ASCII_W 0x57 59 + #define ASCII_X 0x58 60 + #define ASCII_Y 0x59 61 + #define ASCII_Z 0x5A 62 + 63 + #define ASCII_a 0x61 64 + #define ASCII_b 0x62 65 + #define ASCII_c 0x63 66 + #define ASCII_d 0x64 67 + #define ASCII_e 0x65 68 + #define ASCII_f 0x66 69 + #define ASCII_g 0x67 70 + #define ASCII_h 0x68 71 + #define ASCII_i 0x69 72 + #define ASCII_j 0x6A 73 + #define ASCII_k 0x6B 74 + #define ASCII_l 0x6C 75 + #define ASCII_m 0x6D 76 + #define ASCII_n 0x6E 77 + #define ASCII_o 0x6F 78 + #define ASCII_p 0x70 79 + #define ASCII_q 0x71 80 + #define ASCII_r 0x72 81 + #define ASCII_s 0x73 82 + #define ASCII_t 0x74 83 + #define ASCII_u 0x75 84 + #define ASCII_v 0x76 85 + #define ASCII_w 0x77 86 + #define ASCII_x 0x78 87 + #define ASCII_y 0x79 88 + #define ASCII_z 0x7A 89 + 90 + #define ASCII_0 0x30 91 + #define ASCII_1 0x31 92 + #define ASCII_2 0x32 93 + #define ASCII_3 0x33 94 + #define ASCII_4 0x34 95 + #define ASCII_5 0x35 96 + #define ASCII_6 0x36 97 + #define ASCII_7 0x37 98 + #define ASCII_8 0x38 99 + #define ASCII_9 0x39 100 + 101 + #define ASCII_TAB 0x09 102 + #define ASCII_SPACE 0x20 103 + #define ASCII_EXCL 0x21 104 + #define ASCII_QUOT 0x22 105 + #define ASCII_AMP 0x26 106 + #define ASCII_APOS 0x27 107 + #define ASCII_MINUS 0x2D 108 + #define ASCII_PERIOD 0x2E 109 + #define ASCII_COLON 0x3A 110 + #define ASCII_SEMI 0x3B 111 + #define ASCII_LT 0x3C 112 + #define ASCII_EQUALS 
0x3D 113 + #define ASCII_GT 0x3E 114 + #define ASCII_LSQB 0x5B 115 + #define ASCII_RSQB 0x5D 116 + #define ASCII_UNDERSCORE 0x5F 117 + #define ASCII_LPAREN 0x28 118 + #define ASCII_RPAREN 0x29 119 + #define ASCII_FF 0x0C 120 + #define ASCII_SLASH 0x2F 121 + #define ASCII_HASH 0x23 122 + #define ASCII_PIPE 0x7C 123 + #define ASCII_COMMA 0x2C
+66
lib/expat/asciitab.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org> 13 + Licensed under the MIT license: 14 + 15 + Permission is hereby granted, free of charge, to any person obtaining 16 + a copy of this software and associated documentation files (the 17 + "Software"), to deal in the Software without restriction, including 18 + without limitation the rights to use, copy, modify, merge, publish, 19 + distribute, sublicense, and/or sell copies of the Software, and to permit 20 + persons to whom the Software is furnished to do so, subject to the 21 + following conditions: 22 + 23 + The above copyright notice and this permission notice shall be included 24 + in all copies or substantial portions of the Software. 25 + 26 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 29 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 30 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 31 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 32 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
33 + */ 34 + 35 + /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 36 + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 37 + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, 38 + /* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, 39 + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 40 + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 41 + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 42 + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 43 + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, 44 + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, 45 + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, 46 + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, 47 + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, 48 + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, 49 + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, 50 + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, 51 + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, 52 + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, 53 + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 54 + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 55 + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 56 + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 57 + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, 58 + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, 59 + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, 60 + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, 61 + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 62 + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 63 + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 64 + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 65 + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, 66 + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,
+1031
lib/expat/expat.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 + Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 + Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> 15 + Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org> 16 + Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> 17 + Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 18 + Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl> 19 + Copyright (c) 2023 Hanno Böck <hanno@gentoo.org> 20 + Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> 21 + Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> 22 + Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> 23 + Licensed under the MIT license: 24 + 25 + Permission is hereby granted, free of charge, to any person obtaining 26 + a copy of this software and associated documentation files (the 27 + "Software"), to deal in the Software without restriction, including 28 + without limitation the rights to use, copy, modify, merge, publish, 29 + distribute, sublicense, and/or sell copies of the Software, and to permit 30 + persons to whom the Software is furnished to do so, subject to the 31 + following conditions: 32 + 33 + The above copyright notice and this permission notice shall be included 34 + in all copies or substantial portions of the Software. 35 + 36 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 37 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 38 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 39 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 40 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 41 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 42 + USE OR OTHER DEALINGS IN THE SOFTWARE. 43 + */ 44 + 45 + #ifndef Expat_INCLUDED 46 + #define Expat_INCLUDED 1 47 + 48 + #include <stdlib.h> 49 + 50 + #include "expat_external.h" 51 + 52 + #ifdef __cplusplus 53 + extern "C" { 54 + #endif 55 + 56 + struct XML_ParserStruct; 57 + typedef struct XML_ParserStruct* XML_Parser; 58 + 59 + typedef unsigned char XML_Bool; 60 + #define XML_TRUE ((XML_Bool)1) 61 + #define XML_FALSE ((XML_Bool)0) 62 + 63 + /* The XML_Status enum gives the possible return values for several 64 + API functions. The preprocessor #defines are included so this 65 + stanza can be added to code that still needs to support older 66 + versions of Expat 1.95.x: 67 + 68 + #ifndef XML_STATUS_OK 69 + #define XML_STATUS_OK 1 70 + #define XML_STATUS_ERROR 0 71 + #endif 72 + 73 + Otherwise, the #define hackery is quite ugly and would have been 74 + dropped. 
75 + */ 76 + enum XML_Status { 77 + XML_STATUS_ERROR = 0, 78 + #define XML_STATUS_ERROR XML_STATUS_ERROR 79 + XML_STATUS_OK = 1, 80 + #define XML_STATUS_OK XML_STATUS_OK 81 + XML_STATUS_SUSPENDED = 2 82 + #define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED 83 + }; 84 + 85 + enum XML_Error { 86 + XML_ERROR_NONE, 87 + XML_ERROR_NO_MEMORY, 88 + XML_ERROR_SYNTAX, 89 + XML_ERROR_NO_ELEMENTS, 90 + XML_ERROR_INVALID_TOKEN, 91 + XML_ERROR_UNCLOSED_TOKEN, 92 + XML_ERROR_PARTIAL_CHAR, 93 + XML_ERROR_TAG_MISMATCH, 94 + XML_ERROR_DUPLICATE_ATTRIBUTE, 95 + XML_ERROR_JUNK_AFTER_DOC_ELEMENT, 96 + XML_ERROR_PARAM_ENTITY_REF, 97 + XML_ERROR_UNDEFINED_ENTITY, 98 + XML_ERROR_RECURSIVE_ENTITY_REF, 99 + XML_ERROR_ASYNC_ENTITY, 100 + XML_ERROR_BAD_CHAR_REF, 101 + XML_ERROR_BINARY_ENTITY_REF, 102 + XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, 103 + XML_ERROR_MISPLACED_XML_PI, 104 + XML_ERROR_UNKNOWN_ENCODING, 105 + XML_ERROR_INCORRECT_ENCODING, 106 + XML_ERROR_UNCLOSED_CDATA_SECTION, 107 + XML_ERROR_EXTERNAL_ENTITY_HANDLING, 108 + XML_ERROR_NOT_STANDALONE, 109 + XML_ERROR_UNEXPECTED_STATE, 110 + XML_ERROR_ENTITY_DECLARED_IN_PE, 111 + XML_ERROR_FEATURE_REQUIRES_XML_DTD, 112 + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING, 113 + /* Added in 1.95.7. */ 114 + XML_ERROR_UNBOUND_PREFIX, 115 + /* Added in 1.95.8. */ 116 + XML_ERROR_UNDECLARING_PREFIX, 117 + XML_ERROR_INCOMPLETE_PE, 118 + XML_ERROR_XML_DECL, 119 + XML_ERROR_TEXT_DECL, 120 + XML_ERROR_PUBLICID, 121 + XML_ERROR_SUSPENDED, 122 + XML_ERROR_NOT_SUSPENDED, 123 + XML_ERROR_ABORTED, 124 + XML_ERROR_FINISHED, 125 + XML_ERROR_SUSPEND_PE, 126 + /* Added in 2.0. */ 127 + XML_ERROR_RESERVED_PREFIX_XML, 128 + XML_ERROR_RESERVED_PREFIX_XMLNS, 129 + XML_ERROR_RESERVED_NAMESPACE_URI, 130 + /* Added in 2.2.1. */ 131 + XML_ERROR_INVALID_ARGUMENT, 132 + /* Added in 2.3.0. */ 133 + XML_ERROR_NO_BUFFER, 134 + /* Added in 2.4.0. */ 135 + XML_ERROR_AMPLIFICATION_LIMIT_BREACH, 136 + /* Added in 2.6.4. 
*/ 137 + XML_ERROR_NOT_STARTED, 138 + }; 139 + 140 + enum XML_Content_Type { 141 + XML_CTYPE_EMPTY = 1, 142 + XML_CTYPE_ANY, 143 + XML_CTYPE_MIXED, 144 + XML_CTYPE_NAME, 145 + XML_CTYPE_CHOICE, 146 + XML_CTYPE_SEQ 147 + }; 148 + 149 + enum XML_Content_Quant { XML_CQUANT_NONE, XML_CQUANT_OPT, XML_CQUANT_REP, XML_CQUANT_PLUS }; 150 + 151 + /* If type == XML_CTYPE_EMPTY or XML_CTYPE_ANY, then quant will be 152 + XML_CQUANT_NONE, and the other fields will be zero or NULL. 153 + If type == XML_CTYPE_MIXED, then quant will be NONE or REP and 154 + numchildren will contain the number of elements that may be mixed in 155 + and children point to an array of XML_Content cells that will be 156 + all of XML_CTYPE_NAME type with no quantification. 157 + 158 + If type == XML_CTYPE_NAME, then the name points to the name, and 159 + the numchildren field will be zero and children will be NULL. The 160 + quant field indicates any quantifiers placed on the name. 161 + 162 + CHOICE and SEQ will have name NULL, the number of children in 163 + numchildren and children will point, recursively, to an array 164 + of XML_Content cells. 165 + 166 + The EMPTY, ANY, and MIXED types will only occur at top level. 167 + */ 168 + 169 + typedef struct XML_cp XML_Content; 170 + 171 + struct XML_cp { 172 + enum XML_Content_Type type; 173 + enum XML_Content_Quant quant; 174 + XML_Char* name; 175 + unsigned int numchildren; 176 + XML_Content* children; 177 + }; 178 + 179 + /* This is called for an element declaration. See above for 180 + description of the model argument. It's the user code's responsibility 181 + to free model when finished with it. See XML_FreeContentModel. 182 + There is no need to free the model from the handler; it can be kept 183 + around and freed at a later stage. 
184 + */ 185 + typedef void(XMLCALL* XML_ElementDeclHandler)(void* userData, const XML_Char* name, XML_Content* model); 186 + 187 + XMLPARSEAPI(void) 188 + XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl); 189 + 190 + /* The Attlist declaration handler is called for *each* attribute. So 191 + a single Attlist declaration with multiple attributes declared will 192 + generate multiple calls to this handler. The "default" parameter 193 + may be NULL in the case of the "#IMPLIED" or "#REQUIRED" 194 + keyword. The "isrequired" parameter will be true and the default 195 + value will be NULL in the case of "#REQUIRED". If "isrequired" is 196 + true and default is non-NULL, then this is a "#FIXED" default. 197 + */ 198 + typedef void(XMLCALL* XML_AttlistDeclHandler)(void* userData, const XML_Char* elname, const XML_Char* attname, 199 + const XML_Char* att_type, const XML_Char* dflt, int isrequired); 200 + 201 + XMLPARSEAPI(void) 202 + XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl); 203 + 204 + /* The XML declaration handler is called for *both* XML declarations 205 + and text declarations. The way to distinguish is that the version 206 + parameter will be NULL for text declarations. The encoding 207 + parameter may be NULL for XML declarations. The standalone 208 + parameter will be -1, 0, or 1 indicating respectively that there 209 + was no standalone parameter in the declaration, that it was given 210 + as no, or that it was given as yes. 
211 + */ 212 + typedef void(XMLCALL* XML_XmlDeclHandler)(void* userData, const XML_Char* version, const XML_Char* encoding, 213 + int standalone); 214 + 215 + XMLPARSEAPI(void) 216 + XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl); 217 + 218 + typedef struct { 219 + void* (*malloc_fcn)(size_t size); 220 + void* (*realloc_fcn)(void* ptr, size_t size); 221 + void (*free_fcn)(void* ptr); 222 + } XML_Memory_Handling_Suite; 223 + 224 + /* Constructs a new parser; encoding is the encoding specified by the 225 + external protocol or NULL if there is none specified. 226 + */ 227 + XMLPARSEAPI(XML_Parser) 228 + XML_ParserCreate(const XML_Char* encoding); 229 + 230 + /* Constructs a new parser and namespace processor. Element type 231 + names and attribute names that belong to a namespace will be 232 + expanded; unprefixed attribute names are never expanded; unprefixed 233 + element type names are expanded only if there is a default 234 + namespace. The expanded name is the concatenation of the namespace 235 + URI, the namespace separator character, and the local part of the 236 + name. If the namespace separator is '\0' then the namespace URI 237 + and the local part will be concatenated without any separator. 238 + It is a programming error to use the separator '\0' with namespace 239 + triplets (see XML_SetReturnNSTriplet). 240 + If a namespace separator is chosen that can be part of a URI or 241 + part of an XML name, splitting an expanded name back into its 242 + 1, 2 or 3 original parts on application level in the element handler 243 + may end up vulnerable, so these are advised against; sane choices for 244 + a namespace separator are e.g. '\n' (line feed) and '|' (pipe). 245 + 246 + Note that Expat does not validate namespace URIs (beyond encoding) 247 + against RFC 3986 today (and is not required to do so with regard to 248 + the XML 1.0 namespaces specification) but it may start doing that 249 + in future releases. 
Before that, an application using Expat must 250 + be ready to receive namespace URIs containing non-URI characters. 251 + */ 252 + XMLPARSEAPI(XML_Parser) 253 + XML_ParserCreateNS(const XML_Char* encoding, XML_Char namespaceSeparator); 254 + 255 + /* Constructs a new parser using the memory management suite referred to 256 + by memsuite. If memsuite is NULL, then use the standard library memory 257 + suite. If namespaceSeparator is non-NULL it creates a parser with 258 + namespace processing as described above. The character pointed at 259 + will serve as the namespace separator. 260 + 261 + All further memory operations used for the created parser will come from 262 + the given suite. 263 + */ 264 + XMLPARSEAPI(XML_Parser) 265 + XML_ParserCreate_MM(const XML_Char* encoding, const XML_Memory_Handling_Suite* memsuite, 266 + const XML_Char* namespaceSeparator); 267 + 268 + /* Prepare a parser object to be reused. This is particularly 269 + valuable when memory allocation overhead is disproportionately high, 270 + such as when a large number of small documents need to be parsed. 271 + All handlers are cleared from the parser, except for the 272 + unknownEncodingHandler. The parser's external state is re-initialized 273 + except for the values of ns and ns_triplets. 274 + 275 + Added in Expat 1.95.3. 276 + */ 277 + XMLPARSEAPI(XML_Bool) 278 + XML_ParserReset(XML_Parser parser, const XML_Char* encoding); 279 + 280 + /* atts is an array of name/value pairs, terminated by 0; 281 + names and values are 0 terminated. 282 + */ 283 + typedef void(XMLCALL* XML_StartElementHandler)(void* userData, const XML_Char* name, const XML_Char** atts); 284 + 285 + typedef void(XMLCALL* XML_EndElementHandler)(void* userData, const XML_Char* name); 286 + 287 + /* s is not 0 terminated. 
*/ 288 + typedef void(XMLCALL* XML_CharacterDataHandler)(void* userData, const XML_Char* s, int len); 289 + 290 + /* target and data are 0 terminated */ 291 + typedef void(XMLCALL* XML_ProcessingInstructionHandler)(void* userData, const XML_Char* target, const XML_Char* data); 292 + 293 + /* data is 0 terminated */ 294 + typedef void(XMLCALL* XML_CommentHandler)(void* userData, const XML_Char* data); 295 + 296 + typedef void(XMLCALL* XML_StartCdataSectionHandler)(void* userData); 297 + typedef void(XMLCALL* XML_EndCdataSectionHandler)(void* userData); 298 + 299 + /* This is called for any characters in the XML document for which 300 + there is no applicable handler. This includes both characters that 301 + are part of markup which is of a kind that is not reported 302 + (comments, markup declarations), or characters that are part of a 303 + construct which could be reported but for which no handler has been 304 + supplied. The characters are passed exactly as they were in the XML 305 + document except that they will be encoded in UTF-8 or UTF-16. 306 + Line boundaries are not normalized. Note that a byte order mark 307 + character is not passed to the default handler. There are no 308 + guarantees about how characters are divided between calls to the 309 + default handler: for example, a comment might be split between 310 + multiple calls. 311 + */ 312 + typedef void(XMLCALL* XML_DefaultHandler)(void* userData, const XML_Char* s, int len); 313 + 314 + /* This is called for the start of the DOCTYPE declaration, before 315 + any DTD or internal subset is parsed. 316 + */ 317 + typedef void(XMLCALL* XML_StartDoctypeDeclHandler)(void* userData, const XML_Char* doctypeName, const XML_Char* sysid, 318 + const XML_Char* pubid, int has_internal_subset); 319 + 320 + /* This is called for the end of the DOCTYPE declaration when the 321 + closing > is encountered, but after processing any external 322 + subset. 
323 + */ 324 + typedef void(XMLCALL* XML_EndDoctypeDeclHandler)(void* userData); 325 + 326 + /* This is called for entity declarations. The is_parameter_entity 327 + argument will be non-zero if the entity is a parameter entity, zero 328 + otherwise. 329 + 330 + For internal entities (<!ENTITY foo "bar">), value will 331 + be non-NULL and systemId, publicID, and notationName will be NULL. 332 + The value string is NOT null-terminated; the length is provided in 333 + the value_length argument. Since it is legal to have zero-length 334 + values, do not use this argument to test for internal entities. 335 + 336 + For external entities, value will be NULL and systemId will be 337 + non-NULL. The publicId argument will be NULL unless a public 338 + identifier was provided. The notationName argument will have a 339 + non-NULL value only for unparsed entity declarations. 340 + 341 + Note that is_parameter_entity can't be changed to XML_Bool, since 342 + that would break binary compatibility. 343 + */ 344 + typedef void(XMLCALL* XML_EntityDeclHandler)(void* userData, const XML_Char* entityName, int is_parameter_entity, 345 + const XML_Char* value, int value_length, const XML_Char* base, 346 + const XML_Char* systemId, const XML_Char* publicId, 347 + const XML_Char* notationName); 348 + 349 + XMLPARSEAPI(void) 350 + XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); 351 + 352 + /* OBSOLETE -- OBSOLETE -- OBSOLETE 353 + This handler has been superseded by the EntityDeclHandler above. 354 + It is provided here for backward compatibility. 355 + 356 + This is called for a declaration of an unparsed (NDATA) entity. 357 + The base argument is whatever was set by XML_SetBase. The 358 + entityName, systemId and notationName arguments will never be 359 + NULL. The other arguments may be. 
360 + */ 361 + typedef void(XMLCALL* XML_UnparsedEntityDeclHandler)(void* userData, const XML_Char* entityName, const XML_Char* base, 362 + const XML_Char* systemId, const XML_Char* publicId, 363 + const XML_Char* notationName); 364 + 365 + /* This is called for a notation declaration. The base argument is 366 + whatever was set by XML_SetBase. The notationName will never be 367 + NULL. The other arguments can be. 368 + */ 369 + typedef void(XMLCALL* XML_NotationDeclHandler)(void* userData, const XML_Char* notationName, const XML_Char* base, 370 + const XML_Char* systemId, const XML_Char* publicId); 371 + 372 + /* When namespace processing is enabled, these are called once for 373 + each namespace declaration. The calls to the start and end element 374 + handlers occur between the calls to the start and end namespace 375 + declaration handlers. For an xmlns attribute, prefix will be 376 + NULL. For an xmlns="" attribute, uri will be NULL. 377 + */ 378 + typedef void(XMLCALL* XML_StartNamespaceDeclHandler)(void* userData, const XML_Char* prefix, const XML_Char* uri); 379 + 380 + typedef void(XMLCALL* XML_EndNamespaceDeclHandler)(void* userData, const XML_Char* prefix); 381 + 382 + /* This is called if the document is not standalone, that is, it has an 383 + external subset or a reference to a parameter entity, but does not 384 + have standalone="yes". If this handler returns XML_STATUS_ERROR, 385 + then processing will not continue, and the parser will return an 386 + XML_ERROR_NOT_STANDALONE error. 387 + If parameter entity parsing is enabled, then in addition to the 388 + conditions above this handler will only be called if the referenced 389 + entity was actually read. 390 + */ 391 + typedef int(XMLCALL* XML_NotStandaloneHandler)(void* userData); 392 + 393 + /* This is called for a reference to an external parsed general 394 + entity. The referenced entity is not automatically parsed. 
The 395 + application can parse it immediately or later using 396 + XML_ExternalEntityParserCreate. 397 + 398 + The parser argument is the parser parsing the entity containing the 399 + reference; it can be passed as the parser argument to 400 + XML_ExternalEntityParserCreate. The systemId argument is the 401 + system identifier as specified in the entity declaration; it will 402 + not be NULL. 403 + 404 + The base argument is the system identifier that should be used as 405 + the base for resolving systemId if systemId was relative; this is 406 + set by XML_SetBase; it may be NULL. 407 + 408 + The publicId argument is the public identifier as specified in the 409 + entity declaration, or NULL if none was specified; the whitespace 410 + in the public identifier will have been normalized as required by 411 + the XML spec. 412 + 413 + The context argument specifies the parsing context in the format 414 + expected by the context argument to XML_ExternalEntityParserCreate; 415 + context is valid only until the handler returns, so if the 416 + referenced entity is to be parsed later, it must be copied. 417 + context is NULL only when the entity is a parameter entity. 418 + 419 + The handler should return XML_STATUS_ERROR if processing should not 420 + continue because of a fatal error in the handling of the external 421 + entity. In this case the calling parser will return an 422 + XML_ERROR_EXTERNAL_ENTITY_HANDLING error. 423 + 424 + Note that unlike other handlers the first argument is the parser, 425 + not userData. 426 + */ 427 + typedef int(XMLCALL* XML_ExternalEntityRefHandler)(XML_Parser parser, const XML_Char* context, const XML_Char* base, 428 + const XML_Char* systemId, const XML_Char* publicId); 429 + 430 + /* This is called in two situations: 431 + 1) An entity reference is encountered for which no declaration 432 + has been read *and* this is not an error. 
433 + 2) An internal entity reference is read, but not expanded, because 434 + XML_SetDefaultHandler has been called. 435 + Note: skipped parameter entities in declarations and skipped general 436 + entities in attribute values cannot be reported, because 437 + the event would be out of sync with the reporting of the 438 + declarations or attribute values. 439 + */ 440 + typedef void(XMLCALL* XML_SkippedEntityHandler)(void* userData, const XML_Char* entityName, int is_parameter_entity); 441 + 442 + /* This structure is filled in by the XML_UnknownEncodingHandler to 443 + provide information to the parser about encodings that are unknown 444 + to the parser. 445 + 446 + The map[b] member gives information about byte sequences whose 447 + first byte is b. 448 + 449 + If map[b] is c where c is >= 0, then b by itself encodes the 450 + Unicode scalar value c. 451 + 452 + If map[b] is -1, then the byte sequence is malformed. 453 + 454 + If map[b] is -n, where n >= 2, then b is the first byte of an 455 + n-byte sequence that encodes a single Unicode scalar value. 456 + 457 + The data member will be passed as the first argument to the convert 458 + function. 459 + 460 + The convert function is used to convert multibyte sequences; s will 461 + point to an n-byte sequence where map[(unsigned char)*s] == -n. The 462 + convert function must return the Unicode scalar value represented 463 + by this byte sequence or -1 if the byte sequence is malformed. 464 + 465 + The convert function may be NULL if the encoding is a single-byte 466 + encoding, that is if map[b] >= -1 for all bytes b. 467 + 468 + When the parser is finished with the encoding, then if release is 469 + not NULL, it will call release passing it the data member; once 470 + release has been called, the convert function will not be called 471 + again. 472 + 473 + Expat places certain restrictions on the encodings that are supported 474 + using this mechanism. 475 + 476 + 1. 
Every ASCII character that can appear in a well-formed XML document, 477 + other than the characters 478 + 479 + $@\^`{}~ 480 + 481 + must be represented by a single byte, and that byte must be the 482 + same byte that represents that character in ASCII. 483 + 484 + 2. No character may require more than 4 bytes to encode. 485 + 486 + 3. All characters encoded must have Unicode scalar values <= 487 + 0xFFFF, (i.e., characters that would be encoded by surrogates in 488 + UTF-16 are not allowed). Note that this restriction doesn't 489 + apply to the built-in support for UTF-8 and UTF-16. 490 + 491 + 4. No Unicode character may be encoded by more than one distinct 492 + sequence of bytes. 493 + */ 494 + typedef struct { 495 + int map[256]; 496 + void* data; 497 + int(XMLCALL* convert)(void* data, const char* s); 498 + void(XMLCALL* release)(void* data); 499 + } XML_Encoding; 500 + 501 + /* This is called for an encoding that is unknown to the parser. 502 + 503 + The encodingHandlerData argument is that which was passed as the 504 + second argument to XML_SetUnknownEncodingHandler. 505 + 506 + The name argument gives the name of the encoding as specified in 507 + the encoding declaration. 508 + 509 + If the callback can provide information about the encoding, it must 510 + fill in the XML_Encoding structure, and return XML_STATUS_OK. 511 + Otherwise it must return XML_STATUS_ERROR. 512 + 513 + If info does not describe a suitable encoding, then the parser will 514 + return an XML_ERROR_UNKNOWN_ENCODING error. 
515 + */ 516 + typedef int(XMLCALL* XML_UnknownEncodingHandler)(void* encodingHandlerData, const XML_Char* name, XML_Encoding* info); 517 + 518 + XMLPARSEAPI(void) 519 + XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end); 520 + 521 + XMLPARSEAPI(void) 522 + XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler); 523 + 524 + XMLPARSEAPI(void) 525 + XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler); 526 + 527 + XMLPARSEAPI(void) 528 + XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler); 529 + 530 + XMLPARSEAPI(void) 531 + XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler); 532 + XMLPARSEAPI(void) 533 + XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); 534 + 535 + XMLPARSEAPI(void) 536 + XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end); 537 + 538 + XMLPARSEAPI(void) 539 + XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start); 540 + 541 + XMLPARSEAPI(void) 542 + XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end); 543 + 544 + /* This sets the default handler and also inhibits expansion of 545 + internal entities. These entity references will be passed to the 546 + default handler, or to the skipped entity handler, if one is set. 547 + */ 548 + XMLPARSEAPI(void) 549 + XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); 550 + 551 + /* This sets the default handler but does not inhibit expansion of 552 + internal entities. The entity reference will not be passed to the 553 + default handler. 
554 + */ 555 + XMLPARSEAPI(void) 556 + XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); 557 + 558 + XMLPARSEAPI(void) 559 + XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end); 560 + 561 + XMLPARSEAPI(void) 562 + XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start); 563 + 564 + XMLPARSEAPI(void) 565 + XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end); 566 + 567 + XMLPARSEAPI(void) 568 + XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); 569 + 570 + XMLPARSEAPI(void) 571 + XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); 572 + 573 + XMLPARSEAPI(void) 574 + XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end); 575 + 576 + XMLPARSEAPI(void) 577 + XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start); 578 + 579 + XMLPARSEAPI(void) 580 + XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end); 581 + 582 + XMLPARSEAPI(void) 583 + XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler); 584 + 585 + XMLPARSEAPI(void) 586 + XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler); 587 + 588 + /* If a non-NULL value for arg is specified here, then it will be 589 + passed as the first argument to the external entity ref handler 590 + instead of the parser object. 
591 + */ 592 + XMLPARSEAPI(void) 593 + XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void* arg); 594 + 595 + XMLPARSEAPI(void) 596 + XML_SetSkippedEntityHandler(XML_Parser parser, XML_SkippedEntityHandler handler); 597 + 598 + XMLPARSEAPI(void) 599 + XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void* encodingHandlerData); 600 + 601 + /* This can be called within a handler for a start element, end 602 + element, processing instruction or character data. It causes the 603 + corresponding markup to be passed to the default handler. 604 + */ 605 + XMLPARSEAPI(void) 606 + XML_DefaultCurrent(XML_Parser parser); 607 + 608 + /* If do_nst is non-zero, and namespace processing is in effect, and 609 + a name has a prefix (i.e. an explicit namespace qualifier) then 610 + that name is returned as a triplet in a single string separated by 611 + the separator character specified when the parser was created: URI 612 + + sep + local_name + sep + prefix. 613 + 614 + If do_nst is zero, then namespace information is returned in the 615 + default manner (URI + sep + local_name) whether or not the name 616 + has a prefix. 617 + 618 + Note: Calling XML_SetReturnNSTriplet after XML_Parse or 619 + XML_ParseBuffer has no effect. 620 + */ 621 + 622 + XMLPARSEAPI(void) 623 + XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); 624 + 625 + /* This value is passed as the userData argument to callbacks. */ 626 + XMLPARSEAPI(void) 627 + XML_SetUserData(XML_Parser parser, void* userData); 628 + 629 + /* Returns the last value set by XML_SetUserData or NULL. */ 630 + #define XML_GetUserData(parser) (*(void**)(parser)) 631 + 632 + /* This is equivalent to supplying an encoding argument to 633 + XML_ParserCreate. On success XML_SetEncoding returns non-zero, 634 + zero otherwise. 635 + Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer 636 + has no effect and returns XML_STATUS_ERROR. 
637 + */ 638 + XMLPARSEAPI(enum XML_Status) 639 + XML_SetEncoding(XML_Parser parser, const XML_Char* encoding); 640 + 641 + /* If this function is called, then the parser will be passed as the 642 + first argument to callbacks instead of userData. The userData will 643 + still be accessible using XML_GetUserData. 644 + */ 645 + XMLPARSEAPI(void) 646 + XML_UseParserAsHandlerArg(XML_Parser parser); 647 + 648 + /* If useDTD == XML_TRUE is passed to this function, then the parser 649 + will assume that there is an external subset, even if none is 650 + specified in the document. In such a case the parser will call the 651 + externalEntityRefHandler with a value of NULL for the systemId 652 + argument (the publicId and context arguments will be NULL as well). 653 + Note: For the purpose of checking WFC: Entity Declared, passing 654 + useDTD == XML_TRUE will make the parser behave as if the document 655 + had a DTD with an external subset. 656 + Note: If this function is called, then this must be done before 657 + the first call to XML_Parse or XML_ParseBuffer, since it will 658 + have no effect after that. Returns 659 + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING. 660 + Note: If the document does not have a DOCTYPE declaration at all, 661 + then startDoctypeDeclHandler and endDoctypeDeclHandler will not 662 + be called, despite an external subset being parsed. 663 + Note: If XML_DTD is not defined when Expat is compiled, returns 664 + XML_ERROR_FEATURE_REQUIRES_XML_DTD. 665 + Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. 666 + */ 667 + XMLPARSEAPI(enum XML_Error) 668 + XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); 669 + 670 + /* Sets the base to be used for resolving relative URIs in system 671 + identifiers in declarations. 
Resolving relative identifiers is 672 + left to the application: this value will be passed through as the 673 + base argument to the XML_ExternalEntityRefHandler, 674 + XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base 675 + argument will be copied. Returns XML_STATUS_ERROR if out of memory, 676 + XML_STATUS_OK otherwise. 677 + */ 678 + XMLPARSEAPI(enum XML_Status) 679 + XML_SetBase(XML_Parser parser, const XML_Char* base); 680 + 681 + XMLPARSEAPI(const XML_Char*) 682 + XML_GetBase(XML_Parser parser); 683 + 684 + /* Returns the number of the attribute/value pairs passed in last call 685 + to the XML_StartElementHandler that were specified in the start-tag 686 + rather than defaulted. Each attribute/value pair counts as 2; thus 687 + this corresponds to an index into the atts array passed to the 688 + XML_StartElementHandler. Returns -1 if parser == NULL. 689 + */ 690 + XMLPARSEAPI(int) 691 + XML_GetSpecifiedAttributeCount(XML_Parser parser); 692 + 693 + /* Returns the index of the ID attribute passed in the last call to 694 + XML_StartElementHandler, or -1 if there is no ID attribute or 695 + parser == NULL. Each attribute/value pair counts as 2; thus this 696 + corresponds to an index into the atts array passed to the 697 + XML_StartElementHandler. 698 + */ 699 + XMLPARSEAPI(int) 700 + XML_GetIdAttributeIndex(XML_Parser parser); 701 + 702 + #ifdef XML_ATTR_INFO 703 + /* Source file byte offsets for the start and end of attribute names and values. 704 + The value indices are exclusive of surrounding quotes; thus in a UTF-8 source 705 + file an attribute value of "blah" will yield: 706 + info->valueEnd - info->valueStart = 4 bytes. 707 + */ 708 + typedef struct { 709 + XML_Index nameStart; /* Offset to beginning of the attribute name. */ 710 + XML_Index nameEnd; /* Offset after the attribute name's last byte. */ 711 + XML_Index valueStart; /* Offset to beginning of the attribute value. 
*/ 712 + XML_Index valueEnd; /* Offset after the attribute value's last byte. */ 713 + } XML_AttrInfo; 714 + 715 + /* Returns an array of XML_AttrInfo structures for the attribute/value pairs 716 + passed in last call to the XML_StartElementHandler that were specified 717 + in the start-tag rather than defaulted. Each attribute/value pair counts 718 + as 1; thus the number of entries in the array is 719 + XML_GetSpecifiedAttributeCount(parser) / 2. 720 + */ 721 + XMLPARSEAPI(const XML_AttrInfo*) 722 + XML_GetAttributeInfo(XML_Parser parser); 723 + #endif 724 + 725 + /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is 726 + detected. The last call to XML_Parse must have isFinal true; len 727 + may be zero for this call (or any other). 728 + 729 + Though the return values for these functions have always been 730 + described as a Boolean value, the implementation, at least for the 731 + 1.95.x series, has always returned exactly one of the XML_Status 732 + values. 733 + */ 734 + XMLPARSEAPI(enum XML_Status) 735 + XML_Parse(XML_Parser parser, const char* s, int len, int isFinal); 736 + 737 + XMLPARSEAPI(void*) 738 + XML_GetBuffer(XML_Parser parser, int len); 739 + 740 + XMLPARSEAPI(enum XML_Status) 741 + XML_ParseBuffer(XML_Parser parser, int len, int isFinal); 742 + 743 + /* Stops parsing, causing XML_Parse() or XML_ParseBuffer() to return. 744 + Must be called from within a call-back handler, except when aborting 745 + (resumable = 0) an already suspended parser. Some call-backs may 746 + still follow because they would otherwise get lost. Examples: 747 + - endElementHandler() for empty elements when stopped in 748 + startElementHandler(), 749 + - endNameSpaceDeclHandler() when stopped in endElementHandler(), 750 + and possibly others. 751 + 752 + Can be called from most handlers, including DTD related call-backs, 753 + except when parsing an external parameter entity and resumable != 0. 
754 + Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. 755 + Possible error codes: 756 + - XML_ERROR_SUSPENDED: when suspending an already suspended parser. 757 + - XML_ERROR_FINISHED: when the parser has already finished. 758 + - XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE. 759 + 760 + When resumable != 0 (true) then parsing is suspended, that is, 761 + XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. 762 + Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer() 763 + return XML_STATUS_ERROR with error code XML_ERROR_ABORTED. 764 + 765 + *Note*: 766 + This will be applied to the current parser instance only, that is, if 767 + there is a parent parser then it will continue parsing when the 768 + externalEntityRefHandler() returns. It is up to the implementation of 769 + the externalEntityRefHandler() to call XML_StopParser() on the parent 770 + parser (recursively), if one wants to stop parsing altogether. 771 + 772 + When suspended, parsing can be resumed by calling XML_ResumeParser(). 773 + */ 774 + XMLPARSEAPI(enum XML_Status) 775 + XML_StopParser(XML_Parser parser, XML_Bool resumable); 776 + 777 + /* Resumes parsing after it has been suspended with XML_StopParser(). 778 + Must not be called from within a handler call-back. Returns same 779 + status codes as XML_Parse() or XML_ParseBuffer(). 780 + Additional error code XML_ERROR_NOT_SUSPENDED possible. 781 + 782 + *Note*: 783 + This must be called on the most deeply nested child parser instance 784 + first, and on its parent parser only after the child parser has finished, 785 + to be applied recursively until the document entity's parser is restarted. 786 + That is, the parent parser will not resume by itself and it is up to the 787 + application to call XML_ResumeParser() on it at the appropriate moment. 
788 + */ 789 + XMLPARSEAPI(enum XML_Status) 790 + XML_ResumeParser(XML_Parser parser); 791 + 792 + enum XML_Parsing { XML_INITIALIZED, XML_PARSING, XML_FINISHED, XML_SUSPENDED }; 793 + 794 + typedef struct { 795 + enum XML_Parsing parsing; 796 + XML_Bool finalBuffer; 797 + } XML_ParsingStatus; 798 + 799 + /* Returns status of parser with respect to being initialized, parsing, 800 + finished, or suspended and processing the final buffer. 801 + XXX XML_Parse() and XML_ParseBuffer() should return XML_ParsingStatus, 802 + XXX with XML_FINISHED_OK or XML_FINISHED_ERROR replacing XML_FINISHED 803 + */ 804 + XMLPARSEAPI(void) 805 + XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus* status); 806 + 807 + /* Creates an XML_Parser object that can parse an external general 808 + entity; context is a '\0'-terminated string specifying the parse 809 + context; encoding is a '\0'-terminated string giving the name of 810 + the externally specified encoding, or NULL if there is no 811 + externally specified encoding. The context string consists of a 812 + sequence of tokens separated by formfeeds (\f); a token consisting 813 + of a name specifies that the general entity of the name is open; a 814 + token of the form prefix=uri specifies the namespace for a 815 + particular prefix; a token of the form =uri specifies the default 816 + namespace. This can be called at any point after the first call to 817 + an ExternalEntityRefHandler so long as the parser has not yet 818 + been freed. The new parser is completely independent and may 819 + safely be used in a separate thread. The handlers and userData are 820 + initialized from the parser argument. Returns NULL if out of memory. 821 + Otherwise returns a new XML_Parser object. 
822 + */ 823 + XMLPARSEAPI(XML_Parser) 824 + XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char* context, const XML_Char* encoding); 825 + 826 + enum XML_ParamEntityParsing { 827 + XML_PARAM_ENTITY_PARSING_NEVER, 828 + XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE, 829 + XML_PARAM_ENTITY_PARSING_ALWAYS 830 + }; 831 + 832 + /* Controls parsing of parameter entities (including the external DTD 833 + subset). If parsing of parameter entities is enabled, then 834 + references to external parameter entities (including the external 835 + DTD subset) will be passed to the handler set with 836 + XML_SetExternalEntityRefHandler. The context passed will be 0. 837 + 838 + Unlike external general entities, external parameter entities can 839 + only be parsed synchronously. If the external parameter entity is 840 + to be parsed, it must be parsed during the call to the external 841 + entity ref handler: the complete sequence of 842 + XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and 843 + XML_ParserFree calls must be made during this call. After 844 + XML_ExternalEntityParserCreate has been called to create the parser 845 + for the external parameter entity (context must be 0 for this 846 + call), it is illegal to make any calls on the old parser until 847 + XML_ParserFree has been called on the newly created parser. 848 + If the library has been compiled without support for parameter 849 + entity parsing (ie without XML_DTD being defined), then 850 + XML_SetParamEntityParsing will return 0 if parsing of parameter 851 + entities is requested; otherwise it will return non-zero. 852 + Note: If XML_SetParamEntityParsing is called after XML_Parse or 853 + XML_ParseBuffer, then it has no effect and will always return 0. 854 + Note: If parser == NULL, the function will do nothing and return 0. 
855 + */ 856 + XMLPARSEAPI(int) 857 + XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing); 858 + 859 + /* Sets the hash salt to use for internal hash calculations. 860 + Helps in preventing DoS attacks based on predicting hash 861 + function behavior. This must be called before parsing is started. 862 + Returns 1 if successful, 0 when called after parsing has started. 863 + Note: If parser == NULL, the function will do nothing and return 0. 864 + */ 865 + XMLPARSEAPI(int) 866 + XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); 867 + 868 + /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then 869 + XML_GetErrorCode returns information about the error. 870 + */ 871 + XMLPARSEAPI(enum XML_Error) 872 + XML_GetErrorCode(XML_Parser parser); 873 + 874 + /* These functions return information about the current parse 875 + location. They may be called from any callback called to report 876 + some parse event; in this case the location is the location of the 877 + first of the sequence of characters that generated the event. When 878 + called from callbacks generated by declarations in the document 879 + prologue, the location identified isn't as neatly defined, but will 880 + be within the relevant markup. When called outside of the callback 881 + functions, the position indicated will be just past the last parse 882 + event (regardless of whether there was an associated callback). 883 + 884 + They may also be called after returning from a call to XML_Parse 885 + or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then 886 + the location is the location of the character at which the error 887 + was detected; otherwise the location is the location of the last 888 + parse event, as described above. 889 + 890 + Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber 891 + return 0 to indicate an error. 892 + Note: XML_GetCurrentByteIndex returns -1 to indicate an error. 
893 + */ 894 + XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser); 895 + XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); 896 + XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser); 897 + 898 + /* Return the number of bytes in the current event. 899 + Returns 0 if the event is in an internal entity. 900 + */ 901 + XMLPARSEAPI(int) 902 + XML_GetCurrentByteCount(XML_Parser parser); 903 + 904 + /* If XML_CONTEXT_BYTES is >=1, returns the input buffer, sets 905 + the integer pointed to by offset to the offset within this buffer 906 + of the current parse position, and sets the integer pointed to by size 907 + to the size of this buffer (the number of input bytes). Otherwise 908 + returns a NULL pointer. Also returns a NULL pointer if a parse isn't 909 + active. 910 + 911 + NOTE: The character pointer returned should not be used outside 912 + the handler that makes the call. 913 + */ 914 + XMLPARSEAPI(const char*) 915 + XML_GetInputContext(XML_Parser parser, int* offset, int* size); 916 + 917 + /* For backwards compatibility with previous versions. */ 918 + #define XML_GetErrorLineNumber XML_GetCurrentLineNumber 919 + #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber 920 + #define XML_GetErrorByteIndex XML_GetCurrentByteIndex 921 + 922 + /* Frees the content model passed to the element declaration handler */ 923 + XMLPARSEAPI(void) 924 + XML_FreeContentModel(XML_Parser parser, XML_Content* model); 925 + 926 + /* Exposing the memory handling functions used in Expat */ 927 + XMLPARSEAPI(void*) 928 + XML_ATTR_MALLOC 929 + XML_ATTR_ALLOC_SIZE(2) 930 + XML_MemMalloc(XML_Parser parser, size_t size); 931 + 932 + XMLPARSEAPI(void*) 933 + XML_ATTR_ALLOC_SIZE(3) 934 + XML_MemRealloc(XML_Parser parser, void* ptr, size_t size); 935 + 936 + XMLPARSEAPI(void) 937 + XML_MemFree(XML_Parser parser, void* ptr); 938 + 939 + /* Frees memory used by the parser. 
*/ 940 + XMLPARSEAPI(void) 941 + XML_ParserFree(XML_Parser parser); 942 + 943 + /* Returns a string describing the error. */ 944 + XMLPARSEAPI(const XML_LChar*) 945 + XML_ErrorString(enum XML_Error code); 946 + 947 + /* Return a string containing the version number of this expat */ 948 + XMLPARSEAPI(const XML_LChar*) 949 + XML_ExpatVersion(void); 950 + 951 + typedef struct { 952 + int major; 953 + int minor; 954 + int micro; 955 + } XML_Expat_Version; 956 + 957 + /* Return an XML_Expat_Version structure containing numeric version 958 + number information for this version of expat. 959 + */ 960 + XMLPARSEAPI(XML_Expat_Version) 961 + XML_ExpatVersionInfo(void); 962 + 963 + /* Added in Expat 1.95.5. */ 964 + enum XML_FeatureEnum { 965 + XML_FEATURE_END = 0, 966 + XML_FEATURE_UNICODE, 967 + XML_FEATURE_UNICODE_WCHAR_T, 968 + XML_FEATURE_DTD, 969 + XML_FEATURE_CONTEXT_BYTES, 970 + XML_FEATURE_MIN_SIZE, 971 + XML_FEATURE_SIZEOF_XML_CHAR, 972 + XML_FEATURE_SIZEOF_XML_LCHAR, 973 + XML_FEATURE_NS, 974 + XML_FEATURE_LARGE_SIZE, 975 + XML_FEATURE_ATTR_INFO, 976 + /* Added in Expat 2.4.0. */ 977 + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, 978 + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, 979 + /* Added in Expat 2.6.0. */ 980 + XML_FEATURE_GE, 981 + /* Added in Expat 2.7.2. */ 982 + XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, 983 + XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, 984 + /* Additional features must be added to the end of this enum. */ 985 + }; 986 + 987 + typedef struct { 988 + enum XML_FeatureEnum feature; 989 + const XML_LChar* name; 990 + long int value; 991 + } XML_Feature; 992 + 993 + XMLPARSEAPI(const XML_Feature*) 994 + XML_GetFeatureList(void); 995 + 996 + #if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) 997 + /* Added in Expat 2.4.0 for XML_DTD defined and 998 + * added in Expat 2.6.0 for XML_GE == 1. 
*/ 999 + XMLPARSEAPI(XML_Bool) 1000 + XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser, float maximumAmplificationFactor); 1001 + 1002 + /* Added in Expat 2.4.0 for XML_DTD defined and 1003 + * added in Expat 2.6.0 for XML_GE == 1. */ 1004 + XMLPARSEAPI(XML_Bool) 1005 + XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser, unsigned long long activationThresholdBytes); 1006 + 1007 + /* Added in Expat 2.7.2. */ 1008 + XMLPARSEAPI(XML_Bool) 1009 + XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, float maximumAmplificationFactor); 1010 + 1011 + /* Added in Expat 2.7.2. */ 1012 + XMLPARSEAPI(XML_Bool) 1013 + XML_SetAllocTrackerActivationThreshold(XML_Parser parser, unsigned long long activationThresholdBytes); 1014 + #endif 1015 + 1016 + /* Added in Expat 2.6.0. */ 1017 + XMLPARSEAPI(XML_Bool) 1018 + XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); 1019 + 1020 + /* Expat follows the semantic versioning convention. 1021 + See https://semver.org 1022 + */ 1023 + #define XML_MAJOR_VERSION 2 1024 + #define XML_MINOR_VERSION 7 1025 + #define XML_MICRO_VERSION 3 1026 + 1027 + #ifdef __cplusplus 1028 + } 1029 + #endif 1030 + 1031 + #endif /* not Expat_INCLUDED */
lib/expat/expat_config.h

This is a binary file and will not be displayed.

+163
lib/expat/expat_external.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2000-2004 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 + Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> 14 + Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org> 15 + Copyright (c) 2016-2019 Sebastian Pipping <sebastian@pipping.org> 16 + Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 17 + Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com> 18 + Licensed under the MIT license: 19 + 20 + Permission is hereby granted, free of charge, to any person obtaining 21 + a copy of this software and associated documentation files (the 22 + "Software"), to deal in the Software without restriction, including 23 + without limitation the rights to use, copy, modify, merge, publish, 24 + distribute, sublicense, and/or sell copies of the Software, and to permit 25 + persons to whom the Software is furnished to do so, subject to the 26 + following conditions: 27 + 28 + The above copyright notice and this permission notice shall be included 29 + in all copies or substantial portions of the Software. 30 + 31 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 32 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 33 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 34 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 35 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 36 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 37 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
38 + */ 39 + 40 + #ifndef Expat_External_INCLUDED 41 + #define Expat_External_INCLUDED 1 42 + 43 + /* External API definitions */ 44 + 45 + /* Expat tries very hard to make the API boundary very specifically 46 + defined. There are two macros defined to control this boundary; 47 + each of these can be defined before including this header to 48 + achieve some different behavior, but doing so is not recommended or 49 + tested frequently. 50 + 51 + XMLCALL - The calling convention to use for all calls across the 52 + "library boundary." This will default to cdecl, and 53 + try really hard to tell the compiler that's what we 54 + want. 55 + 56 + XMLIMPORT - Whatever magic is needed to note that a function is 57 + to be imported from a dynamically loaded library 58 + (.dll, .so, or .sl, depending on your platform). 59 + 60 + The XMLCALL macro was added in Expat 1.95.7. The only one which is 61 + expected to be directly useful in client code is XMLCALL. 62 + 63 + Note that on at least some Unix versions, the Expat library must be 64 + compiled with the cdecl calling convention as the default since 65 + system headers may assume the cdecl convention. 66 + */ 67 + #ifndef XMLCALL 68 + #if defined(_MSC_VER) 69 + #define XMLCALL __cdecl 70 + #elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER) 71 + #define XMLCALL __attribute__((cdecl)) 72 + #else 73 + /* For any platform which uses this definition and supports more than 74 + one calling convention, we need to extend this definition to 75 + declare the convention used on that platform, if it's possible to 76 + do so. 77 + 78 + If this is the case for your platform, please file a bug report 79 + with information on how to identify your platform via the C 80 + pre-processor and how to specify the same calling convention as the 81 + platform's malloc() implementation. 
82 + */ 83 + #define XMLCALL 84 + #endif 85 + #endif /* not defined XMLCALL */ 86 + 87 + #if !defined(XML_STATIC) && !defined(XMLIMPORT) 88 + #ifndef XML_BUILDING_EXPAT 89 + /* using Expat from an application */ 90 + 91 + #if defined(_MSC_VER) && !defined(__BEOS__) && !defined(__CYGWIN__) 92 + #define XMLIMPORT __declspec(dllimport) 93 + #endif 94 + 95 + #endif 96 + #endif /* not defined XML_STATIC */ 97 + 98 + #ifndef XML_ENABLE_VISIBILITY 99 + #define XML_ENABLE_VISIBILITY 0 100 + #endif 101 + 102 + #if !defined(XMLIMPORT) && XML_ENABLE_VISIBILITY 103 + #define XMLIMPORT __attribute__((visibility("default"))) 104 + #endif 105 + 106 + /* If we didn't define it above, define it away: */ 107 + #ifndef XMLIMPORT 108 + #define XMLIMPORT 109 + #endif 110 + 111 + #if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) 112 + #define XML_ATTR_MALLOC __attribute__((__malloc__)) 113 + #else 114 + #define XML_ATTR_MALLOC 115 + #endif 116 + 117 + #if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) 118 + #define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) 119 + #else 120 + #define XML_ATTR_ALLOC_SIZE(x) 121 + #endif 122 + 123 + #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL 124 + 125 + #ifdef __cplusplus 126 + extern "C" { 127 + #endif 128 + 129 + #ifdef XML_UNICODE_WCHAR_T 130 + #ifndef XML_UNICODE 131 + #define XML_UNICODE 132 + #endif 133 + #if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) 134 + #error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" 135 + #endif 136 + #endif 137 + 138 + #ifdef XML_UNICODE /* Information is UTF-16 encoded. */ 139 + #ifdef XML_UNICODE_WCHAR_T 140 + typedef wchar_t XML_Char; 141 + typedef wchar_t XML_LChar; 142 + #else 143 + typedef unsigned short XML_Char; 144 + typedef char XML_LChar; 145 + #endif /* XML_UNICODE_WCHAR_T */ 146 + #else /* Information is UTF-8 encoded. 
*/ 147 + typedef char XML_Char; 148 + typedef char XML_LChar; 149 + #endif /* XML_UNICODE */ 150 + 151 + #ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ 152 + typedef long long XML_Index; 153 + typedef unsigned long long XML_Size; 154 + #else 155 + typedef long XML_Index; 156 + typedef unsigned long XML_Size; 157 + #endif /* XML_LARGE_SIZE */ 158 + 159 + #ifdef __cplusplus 160 + } 161 + #endif 162 + 163 + #endif /* not Expat_External_INCLUDED */
+67
lib/expat/iasciitab.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org> 13 + Licensed under the MIT license: 14 + 15 + Permission is hereby granted, free of charge, to any person obtaining 16 + a copy of this software and associated documentation files (the 17 + "Software"), to deal in the Software without restriction, including 18 + without limitation the rights to use, copy, modify, merge, publish, 19 + distribute, sublicense, and/or sell copies of the Software, and to permit 20 + persons to whom the Software is furnished to do so, subject to the 21 + following conditions: 22 + 23 + The above copyright notice and this permission notice shall be included 24 + in all copies or substantial portions of the Software. 25 + 26 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 29 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 30 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 31 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 32 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
33 + */ 34 + 35 + /* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */ 36 + /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 37 + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 38 + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, 39 + /* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, 40 + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 41 + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 42 + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 43 + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 44 + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, 45 + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, 46 + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, 47 + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, 48 + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, 49 + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, 50 + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, 51 + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, 52 + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, 53 + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, 54 + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 55 + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 56 + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 57 + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 58 + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, 59 + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, 60 + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, 61 + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, 62 + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 63 + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 64 + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 65 + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 66 + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, 67 + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,
+187
lib/expat/internal.h
··· 1 + /* internal.h 2 + 3 + Internal definitions used by Expat. This is not needed to compile 4 + client code. 5 + 6 + The following calling convention macros are defined for frequently 7 + called functions: 8 + 9 + FASTCALL - Used for those internal functions that have a simple 10 + body and a low number of arguments and local variables. 11 + 12 + PTRCALL - Used for functions called through function pointers. 13 + 14 + PTRFASTCALL - Like PTRCALL, but for low number of arguments. 15 + 16 + inline - Used for selected internal functions for which inlining 17 + may improve performance on some platforms. 18 + 19 + Note: Use of these macros is based on judgement, not hard rules, 20 + and therefore subject to change. 21 + __ __ _ 22 + ___\ \/ /_ __ __ _| |_ 23 + / _ \\ /| '_ \ / _` | __| 24 + | __// \| |_) | (_| | |_ 25 + \___/_/\_\ .__/ \__,_|\__| 26 + |_| XML parser 27 + 28 + Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 29 + Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> 30 + Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 31 + Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> 32 + Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com> 33 + Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 34 + Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 35 + Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> 36 + Licensed under the MIT license: 37 + 38 + Permission is hereby granted, free of charge, to any person obtaining 39 + a copy of this software and associated documentation files (the 40 + "Software"), to deal in the Software without restriction, including 41 + without limitation the rights to use, copy, modify, merge, publish, 42 + distribute, sublicense, and/or sell copies of the Software, and to permit 43 + persons to whom the Software is furnished to do so, subject to the 44 + following conditions: 45 + 46 + The above copyright notice and this permission notice 
shall be included 47 + in all copies or substantial portions of the Software. 48 + 49 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 50 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 51 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 52 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 53 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 54 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 55 + USE OR OTHER DEALINGS IN THE SOFTWARE. 56 + */ 57 + 58 + #if defined(__GNUC__) && defined(__i386__) && !defined(__MINGW32__) 59 + /* We'll use this version by default only where we know it helps. 60 + 61 + regparm() generates warnings on Solaris boxes. See SF bug #692878. 62 + 63 + Instability reported with egcs on a RedHat Linux 7.3. 64 + Let's comment out: 65 + #define FASTCALL __attribute__((stdcall, regparm(3))) 66 + and let's try this: 67 + */ 68 + #define FASTCALL __attribute__((regparm(3))) 69 + #define PTRFASTCALL __attribute__((regparm(3))) 70 + #endif 71 + 72 + /* Using __fastcall seems to have an unexpected negative effect under 73 + MS VC++, especially for function pointers, so we won't use it for 74 + now on that platform. It may be reconsidered for a future release 75 + if it can be made more effective. 76 + Likely reason: __fastcall on Windows is like stdcall, therefore 77 + the compiler cannot perform stack optimizations for call clusters. 78 + */ 79 + 80 + /* Make sure all of these are defined if they aren't already. 
*/ 81 + 82 + #ifndef FASTCALL 83 + #define FASTCALL 84 + #endif 85 + 86 + #ifndef PTRCALL 87 + #define PTRCALL 88 + #endif 89 + 90 + #ifndef PTRFASTCALL 91 + #define PTRFASTCALL 92 + #endif 93 + 94 + #ifndef XML_MIN_SIZE 95 + #if !defined(__cplusplus) && !defined(inline) 96 + #ifdef __GNUC__ 97 + #define inline __inline 98 + #endif /* __GNUC__ */ 99 + #endif 100 + #endif /* XML_MIN_SIZE */ 101 + 102 + #ifdef __cplusplus 103 + #define inline inline 104 + #else 105 + #ifndef inline 106 + #define inline 107 + #endif 108 + #endif 109 + 110 + #include <limits.h> // ULONG_MAX 111 + #include <stddef.h> // size_t 112 + 113 + #if defined(_WIN32) && (!defined(__USE_MINGW_ANSI_STDIO) || (1 - __USE_MINGW_ANSI_STDIO - 1 == 0)) 114 + #define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" 115 + #if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW 116 + #define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" 117 + #define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u" 118 + #else 119 + #define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" 120 + #define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" 121 + #endif 122 + #else 123 + #define EXPAT_FMT_ULL(midpart) "%" midpart "llu" 124 + #if !defined(ULONG_MAX) 125 + #error Compiler did not define ULONG_MAX for us 126 + #elif ULONG_MAX == 18446744073709551615u // 2^64-1 127 + #define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" 128 + #define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" 129 + #elif defined(EMSCRIPTEN) // 32bit mode Emscripten 130 + #define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" 131 + #define EXPAT_FMT_SIZE_T(midpart) "%" midpart "zu" 132 + #else 133 + #define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" 134 + #define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" 135 + #endif 136 + #endif 137 + 138 + #ifndef UNUSED_P 139 + #define UNUSED_P(p) (void)p 140 + #endif 141 + 142 + /* NOTE BEGIN If you ever patch these defaults to greater values 143 + for non-attack XML payload in your environment, 144 + please file 
a bug report with libexpat. Thank you! 145 + */ 146 + #define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT 100.0f 147 + #define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT 8388608 // 8 MiB, 2^23 148 + 149 + #define EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT 100.0f 150 + #define EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT 67108864 // 64 MiB, 2^26 151 + 152 + // NOTE: If function expat_alloc was user facing, EXPAT_MALLOC_ALIGNMENT would 153 + // have to take sizeof(long double) into account 154 + #define EXPAT_MALLOC_ALIGNMENT sizeof(long long) // largest parser (sub)member 155 + #define EXPAT_MALLOC_PADDING ((EXPAT_MALLOC_ALIGNMENT) - sizeof(size_t)) 156 + 157 + /* NOTE END */ 158 + 159 + #include "expat.h" // so we can use type XML_Parser below 160 + 161 + #ifdef __cplusplus 162 + extern "C" { 163 + #endif 164 + 165 + void _INTERNAL_trim_to_complete_utf8_characters(const char* from, const char** fromLimRef); 166 + 167 + #if defined(XML_GE) && XML_GE == 1 168 + unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); 169 + unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); 170 + const char* unsignedCharToPrintable(unsigned char c); 171 + #endif 172 + 173 + extern 174 + #if !defined(XML_TESTING) 175 + const 176 + #endif 177 + XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c 178 + #if defined(XML_TESTING) 179 + void* expat_malloc(XML_Parser parser, size_t size, int sourceLine); 180 + void expat_free(XML_Parser parser, void* ptr, int sourceLine); 181 + void* expat_realloc(XML_Parser parser, void* ptr, size_t size, int sourceLine); 182 + extern unsigned int g_bytesScanned; // used for testing only 183 + #endif 184 + 185 + #ifdef __cplusplus 186 + } 187 + #endif
+66
lib/expat/latin1tab.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org> 13 + Licensed under the MIT license: 14 + 15 + Permission is hereby granted, free of charge, to any person obtaining 16 + a copy of this software and associated documentation files (the 17 + "Software"), to deal in the Software without restriction, including 18 + without limitation the rights to use, copy, modify, merge, publish, 19 + distribute, sublicense, and/or sell copies of the Software, and to permit 20 + persons to whom the Software is furnished to do so, subject to the 21 + following conditions: 22 + 23 + The above copyright notice and this permission notice shall be included 24 + in all copies or substantial portions of the Software. 25 + 26 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 29 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 30 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 31 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 32 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
33 + */ 34 + 35 + /* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 36 + /* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 37 + /* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 38 + /* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 39 + /* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 40 + /* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 41 + /* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 42 + /* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 43 + /* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 44 + /* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 45 + /* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, 46 + /* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 47 + /* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 48 + /* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, 49 + /* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, 50 + /* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, 51 + /* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 52 + /* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 53 + /* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 54 + /* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 55 + /* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 56 + /* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, 57 + /* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 58 + /* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 59 + /* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 60 + /* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 61 + /* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 62 + /* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 63 + /* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 64 + /* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, 65 + /* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, 66 + /* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+84
lib/expat/libexpat.def.cmake
··· 1 + ; DEF file for MS VC++ 2 + 3 + EXPORTS 4 + XML_DefaultCurrent @1 5 + XML_ErrorString @2 6 + XML_ExpatVersion @3 7 + XML_ExpatVersionInfo @4 8 + XML_ExternalEntityParserCreate @5 9 + XML_GetBase @6 10 + XML_GetBuffer @7 11 + XML_GetCurrentByteCount @8 12 + XML_GetCurrentByteIndex @9 13 + XML_GetCurrentColumnNumber @10 14 + XML_GetCurrentLineNumber @11 15 + XML_GetErrorCode @12 16 + XML_GetIdAttributeIndex @13 17 + XML_GetInputContext @14 18 + XML_GetSpecifiedAttributeCount @15 19 + XML_Parse @16 20 + XML_ParseBuffer @17 21 + XML_ParserCreate @18 22 + XML_ParserCreateNS @19 23 + XML_ParserCreate_MM @20 24 + XML_ParserFree @21 25 + XML_SetAttlistDeclHandler @22 26 + XML_SetBase @23 27 + XML_SetCdataSectionHandler @24 28 + XML_SetCharacterDataHandler @25 29 + XML_SetCommentHandler @26 30 + XML_SetDefaultHandler @27 31 + XML_SetDefaultHandlerExpand @28 32 + XML_SetDoctypeDeclHandler @29 33 + XML_SetElementDeclHandler @30 34 + XML_SetElementHandler @31 35 + XML_SetEncoding @32 36 + XML_SetEndCdataSectionHandler @33 37 + XML_SetEndDoctypeDeclHandler @34 38 + XML_SetEndElementHandler @35 39 + XML_SetEndNamespaceDeclHandler @36 40 + XML_SetEntityDeclHandler @37 41 + XML_SetExternalEntityRefHandler @38 42 + XML_SetExternalEntityRefHandlerArg @39 43 + XML_SetNamespaceDeclHandler @40 44 + XML_SetNotStandaloneHandler @41 45 + XML_SetNotationDeclHandler @42 46 + XML_SetParamEntityParsing @43 47 + XML_SetProcessingInstructionHandler @44 48 + XML_SetReturnNSTriplet @45 49 + XML_SetStartCdataSectionHandler @46 50 + XML_SetStartDoctypeDeclHandler @47 51 + XML_SetStartElementHandler @48 52 + XML_SetStartNamespaceDeclHandler @49 53 + XML_SetUnknownEncodingHandler @50 54 + XML_SetUnparsedEntityDeclHandler @51 55 + XML_SetUserData @52 56 + XML_SetXmlDeclHandler @53 57 + XML_UseParserAsHandlerArg @54 58 + ; added with version 1.95.3 59 + XML_ParserReset @55 60 + XML_SetSkippedEntityHandler @56 61 + ; added with version 1.95.5 62 + XML_GetFeatureList @57 63 + XML_UseForeignDTD @58 
64 + ; added with version 1.95.6 65 + XML_FreeContentModel @59 66 + XML_MemMalloc @60 67 + XML_MemRealloc @61 68 + XML_MemFree @62 69 + ; added with version 1.95.8 70 + XML_StopParser @63 71 + XML_ResumeParser @64 72 + XML_GetParsingStatus @65 73 + ; added with version 2.1.1 74 + @_EXPAT_COMMENT_ATTR_INFO@ XML_GetAttributeInfo @66 75 + XML_SetHashSalt @67 76 + ; internal @68 removed with version 2.3.1 77 + ; added with version 2.4.0 78 + @_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionActivationThreshold @69 79 + @_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 80 + ; added with version 2.6.0 81 + XML_SetReparseDeferralEnabled @71 82 + ; added with version 2.7.2 83 + @_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerMaximumAmplification @72 84 + @_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerActivationThreshold @73
+11
lib/expat/library.json
··· 1 + { 2 + "name": "expat", 3 + "version": "2.7.3", 4 + "build": { 5 + "srcFilter": [ 6 + "+<xmlparse.c>", 7 + "+<xmlrole.c>", 8 + "+<xmltok.c>" 9 + ] 10 + } 11 + }
+102
lib/expat/nametab.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 10 + Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org> 11 + Licensed under the MIT license: 12 + 13 + Permission is hereby granted, free of charge, to any person obtaining 14 + a copy of this software and associated documentation files (the 15 + "Software"), to deal in the Software without restriction, including 16 + without limitation the rights to use, copy, modify, merge, publish, 17 + distribute, sublicense, and/or sell copies of the Software, and to permit 18 + persons to whom the Software is furnished to do so, subject to the 19 + following conditions: 20 + 21 + The above copyright notice and this permission notice shall be included 22 + in all copies or substantial portions of the Software. 23 + 24 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 27 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 28 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 29 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 30 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
31 + */ 32 + 33 + static const unsigned namingBitmap[] = { 34 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 35 + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x04000000, 36 + 0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, 0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 37 + 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, 38 + 0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFD740, 39 + 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF0003, 0xFFFFFFFF, 40 + 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, 0x0000007F, 0x00000000, 0xFFFF0000, 41 + 0x000707FF, 0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000, 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, 42 + 0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, 0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003, 0xFFF987E0, 43 + 0x036DFDFF, 0x5E000000, 0x001C0000, 0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, 0xFFF99FE0, 0x23CDFDFF, 44 + 0xB0000000, 0x00000003, 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 45 + 0x00000003, 0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003, 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, 46 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000, 0xFEF02596, 47 + 0x200D6CAE, 0x0000001F, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, 0x00000000, 0x00000000, 48 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 49 + 0x007FFFFF, 0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB, 0x40000000, 0xF580C900, 0x00000007, 0x02010800, 50 + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF, 0x3F3FFFFF, 51 + 0xFFFFFFFF, 
0xAAFF3F3F, 0x3FFFFFFF, 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, 0x00000000, 0x00004C40, 52 + 0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x000003FE, 0xFFFFFFFE, 53 + 0xFFFFFFFF, 0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF, 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, 54 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 55 + 0x0000003F, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F, 56 + 0x00000000, 0x00000000, 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00800000, 0xFF7FFFFF, 57 + 0xFF7FFFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, 58 + 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, 0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 59 + 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 60 + 0x027FFFFF, 0xFFFFFFFE, 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, 0x00000000, 0x07FFFFFE, 0x0007FFFF, 61 + 0xFFFF03FF, 0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF, 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, 62 + 0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, 0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0, 0xFFFBAFEE, 63 + 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, 0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, 0xD63DC7EC, 0xC3BFC718, 64 + 0x00803DC7, 0x0000FF80, 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, 0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 65 + 0x0000FFC3, 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 66 + 0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, 0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000, 0x03000000, 67 + 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, 0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 68 + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, 0x000000A0, 
0x003EFFFE, 0xFFFFFFFE, 69 + 0xFFFFFFFF, 0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF, 70 + }; 71 + static const unsigned char nmstrtPages[] = { 72 + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x00, 73 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 74 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 75 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 76 + 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 77 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 78 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 79 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 80 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 81 + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 82 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 83 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 84 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 85 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 86 + }; 87 + static const unsigned char namePages[] = { 88 + 0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, 0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x10, 0x11, 0x00, 89 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x12, 0x13, 0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 90 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 91 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 92 + 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 93 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 94 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 95 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 96 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 97 + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 98 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 99 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 100 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 101 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 102 + };
+379
lib/expat/siphash.h
/* ==========================================================================
 * siphash.h - SipHash-2-4 in a single header file
 * --------------------------------------------------------------------------
 * Derived by William Ahern from the reference implementation[1] published[2]
 * by Jean-Philippe Aumasson and Daniel J. Bernstein.
 * Minimal changes by Sebastian Pipping and Victor Stinner on top, see below.
 * Licensed under the CC0 Public Domain Dedication license.
 *
 * 1. https://www.131002.net/siphash/siphash24.c
 * 2. https://www.131002.net/siphash/
 * --------------------------------------------------------------------------
 * HISTORY:
 *
 * 2020-10-03  (Sebastian Pipping)
 *   - Drop support for Visual Studio 9.0/2008 and earlier
 *
 * 2019-08-03  (Sebastian Pipping)
 *   - Mark part of sip24_valid as to be excluded from clang-format
 *   - Re-format code using clang-format 9
 *
 * 2018-07-08  (Anton Maklakov)
 *   - Add "fall through" markers for GCC's -Wimplicit-fallthrough
 *
 * 2017-11-03  (Sebastian Pipping)
 *   - Hide sip_tobin and sip_binof unless SIPHASH_TOBIN macro is defined
 *
 * 2017-07-25  (Vadim Zeitlin)
 *   - Fix use of SIPHASH_MAIN macro
 *
 * 2017-07-05  (Sebastian Pipping)
 *   - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++
 *   - Add const qualifiers at two places
 *   - Ensure <=80 characters line length (assuming tab width 4)
 *
 * 2017-06-23  (Victor Stinner)
 *   - Address Win64 compile warnings
 *
 * 2017-06-18  (Sebastian Pipping)
 *   - Clarify license note in the header
 *   - Address C89 issues:
 *     - Stop using inline keyword (and let compiler decide)
 *     - Replace _Bool by int
 *     - Turn macro siphash24 into a function
 *     - Address invalid conversion (void pointer) by explicit cast
 *   - Address lack of stdint.h for Visual Studio 2003 to 2008
 *   - Always expose sip24_valid (for self-tests)
 *
 * 2012-11-04 - Born.  (William Ahern)
 * --------------------------------------------------------------------------
 * USAGE:
 *
 * SipHash-2-4 takes as input two 64-bit words as the key, some number of
 * message bytes, and outputs a 64-bit word as the message digest. This
 * implementation employs two data structures: a struct sipkey for
 * representing the key, and a struct siphash for representing the hash
 * state.
 *
 * For converting a 16-byte unsigned char array to a key, use either the
 * macro sip_keyof or the routine sip_tokey. The former instantiates a
 * compound literal key, while the latter requires a key object as a
 * parameter.
 *
 * 	unsigned char secret[16];
 * 	arc4random_buf(secret, sizeof secret);
 * 	struct sipkey *key = sip_keyof(secret);
 *
 * For hashing a message, use either the convenience function siphash24 or
 * the routines sip24_init, sip24_update, and sip24_final.
 *
 * 	struct siphash state;
 * 	void *msg;
 * 	size_t len;
 * 	uint64_t hash;
 *
 * 	sip24_init(&state, key);
 * 	sip24_update(&state, msg, len);
 * 	hash = sip24_final(&state);
 *
 * or
 *
 * 	hash = siphash24(msg, len, key);
 *
 * To convert the 64-bit hash value to a canonical 8-byte little-endian
 * binary representation, use either the macro sip_binof or the routine
 * sip_tobin. The former instantiates and returns a compound literal array,
 * while the latter requires an array object as a parameter.
 * --------------------------------------------------------------------------
 * NOTES:
 *
 * o Neither sip_keyof nor sip_binof will work with compilers lacking
 *   compound literal support. Instead, you must use the lower-level
 *   interfaces which take as parameters the temporary state objects.
 *   (siphash24 is a plain function and has no such requirement.)
 *
 * o Uppercase macros may evaluate parameters more than once. Lowercase
 *   macros should not exhibit any such side effects.
 * ==========================================================================
 */
#ifndef SIPHASH_H
#define SIPHASH_H

#include <stddef.h> /* size_t */
#include <stdint.h> /* uint64_t uint32_t uint8_t */

/*
 * Workaround to not require a C++11 compiler for using ULL suffix
 * if this code is included and compiled as C++; related GCC warning is:
 * warning: use of C++11 long long integer constant [-Wlong-long]
 *
 * Both arguments are parenthesized so that an expression passed as `high`
 * is cast as a whole instead of only its first operand.
 */
#define SIP_ULL(high, low) ((((uint64_t)(high)) << 32) | (low))

/* Rotate the 64-bit value x left by b bits (callers use 0 < b < 64). */
#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))

/*
 * Store the 32-bit value v at p[0..3], least significant byte first.
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays correct under an unbraced if/else. Evaluates both
 * arguments more than once.
 */
#define SIP_U32TO8_LE(p, v)          \
  do {                               \
    (p)[0] = (uint8_t)((v) >> 0);    \
    (p)[1] = (uint8_t)((v) >> 8);    \
    (p)[2] = (uint8_t)((v) >> 16);   \
    (p)[3] = (uint8_t)((v) >> 24);   \
  } while (0)

/*
 * Store the 64-bit value v at p[0..7], least significant byte first.
 * Single-statement for the same reason as SIP_U32TO8_LE.
 */
#define SIP_U64TO8_LE(p, v)                          \
  do {                                               \
    SIP_U32TO8_LE((p) + 0, (uint32_t)((v) >> 0));    \
    SIP_U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));   \
  } while (0)

/* Read p[0..7] as a 64-bit value, least significant byte first. */
#define SIP_U8TO64_LE(p)                                         \
  (((uint64_t)((p)[0]) << 0) | ((uint64_t)((p)[1]) << 8) |       \
   ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |     \
   ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |     \
   ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))

/* Zero initializer matching the member order of struct siphash. */
#define SIPHASH_INITIALIZER {0, 0, 0, 0, {0}, 0, 0}

/* Running hash state. */
struct siphash {
  uint64_t v0, v1, v2, v3; /* the four state words mixed by sip_round() */

  unsigned char buf[8], *p; /* pending input bytes; p is the next free slot */
  uint64_t c;               /* bytes already absorbed from full buffers */
}; /* struct siphash */

#define SIP_KEYLEN 16

/* 128-bit key held as two 64-bit words. */
struct sipkey {
  uint64_t k[2];
}; /* struct sipkey */

/* Build a key from 16 bytes via a compound literal (C99; not valid C++). */
#define sip_keyof(k) sip_tokey(&(struct sipkey){{0}}, (k))

/*
 * Fill *key from the SIP_KEYLEN bytes at src, reading each half as a
 * little-endian 64-bit word. Returns key.
 */
static struct sipkey* sip_tokey(struct sipkey* key, const void* src) {
  key->k[0] = SIP_U8TO64_LE((const unsigned char*)src);
  key->k[1] = SIP_U8TO64_LE((const unsigned char*)src + 8);
  return key;
} /* sip_tokey() */

#ifdef SIPHASH_TOBIN

#define sip_binof(v) sip_tobin((unsigned char[8]){0}, (v))

/* Write u64 to the 8 bytes at dst in little-endian order. Returns dst. */
static void* sip_tobin(void* dst, uint64_t u64) {
  SIP_U64TO8_LE((unsigned char*)dst, u64);
  return dst;
} /* sip_tobin() */

#endif /* SIPHASH_TOBIN */

/* Apply `rounds` iterations of the add/rotate/xor mixing step to H. */
static void sip_round(struct siphash* H, const int rounds) {
  int i;

  for (i = 0; i < rounds; i++) {
    H->v0 += H->v1;
    H->v1 = SIP_ROTL(H->v1, 13);
    H->v1 ^= H->v0;
    H->v0 = SIP_ROTL(H->v0, 32);

    H->v2 += H->v3;
    H->v3 = SIP_ROTL(H->v3, 16);
    H->v3 ^= H->v2;

    H->v0 += H->v3;
    H->v3 = SIP_ROTL(H->v3, 21);
    H->v3 ^= H->v0;

    H->v2 += H->v1;
    H->v1 = SIP_ROTL(H->v1, 17);
    H->v1 ^= H->v2;
    H->v2 = SIP_ROTL(H->v2, 32);
  }
} /* sip_round() */

/*
 * Start a new hash: seed the four state words with the fixed constants
 * XORed with the key words, and empty the input buffer. Returns H.
 */
static struct siphash* sip24_init(struct siphash* H, const struct sipkey* key) {
  H->v0 = SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
  H->v1 = SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
  H->v2 = SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
  H->v3 = SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];

  H->p = H->buf;
  H->c = 0;

  return H;
} /* sip24_init() */

/* One-past-the-end pointer of the array a. */
#define sip_endof(a) (&(a)[sizeof(a) / sizeof *(a)])

/*
 * Absorb len bytes from src. Bytes are staged in H->buf; every time the
 * buffer holds 8 bytes they are mixed into the state as one little-endian
 * word (two rounds) and the buffer is emptied. Leftover bytes stay buffered
 * for the next call or for sip24_final(). Returns H.
 */
static struct siphash* sip24_update(struct siphash* H, const void* src, size_t len) {
  const unsigned char *p = (const unsigned char*)src, *pe = p + len;
  uint64_t m;

  do {
    while (p < pe && H->p < sip_endof(H->buf)) *H->p++ = *p++;

    /* Input exhausted before the buffer filled: keep the tail for later. */
    if (H->p < sip_endof(H->buf)) break;

    m = SIP_U8TO64_LE(H->buf);
    H->v3 ^= m;
    sip_round(H, 2);
    H->v0 ^= m;

    H->p = H->buf;
    H->c += 8;
  } while (p < pe);

  return H;
} /* sip24_update() */

/*
 * Finish the hash and return the 64-bit digest. The last word carries the
 * buffered tail bytes in its low bytes and the total message length
 * (H->c + tail length, truncated by the shift) in its top byte.
 */
static uint64_t sip24_final(struct siphash* H) {
  const char left = (char)(H->p - H->buf);
  uint64_t b = (H->c + left) << 56;

  /* Each case adds one tail byte and falls through to the lower ones. */
  switch (left) {
    case 7:
      b |= (uint64_t)H->buf[6] << 48;
      /* fall through */
    case 6:
      b |= (uint64_t)H->buf[5] << 40;
      /* fall through */
    case 5:
      b |= (uint64_t)H->buf[4] << 32;
      /* fall through */
    case 4:
      b |= (uint64_t)H->buf[3] << 24;
      /* fall through */
    case 3:
      b |= (uint64_t)H->buf[2] << 16;
      /* fall through */
    case 2:
      b |= (uint64_t)H->buf[1] << 8;
      /* fall through */
    case 1:
      b |= (uint64_t)H->buf[0] << 0;
      /* fall through */
    case 0:
      break;
  }

  H->v3 ^= b;
  sip_round(H, 2);
  H->v0 ^= b;
  H->v2 ^= 0xff;
  sip_round(H, 4);

  return H->v0 ^ H->v1 ^ H->v2 ^ H->v3;
} /* sip24_final() */

/* One-shot convenience wrapper: hash len bytes at src under key. */
static uint64_t siphash24(const void* src, size_t len, const struct sipkey* key) {
  struct siphash state = SIPHASH_INITIALIZER;
  return sip24_final(sip24_update(sip24_init(&state, key), src, len));
} /* siphash24() */

/*
 * SipHash-2-4 output with
 * k = 00 01 02 ...
 * and
 * in = (empty string)
 * in = 00 (1 byte)
 * in = 00 01 (2 bytes)
 * in = 00 01 02 (3 bytes)
 * ...
 * in = 00 01 02 ... 3e (63 bytes)
 *
 * Self-test: returns 1 when every digest matches its vector, 0 otherwise.
 */
static int sip24_valid(void) {
  /* clang-format off */
  static const unsigned char vectors[64][8] = {
    { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
    { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
    { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
    { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
    { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
    { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
    { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
    { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
    { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
    { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
    { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
    { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
    { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
    { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
    { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
    { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
    { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
    { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
    { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
    { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
    { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
    { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
    { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
    { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
    { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
    { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
    { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
    { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
    { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
    { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
    { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
    { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
    { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
    { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
    { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
    { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
    { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
    { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
    { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
    { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
    { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
    { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
    { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
    { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
    { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
    { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
    { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
    { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
    { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
    { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
    { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
    { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
    { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
    { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
    { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
    { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
    { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
    { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
    { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
    { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
    { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
    { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
    { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
    { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, }
  };
  /* clang-format on */

  unsigned char in[64];
  struct sipkey k;
  size_t i;

  sip_tokey(&k,
            "\000\001\002\003\004\005\006\007\010\011"
            "\012\013\014\015\016\017");

  /* Iteration i hashes the first i bytes 00 01 ... (i-1) of `in`. */
  for (i = 0; i < sizeof in; ++i) {
    in[i] = (unsigned char)i;

    if (siphash24(in, i, &k) != SIP_U8TO64_LE(vectors[i])) return 0;
  }

  return 1;
} /* sip24_valid() */

#ifdef SIPHASH_MAIN

#include <stdio.h>

/* Stand-alone self-test driver: exit status 0 when the vectors match. */
int main(void) {
  const int ok = sip24_valid();

  if (ok)
    puts("OK");
  else
    puts("FAIL");

  return !ok;
} /* main() */

#endif /* SIPHASH_MAIN */

#endif /* SIPHASH_H */
+66
lib/expat/utf8tab.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org> 13 + Licensed under the MIT license: 14 + 15 + Permission is hereby granted, free of charge, to any person obtaining 16 + a copy of this software and associated documentation files (the 17 + "Software"), to deal in the Software without restriction, including 18 + without limitation the rights to use, copy, modify, merge, publish, 19 + distribute, sublicense, and/or sell copies of the Software, and to permit 20 + persons to whom the Software is furnished to do so, subject to the 21 + following conditions: 22 + 23 + The above copyright notice and this permission notice shall be included 24 + in all copies or substantial portions of the Software. 25 + 26 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 29 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 30 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 31 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 32 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
33 + */ 34 + 35 + /* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 36 + /* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 37 + /* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 38 + /* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 39 + /* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 40 + /* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 41 + /* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 42 + /* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 43 + /* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 44 + /* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 45 + /* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 46 + /* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 47 + /* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 48 + /* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 49 + /* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 50 + /* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, 51 + /* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 52 + /* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 53 + /* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 54 + /* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 55 + /* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 56 + /* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 57 + /* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 58 + /* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, 59 + /* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, 60 + /* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, 61 + /* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, 62 + /* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, 63 + /* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, 64 + /* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, 65 + /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, 66 + /* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM,
+48
lib/expat/winconfig.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 10 + Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> 11 + Copyright (c) 2005 Karl Waclawek <karl@waclawek.net> 12 + Copyright (c) 2017-2023 Sebastian Pipping <sebastian@pipping.org> 13 + Copyright (c) 2023 Orgad Shaneh <orgad.shaneh@audiocodes.com> 14 + Licensed under the MIT license: 15 + 16 + Permission is hereby granted, free of charge, to any person obtaining 17 + a copy of this software and associated documentation files (the 18 + "Software"), to deal in the Software without restriction, including 19 + without limitation the rights to use, copy, modify, merge, publish, 20 + distribute, sublicense, and/or sell copies of the Software, and to permit 21 + persons to whom the Software is furnished to do so, subject to the 22 + following conditions: 23 + 24 + The above copyright notice and this permission notice shall be included 25 + in all copies or substantial portions of the Software. 26 + 27 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 30 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 31 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 32 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 33 + USE OR OTHER DEALINGS IN THE SOFTWARE. 34 + */ 35 + 36 + #ifndef WINCONFIG_H 37 + #define WINCONFIG_H 38 + 39 + #ifndef WIN32_LEAN_AND_MEAN 40 + #define WIN32_LEAN_AND_MEAN 41 + #endif 42 + #include <windows.h> 43 + #undef WIN32_LEAN_AND_MEAN 44 + 45 + #include <memory.h> 46 + #include <string.h> 47 + 48 + #endif /* ndef WINCONFIG_H */
+8210
lib/expat/xmlparse.c
··· 1 + /* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+) 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 + Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 + Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 + Copyright (c) 2016 Eric Rahm <erahm@mozilla.com> 16 + Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> 17 + Copyright (c) 2016 Gaurav <g.gupta@samsung.com> 18 + Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> 19 + Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr> 20 + Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> 21 + Copyright (c) 2016 Ed Schouten <ed@nuxi.nl> 22 + Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 23 + Copyright (c) 2017 Václav Slavík <vaclav@slavik.io> 24 + Copyright (c) 2017 Viktor Szakats <commit@vsz.me> 25 + Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com> 26 + Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de> 27 + Copyright (c) 2017 Hans Wennborg <hans@chromium.org> 28 + Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 29 + Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 30 + Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 31 + Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org> 32 + Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 33 + Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org> 34 + Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> 35 + Copyright (c) 2021 Donghee Na <donghee.na@python.org> 36 + Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> 37 + Copyright (c) 2022 
Jeffrey Walton <noloader@gmail.com> 38 + Copyright (c) 2022 Jann Horn <jannh@google.com> 39 + Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 40 + Copyright (c) 2023 Owain Davies <owaind@bath.edu> 41 + Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 42 + Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com> 43 + Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 44 + Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> 45 + Licensed under the MIT license: 46 + 47 + Permission is hereby granted, free of charge, to any person obtaining 48 + a copy of this software and associated documentation files (the 49 + "Software"), to deal in the Software without restriction, including 50 + without limitation the rights to use, copy, modify, merge, publish, 51 + distribute, sublicense, and/or sell copies of the Software, and to permit 52 + persons to whom the Software is furnished to do so, subject to the 53 + following conditions: 54 + 55 + The above copyright notice and this permission notice shall be included 56 + in all copies or substantial portions of the Software. 57 + 58 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 59 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 60 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 61 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 62 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 63 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 64 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
65 + */ 66 + 67 + #define XML_BUILDING_EXPAT 1 68 + 69 + #include "expat_config.h" 70 + 71 + #if !defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) 72 + #error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) 73 + #endif 74 + 75 + #if defined(XML_DTD) && XML_GE == 0 76 + #error Either undefine XML_DTD or define XML_GE to 1. 77 + #endif 78 + 79 + #if !defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) || (XML_CONTEXT_BYTES + 0 < 0) 80 + #error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) 81 + #endif 82 + 83 + #if defined(HAVE_SYSCALL_GETRANDOM) 84 + #if !defined(_GNU_SOURCE) 85 + #define _GNU_SOURCE 1 /* syscall prototype */ 86 + #endif 87 + #endif 88 + 89 + #ifdef _WIN32 90 + /* force stdlib to define rand_s() */ 91 + #if !defined(_CRT_RAND_S) 92 + #define _CRT_RAND_S 93 + #endif 94 + #endif 95 + 96 + #include <assert.h> 97 + #include <limits.h> /* INT_MAX, UINT_MAX */ 98 + #include <math.h> /* isnan */ 99 + #include <stdbool.h> 100 + #include <stddef.h> 101 + #include <stdint.h> /* SIZE_MAX, uintptr_t */ 102 + #include <stdio.h> /* fprintf */ 103 + #include <stdlib.h> /* getenv, rand_s */ 104 + #include <string.h> /* memset(), memcpy() */ 105 + 106 + #ifdef _WIN32 107 + #define getpid GetCurrentProcessId 108 + #else 109 + #include <errno.h> 110 + #include <fcntl.h> /* O_RDONLY */ 111 + #include <sys/time.h> /* gettimeofday() */ 112 + #include <sys/types.h> /* getpid() */ 113 + #include <unistd.h> /* getpid() */ 114 + #endif 115 + 116 + #ifdef _WIN32 117 + #include "winconfig.h" 118 + #endif 119 + 120 + #include "ascii.h" 121 + #include "expat.h" 122 + #include "siphash.h" 123 + 124 + #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 125 + #if defined(HAVE_GETRANDOM) 126 + #include <sys/random.h> /* getrandom */ 127 + #else 128 + #include <sys/syscall.h> /* SYS_getrandom */ 129 + 
#include <unistd.h> /* syscall */ 130 + #endif 131 + #if !defined(GRND_NONBLOCK) 132 + #define GRND_NONBLOCK 0x0001 133 + #endif /* !defined(GRND_NONBLOCK) */ 134 + #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 135 + 136 + #if defined(HAVE_LIBBSD) && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) 137 + #include <bsd/stdlib.h> 138 + #endif 139 + 140 + #if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32) 141 + #define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 142 + #endif 143 + 144 + // #if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) && !defined(HAVE_ARC4RANDOM_BUF) && \ 145 + // !defined(HAVE_ARC4RANDOM) && !defined(XML_DEV_URANDOM) && !defined(_WIN32) && !defined(XML_POOR_ENTROPY) 146 + // #error You do not have support for any sources of high quality entropy \ 147 + // enabled. For end user security, that is probably not what you want. \ 148 + // \ 149 + // Your options include: \ 150 + // * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ 151 + // * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ 152 + // * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ 153 + // * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ 154 + // * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ 155 + // * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ 156 + // * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ 157 + // * Windows >=Vista (rand_s): _WIN32. \ 158 + // \ 159 + // If you insist on not using any of these, bypass this error by defining \ 160 + // XML_POOR_ENTROPY; you have been warned. \ 161 + // \ 162 + // If you have reasons to patch this detection code away or need changes \ 163 + // to the build system, please open a bug. Thank you! 
164 + // #endif 165 + 166 + #ifdef XML_UNICODE 167 + #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX 168 + #define XmlConvert XmlUtf16Convert 169 + #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding 170 + #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS 171 + #define XmlEncode XmlUtf16Encode 172 + #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((uintptr_t)(s)) & 1)) 173 + typedef unsigned short ICHAR; 174 + #else 175 + #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX 176 + #define XmlConvert XmlUtf8Convert 177 + #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding 178 + #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS 179 + #define XmlEncode XmlUtf8Encode 180 + #define MUST_CONVERT(enc, s) (!(enc)->isUtf8) 181 + typedef char ICHAR; 182 + #endif 183 + 184 + #ifndef XML_NS 185 + 186 + #define XmlInitEncodingNS XmlInitEncoding 187 + #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding 188 + #undef XmlGetInternalEncodingNS 189 + #define XmlGetInternalEncodingNS XmlGetInternalEncoding 190 + #define XmlParseXmlDeclNS XmlParseXmlDecl 191 + 192 + #endif 193 + 194 + #ifdef XML_UNICODE 195 + 196 + #ifdef XML_UNICODE_WCHAR_T 197 + #define XML_T(x) (const wchar_t) x 198 + #define XML_L(x) L##x 199 + #else 200 + #define XML_T(x) (const unsigned short)x 201 + #define XML_L(x) x 202 + #endif 203 + 204 + #else 205 + 206 + #define XML_T(x) x 207 + #define XML_L(x) x 208 + 209 + #endif 210 + 211 + /* Round up n to be a multiple of sz, where sz is a power of 2. */ 212 + #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) 213 + 214 + /* Do safe (NULL-aware) pointer arithmetic */ 215 + #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) 216 + 217 + #define EXPAT_MIN(a, b) (((a) < (b)) ? 
(a) : (b)) 218 + 219 + #include "internal.h" 220 + #include "xmlrole.h" 221 + #include "xmltok.h" 222 + 223 + typedef const XML_Char* KEY; 224 + 225 + typedef struct { 226 + KEY name; 227 + } NAMED; 228 + 229 + typedef struct { 230 + NAMED** v; 231 + unsigned char power; 232 + size_t size; 233 + size_t used; 234 + XML_Parser parser; 235 + } HASH_TABLE; 236 + 237 + static size_t keylen(KEY s); 238 + 239 + static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey* key); 240 + 241 + /* For probing (after a collision) we need a step size relative prime 242 + to the hash table size, which is a power of 2. We use double-hashing, 243 + since we can calculate a second hash value cheaply by taking those bits 244 + of the first hash value that were discarded (masked out) when the table 245 + index was calculated: index = hash & mask, where mask = table->size - 1. 246 + We limit the maximum step size to table->size / 4 (mask >> 2) and make 247 + it odd, since odd numbers are always relative prime to a power of 2. 
248 + */ 249 + #define SECOND_HASH(hash, mask, power) ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) 250 + #define PROBE_STEP(hash, mask, power) ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) 251 + 252 + typedef struct { 253 + NAMED** p; 254 + NAMED** end; 255 + } HASH_TABLE_ITER; 256 + 257 + #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ 258 + #define INIT_DATA_BUF_SIZE 1024 259 + #define INIT_ATTS_SIZE 16 260 + #define INIT_ATTS_VERSION 0xFFFFFFFF 261 + #define INIT_BLOCK_SIZE 1024 262 + #define INIT_BUFFER_SIZE 1024 263 + 264 + #define EXPAND_SPARE 24 265 + 266 + typedef struct binding { 267 + struct prefix* prefix; 268 + struct binding* nextTagBinding; 269 + struct binding* prevPrefixBinding; 270 + const struct attribute_id* attId; 271 + XML_Char* uri; 272 + int uriLen; 273 + int uriAlloc; 274 + } BINDING; 275 + 276 + typedef struct prefix { 277 + const XML_Char* name; 278 + BINDING* binding; 279 + } PREFIX; 280 + 281 + typedef struct { 282 + const XML_Char* str; 283 + const XML_Char* localPart; 284 + const XML_Char* prefix; 285 + int strLen; 286 + int uriLen; 287 + int prefixLen; 288 + } TAG_NAME; 289 + 290 + /* TAG represents an open element. 291 + The name of the element is stored in both the document and API 292 + encodings. The memory buffer 'buf' is a separately-allocated 293 + memory area which stores the name. During the XML_Parse()/ 294 + XML_ParseBuffer() when the element is open, the memory for the 'raw' 295 + version of the name (in the document encoding) is shared with the 296 + document buffer. If the element is open across calls to 297 + XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to 298 + contain the 'raw' name as well. 299 + 300 + A parser reuses these structures, maintaining a list of allocated 301 + TAG objects in a free list. 
302 + */ 303 + typedef struct tag { 304 + struct tag* parent; /* parent of this element */ 305 + const char* rawName; /* tagName in the original encoding */ 306 + int rawNameLength; 307 + TAG_NAME name; /* tagName in the API encoding */ 308 + char* buf; /* buffer for name components */ 309 + char* bufEnd; /* end of the buffer */ 310 + BINDING* bindings; 311 + } TAG; 312 + 313 + typedef struct { 314 + const XML_Char* name; 315 + const XML_Char* textPtr; 316 + int textLen; /* length in XML_Chars */ 317 + int processed; /* # of processed bytes - when suspended */ 318 + const XML_Char* systemId; 319 + const XML_Char* base; 320 + const XML_Char* publicId; 321 + const XML_Char* notation; 322 + XML_Bool open; 323 + XML_Bool hasMore; /* true if entity has not been completely processed */ 324 + /* An entity can be open while being already completely processed (hasMore == 325 + XML_FALSE). The reason is the delayed closing of entities until their inner 326 + entities are processed and closed */ 327 + XML_Bool is_param; 328 + XML_Bool is_internal; /* true if declared in internal subset outside PE */ 329 + } ENTITY; 330 + 331 + typedef struct { 332 + enum XML_Content_Type type; 333 + enum XML_Content_Quant quant; 334 + const XML_Char* name; 335 + int firstchild; 336 + int lastchild; 337 + int childcnt; 338 + int nextsib; 339 + } CONTENT_SCAFFOLD; 340 + 341 + #define INIT_SCAFFOLD_ELEMENTS 32 342 + 343 + typedef struct block { 344 + struct block* next; 345 + int size; 346 + XML_Char s[1]; 347 + } BLOCK; 348 + 349 + typedef struct { 350 + BLOCK* blocks; 351 + BLOCK* freeBlocks; 352 + const XML_Char* end; 353 + XML_Char* ptr; 354 + XML_Char* start; 355 + XML_Parser parser; 356 + } STRING_POOL; 357 + 358 + /* The XML_Char before the name is used to determine whether 359 + an attribute has been specified. 
*/ 360 + typedef struct attribute_id { 361 + XML_Char* name; 362 + PREFIX* prefix; 363 + XML_Bool maybeTokenized; 364 + XML_Bool xmlns; 365 + } ATTRIBUTE_ID; 366 + 367 + typedef struct { 368 + const ATTRIBUTE_ID* id; 369 + XML_Bool isCdata; 370 + const XML_Char* value; 371 + } DEFAULT_ATTRIBUTE; 372 + 373 + typedef struct { 374 + unsigned long version; 375 + unsigned long hash; 376 + const XML_Char* uriName; 377 + } NS_ATT; 378 + 379 + typedef struct { 380 + const XML_Char* name; 381 + PREFIX* prefix; 382 + const ATTRIBUTE_ID* idAtt; 383 + int nDefaultAtts; 384 + int allocDefaultAtts; 385 + DEFAULT_ATTRIBUTE* defaultAtts; 386 + } ELEMENT_TYPE; 387 + 388 + typedef struct { 389 + HASH_TABLE generalEntities; 390 + HASH_TABLE elementTypes; 391 + HASH_TABLE attributeIds; 392 + HASH_TABLE prefixes; 393 + STRING_POOL pool; 394 + STRING_POOL entityValuePool; 395 + /* false once a parameter entity reference has been skipped */ 396 + XML_Bool keepProcessing; 397 + /* true once an internal or external PE reference has been encountered; 398 + this includes the reference to an external subset */ 399 + XML_Bool hasParamEntityRefs; 400 + XML_Bool standalone; 401 + #ifdef XML_DTD 402 + /* indicates if external PE has been read */ 403 + XML_Bool paramEntityRead; 404 + HASH_TABLE paramEntities; 405 + #endif /* XML_DTD */ 406 + PREFIX defaultPrefix; 407 + /* === scaffolding for building content model === */ 408 + XML_Bool in_eldecl; 409 + CONTENT_SCAFFOLD* scaffold; 410 + unsigned contentStringLen; 411 + unsigned scaffSize; 412 + unsigned scaffCount; 413 + int scaffLevel; 414 + int* scaffIndex; 415 + } DTD; 416 + 417 + enum EntityType { 418 + ENTITY_INTERNAL, 419 + ENTITY_ATTRIBUTE, 420 + ENTITY_VALUE, 421 + }; 422 + 423 + typedef struct open_internal_entity { 424 + const char* internalEventPtr; 425 + const char* internalEventEndPtr; 426 + struct open_internal_entity* next; 427 + ENTITY* entity; 428 + int startTagLevel; 429 + XML_Bool betweenDecl; /* WFC: PE Between Declarations */ 
430 + enum EntityType type; 431 + } OPEN_INTERNAL_ENTITY; 432 + 433 + enum XML_Account { 434 + XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ 435 + XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity 436 + expansion */ 437 + XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ 438 + }; 439 + 440 + #if XML_GE == 1 441 + typedef unsigned long long XmlBigCount; 442 + typedef struct accounting { 443 + XmlBigCount countBytesDirect; 444 + XmlBigCount countBytesIndirect; 445 + unsigned long debugLevel; 446 + float maximumAmplificationFactor; // >=1.0 447 + unsigned long long activationThresholdBytes; 448 + } ACCOUNTING; 449 + 450 + typedef struct MALLOC_TRACKER { 451 + XmlBigCount bytesAllocated; 452 + XmlBigCount peakBytesAllocated; // updated live only for debug level >=2 453 + unsigned long debugLevel; 454 + float maximumAmplificationFactor; // >=1.0 455 + XmlBigCount activationThresholdBytes; 456 + } MALLOC_TRACKER; 457 + 458 + typedef struct entity_stats { 459 + unsigned int countEverOpened; 460 + unsigned int currentDepth; 461 + unsigned int maximumDepthSeen; 462 + unsigned long debugLevel; 463 + } ENTITY_STATS; 464 + #endif /* XML_GE == 1 */ 465 + 466 + typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char* start, const char* end, const char** endPtr); 467 + 468 + static Processor prologProcessor; 469 + static Processor prologInitProcessor; 470 + static Processor contentProcessor; 471 + static Processor cdataSectionProcessor; 472 + #ifdef XML_DTD 473 + static Processor ignoreSectionProcessor; 474 + static Processor externalParEntProcessor; 475 + static Processor externalParEntInitProcessor; 476 + static Processor entityValueProcessor; 477 + static Processor entityValueInitProcessor; 478 + #endif /* XML_DTD */ 479 + static Processor epilogProcessor; 480 + static Processor errorProcessor; 481 + static Processor externalEntityInitProcessor; 482 + static Processor externalEntityInitProcessor2; 
483 + static Processor externalEntityInitProcessor3; 484 + static Processor externalEntityContentProcessor; 485 + static Processor internalEntityProcessor; 486 + 487 + static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char* encodingName); 488 + static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char* s, const char* next); 489 + static enum XML_Error initializeEncoding(XML_Parser parser); 490 + static enum XML_Error doProlog(XML_Parser parser, const ENCODING* enc, const char* s, const char* end, int tok, 491 + const char* next, const char** nextPtr, XML_Bool haveMore, XML_Bool allowClosingDoctype, 492 + enum XML_Account account); 493 + static enum XML_Error processEntity(XML_Parser parser, ENTITY* entity, XML_Bool betweenDecl, enum EntityType type); 494 + static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING* enc, const char* start, 495 + const char* end, const char** endPtr, XML_Bool haveMore, enum XML_Account account); 496 + static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING* enc, const char** startPtr, const char* end, 497 + const char** nextPtr, XML_Bool haveMore, enum XML_Account account); 498 + #ifdef XML_DTD 499 + static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING* enc, const char** startPtr, const char* end, 500 + const char** nextPtr, XML_Bool haveMore); 501 + #endif /* XML_DTD */ 502 + 503 + static void freeBindings(XML_Parser parser, BINDING* bindings); 504 + static enum XML_Error storeAtts(XML_Parser parser, const ENCODING* enc, const char* attStr, TAG_NAME* tagNamePtr, 505 + BINDING** bindingsPtr, enum XML_Account account); 506 + static enum XML_Error addBinding(XML_Parser parser, PREFIX* prefix, const ATTRIBUTE_ID* attId, const XML_Char* uri, 507 + BINDING** bindingsPtr); 508 + static int defineAttribute(ELEMENT_TYPE* type, ATTRIBUTE_ID* attId, XML_Bool isCdata, XML_Bool isId, 509 + const XML_Char* value, XML_Parser 
parser); 510 + static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING* enc, XML_Bool isCdata, const char* ptr, 511 + const char* end, STRING_POOL* pool, enum XML_Account account); 512 + static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING* enc, XML_Bool isCdata, const char* ptr, 513 + const char* end, STRING_POOL* pool, enum XML_Account account, 514 + const char** nextPtr); 515 + static ATTRIBUTE_ID* getAttributeId(XML_Parser parser, const ENCODING* enc, const char* start, const char* end); 516 + static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE* elementType); 517 + #if XML_GE == 1 518 + static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING* enc, const char* start, const char* end, 519 + enum XML_Account account, const char** nextPtr); 520 + static enum XML_Error callStoreEntityValue(XML_Parser parser, const ENCODING* enc, const char* start, const char* end, 521 + enum XML_Account account); 522 + #else 523 + static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY* entity); 524 + #endif 525 + static int reportProcessingInstruction(XML_Parser parser, const ENCODING* enc, const char* start, const char* end); 526 + static int reportComment(XML_Parser parser, const ENCODING* enc, const char* start, const char* end); 527 + static void reportDefault(XML_Parser parser, const ENCODING* enc, const char* start, const char* end); 528 + 529 + static const XML_Char* getContext(XML_Parser parser); 530 + static XML_Bool setContext(XML_Parser parser, const XML_Char* context); 531 + 532 + static void FASTCALL normalizePublicId(XML_Char* s); 533 + 534 + static DTD* dtdCreate(XML_Parser parser); 535 + /* do not call if m_parentParser != NULL */ 536 + static void dtdReset(DTD* p, XML_Parser parser); 537 + static void dtdDestroy(DTD* p, XML_Bool isDocEntity, XML_Parser parser); 538 + static int dtdCopy(XML_Parser oldParser, DTD* newDtd, const DTD* oldDtd, XML_Parser parser); 539 + static int 
copyEntityTable(XML_Parser oldParser, HASH_TABLE* newTable, STRING_POOL* newPool, 540 + const HASH_TABLE* oldTable); 541 + static NAMED* lookup(XML_Parser parser, HASH_TABLE* table, KEY name, size_t createSize); 542 + static void FASTCALL hashTableInit(HASH_TABLE* table, XML_Parser parser); 543 + static void FASTCALL hashTableClear(HASH_TABLE* table); 544 + static void FASTCALL hashTableDestroy(HASH_TABLE* table); 545 + static void FASTCALL hashTableIterInit(HASH_TABLE_ITER* iter, const HASH_TABLE* table); 546 + static NAMED* FASTCALL hashTableIterNext(HASH_TABLE_ITER* iter); 547 + 548 + static void FASTCALL poolInit(STRING_POOL* pool, XML_Parser parser); 549 + static void FASTCALL poolClear(STRING_POOL* pool); 550 + static void FASTCALL poolDestroy(STRING_POOL* pool); 551 + static XML_Char* poolAppend(STRING_POOL* pool, const ENCODING* enc, const char* ptr, const char* end); 552 + static XML_Char* poolStoreString(STRING_POOL* pool, const ENCODING* enc, const char* ptr, const char* end); 553 + static XML_Bool FASTCALL poolGrow(STRING_POOL* pool); 554 + static const XML_Char* FASTCALL poolCopyString(STRING_POOL* pool, const XML_Char* s); 555 + static const XML_Char* poolCopyStringN(STRING_POOL* pool, const XML_Char* s, int n); 556 + static const XML_Char* FASTCALL poolAppendString(STRING_POOL* pool, const XML_Char* s); 557 + 558 + static int FASTCALL nextScaffoldPart(XML_Parser parser); 559 + static XML_Content* build_model(XML_Parser parser); 560 + static ELEMENT_TYPE* getElementType(XML_Parser parser, const ENCODING* enc, const char* ptr, const char* end); 561 + 562 + static XML_Char* copyString(const XML_Char* s, XML_Parser parser); 563 + 564 + static unsigned long generate_hash_secret_salt(XML_Parser parser); 565 + static XML_Bool startParsing(XML_Parser parser); 566 + 567 + static XML_Parser parserCreate(const XML_Char* encodingName, const XML_Memory_Handling_Suite* memsuite, 568 + const XML_Char* nameSep, DTD* dtd, XML_Parser parentParser); 569 + 570 + static 
void parserInit(XML_Parser parser, const XML_Char* encodingName); 571 + 572 + #if XML_GE == 1 573 + static float accountingGetCurrentAmplification(XML_Parser rootParser); 574 + static void accountingReportStats(XML_Parser originParser, const char* epilog); 575 + static void accountingOnAbort(XML_Parser originParser); 576 + static void accountingReportDiff(XML_Parser rootParser, unsigned int levelsAwayFromRootParser, const char* before, 577 + const char* after, ptrdiff_t bytesMore, int source_line, enum XML_Account account); 578 + static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, const char* before, const char* after, 579 + int source_line, enum XML_Account account); 580 + 581 + static void entityTrackingReportStats(XML_Parser parser, ENTITY* entity, const char* action, int sourceLine); 582 + static void entityTrackingOnOpen(XML_Parser parser, ENTITY* entity, int sourceLine); 583 + static void entityTrackingOnClose(XML_Parser parser, ENTITY* entity, int sourceLine); 584 + #endif /* XML_GE == 1 */ 585 + 586 + static XML_Parser getRootParserOf(XML_Parser parser, unsigned int* outLevelDiff); 587 + 588 + static unsigned long getDebugLevel(const char* variableName, unsigned long defaultDebugLevel); 589 + 590 + #define poolStart(pool) ((pool)->start) 591 + #define poolLength(pool) ((pool)->ptr - (pool)->start) 592 + #define poolChop(pool) ((void)--(pool->ptr)) 593 + #define poolLastChar(pool) (((pool)->ptr)[-1]) 594 + #define poolDiscard(pool) ((pool)->ptr = (pool)->start) 595 + #define poolFinish(pool) ((pool)->start = (pool)->ptr) 596 + #define poolAppendChar(pool, c) (((pool)->ptr == (pool)->end && !poolGrow(pool)) ? 
0 : ((*((pool)->ptr)++ = c), 1)) 597 + 598 + #if !defined(XML_TESTING) 599 + const 600 + #endif 601 + XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c 602 + #if defined(XML_TESTING) 603 + unsigned int g_bytesScanned = 0; // used for testing only 604 + #endif 605 + 606 + struct XML_ParserStruct { 607 + /* The first member must be m_userData so that the XML_GetUserData 608 + macro works. */ 609 + void* m_userData; 610 + void* m_handlerArg; 611 + 612 + // How the four parse buffer pointers below relate in time and space: 613 + // 614 + // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim 615 + // | | | | 616 + // <--parsed-->| | | 617 + // <---parsing--->| | 618 + // <--unoccupied-->| 619 + // <---------total-malloced/realloced-------->| 620 + 621 + char* m_buffer; // malloc/realloc base pointer of parse buffer 622 + const XML_Memory_Handling_Suite m_mem; 623 + const char* m_bufferPtr; // first character to be parsed 624 + char* m_bufferEnd; // past last character to be parsed 625 + const char* m_bufferLim; // allocated end of m_buffer 626 + 627 + XML_Index m_parseEndByteIndex; 628 + const char* m_parseEndPtr; 629 + size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ 630 + XML_Bool m_reparseDeferralEnabled; 631 + int m_lastBufferRequestSize; 632 + XML_Char* m_dataBuf; 633 + XML_Char* m_dataBufEnd; 634 + XML_StartElementHandler m_startElementHandler; 635 + XML_EndElementHandler m_endElementHandler; 636 + XML_CharacterDataHandler m_characterDataHandler; 637 + XML_ProcessingInstructionHandler m_processingInstructionHandler; 638 + XML_CommentHandler m_commentHandler; 639 + XML_StartCdataSectionHandler m_startCdataSectionHandler; 640 + XML_EndCdataSectionHandler m_endCdataSectionHandler; 641 + XML_DefaultHandler m_defaultHandler; 642 + XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; 643 + XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; 644 + XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; 645 + 
XML_NotationDeclHandler m_notationDeclHandler; 646 + XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; 647 + XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; 648 + XML_NotStandaloneHandler m_notStandaloneHandler; 649 + XML_ExternalEntityRefHandler m_externalEntityRefHandler; 650 + XML_Parser m_externalEntityRefHandlerArg; 651 + XML_SkippedEntityHandler m_skippedEntityHandler; 652 + XML_UnknownEncodingHandler m_unknownEncodingHandler; 653 + XML_ElementDeclHandler m_elementDeclHandler; 654 + XML_AttlistDeclHandler m_attlistDeclHandler; 655 + XML_EntityDeclHandler m_entityDeclHandler; 656 + XML_XmlDeclHandler m_xmlDeclHandler; 657 + const ENCODING* m_encoding; 658 + INIT_ENCODING m_initEncoding; 659 + const ENCODING* m_internalEncoding; 660 + const XML_Char* m_protocolEncodingName; 661 + XML_Bool m_ns; 662 + XML_Bool m_ns_triplets; 663 + void* m_unknownEncodingMem; 664 + void* m_unknownEncodingData; 665 + void* m_unknownEncodingHandlerData; 666 + void(XMLCALL* m_unknownEncodingRelease)(void*); 667 + PROLOG_STATE m_prologState; 668 + Processor* m_processor; 669 + enum XML_Error m_errorCode; 670 + const char* m_eventPtr; 671 + const char* m_eventEndPtr; 672 + const char* m_positionPtr; 673 + OPEN_INTERNAL_ENTITY* m_openInternalEntities; 674 + OPEN_INTERNAL_ENTITY* m_freeInternalEntities; 675 + OPEN_INTERNAL_ENTITY* m_openAttributeEntities; 676 + OPEN_INTERNAL_ENTITY* m_freeAttributeEntities; 677 + OPEN_INTERNAL_ENTITY* m_openValueEntities; 678 + OPEN_INTERNAL_ENTITY* m_freeValueEntities; 679 + XML_Bool m_defaultExpandInternalEntities; 680 + int m_tagLevel; 681 + ENTITY* m_declEntity; 682 + const XML_Char* m_doctypeName; 683 + const XML_Char* m_doctypeSysid; 684 + const XML_Char* m_doctypePubid; 685 + const XML_Char* m_declAttributeType; 686 + const XML_Char* m_declNotationName; 687 + const XML_Char* m_declNotationPublicId; 688 + ELEMENT_TYPE* m_declElementType; 689 + ATTRIBUTE_ID* m_declAttributeId; 690 + XML_Bool m_declAttributeIsCdata; 691 + XML_Bool 
m_declAttributeIsId; 692 + DTD* m_dtd; 693 + const XML_Char* m_curBase; 694 + TAG* m_tagStack; 695 + TAG* m_freeTagList; 696 + BINDING* m_inheritedBindings; 697 + BINDING* m_freeBindingList; 698 + int m_attsSize; 699 + int m_nSpecifiedAtts; 700 + int m_idAttIndex; 701 + ATTRIBUTE* m_atts; 702 + NS_ATT* m_nsAtts; 703 + unsigned long m_nsAttsVersion; 704 + unsigned char m_nsAttsPower; 705 + #ifdef XML_ATTR_INFO 706 + XML_AttrInfo* m_attInfo; 707 + #endif 708 + POSITION m_position; 709 + STRING_POOL m_tempPool; 710 + STRING_POOL m_temp2Pool; 711 + char* m_groupConnector; 712 + unsigned int m_groupSize; 713 + XML_Char m_namespaceSeparator; 714 + XML_Parser m_parentParser; 715 + XML_ParsingStatus m_parsingStatus; 716 + #ifdef XML_DTD 717 + XML_Bool m_isParamEntity; 718 + XML_Bool m_useForeignDTD; 719 + enum XML_ParamEntityParsing m_paramEntityParsing; 720 + #endif 721 + unsigned long m_hash_secret_salt; 722 + #if XML_GE == 1 723 + ACCOUNTING m_accounting; 724 + MALLOC_TRACKER m_alloc_tracker; 725 + ENTITY_STATS m_entity_stats; 726 + #endif 727 + XML_Bool m_reenter; 728 + }; 729 + 730 + #if XML_GE == 1 731 + #define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__)) 732 + #define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__)) 733 + #define FREE(parser, p) (expat_free((parser), (p), __LINE__)) 734 + #else 735 + #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) 736 + #define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) 737 + #define FREE(parser, p) (parser->m_mem.free_fcn((p))) 738 + #endif 739 + 740 + #if XML_GE == 1 741 + static void expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff, XmlBigCount newTotal, 742 + XmlBigCount peakTotal, int sourceLine) { 743 + // NOTE: This can be +infinity or -nan 744 + const float amplification = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; 745 + fprintf(stderr, 746 + "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" 
EXPAT_FMT_ULL( 747 + "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n", 748 + (void*)rootParser, rootParser->m_accounting.countBytesDirect, operator, absDiff, newTotal, peakTotal, 749 + (double)amplification, sourceLine); 750 + } 751 + 752 + static bool expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase, int sourceLine) { 753 + assert(rootParser != NULL); 754 + assert(increase > 0); 755 + 756 + XmlBigCount newTotal = 0; 757 + bool tolerable = true; 758 + 759 + // Detect integer overflow 760 + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) { 761 + tolerable = false; 762 + } else { 763 + newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase; 764 + 765 + if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) { 766 + assert(newTotal > 0); 767 + // NOTE: This can be +infinity when dividing by zero but not -nan 768 + const float amplification = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; 769 + if (amplification > rootParser->m_alloc_tracker.maximumAmplificationFactor) { 770 + tolerable = false; 771 + } 772 + } 773 + } 774 + 775 + if (!tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) { 776 + expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine); 777 + } 778 + 779 + return tolerable; 780 + } 781 + 782 + #if defined(XML_TESTING) 783 + void * 784 + #else 785 + static void* 786 + #endif 787 + expat_malloc(XML_Parser parser, size_t size, int sourceLine) { 788 + // Detect integer overflow 789 + if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) { 790 + return NULL; 791 + } 792 + 793 + const XML_Parser rootParser = getRootParserOf(parser, NULL); 794 + assert(rootParser->m_parentParser == NULL); 795 + 796 + const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size; 797 + 798 + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < bytesToAllocate) { 799 
+ return NULL; // i.e. signal integer overflow as out-of-memory 800 + } 801 + 802 + if (!expat_heap_increase_tolerable(rootParser, bytesToAllocate, sourceLine)) { 803 + return NULL; // i.e. signal violation as out-of-memory 804 + } 805 + 806 + // Actually allocate 807 + void* const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate); 808 + 809 + if (mallocedPtr == NULL) { 810 + return NULL; 811 + } 812 + 813 + // Update in-block recorded size 814 + *(size_t*)mallocedPtr = size; 815 + 816 + // Update accounting 817 + rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate; 818 + 819 + // Report as needed 820 + if (rootParser->m_alloc_tracker.debugLevel >= 2) { 821 + if (rootParser->m_alloc_tracker.bytesAllocated > rootParser->m_alloc_tracker.peakBytesAllocated) { 822 + rootParser->m_alloc_tracker.peakBytesAllocated = rootParser->m_alloc_tracker.bytesAllocated; 823 + } 824 + expat_heap_stat(rootParser, '+', bytesToAllocate, rootParser->m_alloc_tracker.bytesAllocated, 825 + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 826 + } 827 + 828 + return (char*)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; 829 + } 830 + 831 + #if defined(XML_TESTING) 832 + void 833 + #else 834 + static void 835 + #endif 836 + expat_free(XML_Parser parser, void *ptr, int sourceLine) { 837 + assert(parser != NULL); 838 + 839 + if (ptr == NULL) { 840 + return; 841 + } 842 + 843 + const XML_Parser rootParser = getRootParserOf(parser, NULL); 844 + assert(rootParser->m_parentParser == NULL); 845 + 846 + // Extract size (to the eyes of malloc_fcn/realloc_fcn) and 847 + // the original pointer returned by malloc/realloc 848 + void* const mallocedPtr = (char*)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); 849 + const size_t bytesAllocated = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t*)mallocedPtr; 850 + 851 + // Update accounting 852 + assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated); 853 + rootParser->m_alloc_tracker.bytesAllocated -= 
bytesAllocated; 854 + 855 + // Report as needed 856 + if (rootParser->m_alloc_tracker.debugLevel >= 2) { 857 + expat_heap_stat(rootParser, '-', bytesAllocated, rootParser->m_alloc_tracker.bytesAllocated, 858 + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 859 + } 860 + 861 + // NOTE: This may be freeing rootParser, so freeing has to come last 862 + parser->m_mem.free_fcn(mallocedPtr); 863 + } 864 + 865 + #if defined(XML_TESTING) 866 + void * 867 + #else 868 + static void* 869 + #endif 870 + expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) { 871 + assert(parser != NULL); 872 + 873 + if (ptr == NULL) { 874 + return expat_malloc(parser, size, sourceLine); 875 + } 876 + 877 + if (size == 0) { 878 + expat_free(parser, ptr, sourceLine); 879 + return NULL; 880 + } 881 + 882 + const XML_Parser rootParser = getRootParserOf(parser, NULL); 883 + assert(rootParser->m_parentParser == NULL); 884 + 885 + // Extract original size (to the eyes of the caller) and the original 886 + // pointer returned by malloc/realloc 887 + void* mallocedPtr = (char*)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); 888 + const size_t prevSize = *(size_t*)mallocedPtr; 889 + 890 + // Classify upcoming change 891 + const bool isIncrease = (size > prevSize); 892 + const size_t absDiff = (size > prevSize) ? (size - prevSize) : (prevSize - size); 893 + 894 + // Ask for permission from accounting 895 + if (isIncrease) { 896 + if (!expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) { 897 + return NULL; // i.e. signal violation as out-of-memory 898 + } 899 + } 900 + 901 + // NOTE: Integer overflow detection has already been done for us 902 + // by expat_heap_increase_tolerable(..) 
above 903 + assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size); 904 + 905 + // Actually allocate 906 + mallocedPtr = parser->m_mem.realloc_fcn(mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size); 907 + 908 + if (mallocedPtr == NULL) { 909 + return NULL; 910 + } 911 + 912 + // Update accounting 913 + if (isIncrease) { 914 + assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated >= absDiff); 915 + rootParser->m_alloc_tracker.bytesAllocated += absDiff; 916 + } else { // i.e. decrease 917 + assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff); 918 + rootParser->m_alloc_tracker.bytesAllocated -= absDiff; 919 + } 920 + 921 + // Report as needed 922 + if (rootParser->m_alloc_tracker.debugLevel >= 2) { 923 + if (rootParser->m_alloc_tracker.bytesAllocated > rootParser->m_alloc_tracker.peakBytesAllocated) { 924 + rootParser->m_alloc_tracker.peakBytesAllocated = rootParser->m_alloc_tracker.bytesAllocated; 925 + } 926 + expat_heap_stat(rootParser, isIncrease ? 
'+' : '-', absDiff, rootParser->m_alloc_tracker.bytesAllocated, 927 + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 928 + } 929 + 930 + // Update in-block recorded size 931 + *(size_t*)mallocedPtr = size; 932 + 933 + return (char*)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; 934 + } 935 + #endif // XML_GE == 1 936 + 937 + XML_Parser XMLCALL XML_ParserCreate(const XML_Char* encodingName) { 938 + return XML_ParserCreate_MM(encodingName, NULL, NULL); 939 + } 940 + 941 + XML_Parser XMLCALL XML_ParserCreateNS(const XML_Char* encodingName, XML_Char nsSep) { 942 + XML_Char tmp[2] = {nsSep, 0}; 943 + return XML_ParserCreate_MM(encodingName, NULL, tmp); 944 + } 945 + 946 + // "xml=http://www.w3.org/XML/1998/namespace" 947 + static const XML_Char implicitContext[] = { 948 + ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, 949 + ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, 950 + ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, 951 + ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, 952 + ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; 953 + 954 + /* To avoid warnings about unused functions: */ 955 + #if !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) 956 + 957 + #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 958 + 959 + /* Obtain entropy on Linux 3.17+ */ 960 + static int writeRandomBytes_getrandom_nonblock(void* target, size_t count) { 961 + int success = 0; /* full count bytes written? 
*/ 962 + size_t bytesWrittenTotal = 0; 963 + const unsigned int getrandomFlags = GRND_NONBLOCK; 964 + 965 + do { 966 + void* const currentTarget = (void*)((char*)target + bytesWrittenTotal); 967 + const size_t bytesToWrite = count - bytesWrittenTotal; 968 + 969 + assert(bytesToWrite <= INT_MAX); 970 + 971 + const int bytesWrittenMore = 972 + #if defined(HAVE_GETRANDOM) 973 + (int)getrandom(currentTarget, bytesToWrite, getrandomFlags); 974 + #else 975 + (int)syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); 976 + #endif 977 + 978 + if (bytesWrittenMore > 0) { 979 + bytesWrittenTotal += bytesWrittenMore; 980 + if (bytesWrittenTotal >= count) success = 1; 981 + } 982 + } while (!success && (errno == EINTR)); 983 + 984 + return success; 985 + } 986 + 987 + #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 988 + 989 + #if !defined(_WIN32) && defined(XML_DEV_URANDOM) 990 + 991 + /* Extract entropy from /dev/urandom */ 992 + static int writeRandomBytes_dev_urandom(void* target, size_t count) { 993 + int success = 0; /* full count bytes written? */ 994 + size_t bytesWrittenTotal = 0; 995 + 996 + const int fd = open("/dev/urandom", O_RDONLY); 997 + if (fd < 0) { 998 + return 0; 999 + } 1000 + 1001 + do { 1002 + void* const currentTarget = (void*)((char*)target + bytesWrittenTotal); 1003 + const size_t bytesToWrite = count - bytesWrittenTotal; 1004 + 1005 + const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); 1006 + 1007 + if (bytesWrittenMore > 0) { 1008 + bytesWrittenTotal += bytesWrittenMore; 1009 + if (bytesWrittenTotal >= count) success = 1; 1010 + } 1011 + } while (!success && (errno == EINTR)); 1012 + 1013 + close(fd); 1014 + return success; 1015 + } 1016 + 1017 + #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 1018 + 1019 + #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! 
defined(HAVE_ARC4RANDOM) */ 1020 + 1021 + #if defined(HAVE_ARC4RANDOM) && !defined(HAVE_ARC4RANDOM_BUF) 1022 + 1023 + static void writeRandomBytes_arc4random(void* target, size_t count) { 1024 + size_t bytesWrittenTotal = 0; 1025 + 1026 + while (bytesWrittenTotal < count) { 1027 + const uint32_t random32 = arc4random(); 1028 + size_t i = 0; 1029 + 1030 + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); i++, bytesWrittenTotal++) { 1031 + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 1032 + ((uint8_t*)target)[bytesWrittenTotal] = random8; 1033 + } 1034 + } 1035 + } 1036 + 1037 + #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ 1038 + 1039 + #ifdef _WIN32 1040 + 1041 + /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), 1042 + as it didn't declare it in its header prior to version 5.3.0 of its 1043 + runtime package (mingwrt, containing stdlib.h). The upstream fix 1044 + was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ 1045 + #if defined(__MINGW32__) && defined(__MINGW32_VERSION) && __MINGW32_VERSION < 5003000L && \ 1046 + !defined(__MINGW64_VERSION_MAJOR) 1047 + __declspec(dllimport) int rand_s(unsigned int*); 1048 + #endif 1049 + 1050 + /* Obtain entropy on Windows using the rand_s() function which 1051 + * generates cryptographically secure random numbers. Internally it 1052 + * uses RtlGenRandom API which is present in Windows XP and later. 
1053 + */ 1054 + static int writeRandomBytes_rand_s(void* target, size_t count) { 1055 + size_t bytesWrittenTotal = 0; 1056 + 1057 + while (bytesWrittenTotal < count) { 1058 + unsigned int random32 = 0; 1059 + size_t i = 0; 1060 + 1061 + if (rand_s(&random32)) return 0; /* failure */ 1062 + 1063 + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); i++, bytesWrittenTotal++) { 1064 + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 1065 + ((uint8_t*)target)[bytesWrittenTotal] = random8; 1066 + } 1067 + } 1068 + return 1; /* success */ 1069 + } 1070 + 1071 + #endif /* _WIN32 */ 1072 + 1073 + #if !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) 1074 + 1075 + static unsigned long gather_time_entropy(void) { 1076 + #ifdef _WIN32 1077 + FILETIME ft; 1078 + GetSystemTimeAsFileTime(&ft); /* never fails */ 1079 + return ft.dwHighDateTime ^ ft.dwLowDateTime; 1080 + #else 1081 + struct timeval tv; 1082 + int gettimeofday_res; 1083 + 1084 + gettimeofday_res = gettimeofday(&tv, NULL); 1085 + 1086 + #if defined(NDEBUG) 1087 + (void)gettimeofday_res; 1088 + #else 1089 + assert(gettimeofday_res == 0); 1090 + #endif /* defined(NDEBUG) */ 1091 + 1092 + /* Microseconds time is <20 bits entropy */ 1093 + return tv.tv_usec; 1094 + #endif 1095 + } 1096 + 1097 + #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! 
defined(HAVE_ARC4RANDOM) */ 1098 + 1099 + static unsigned long ENTROPY_DEBUG(const char* label, unsigned long entropy) { 1100 + if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { 1101 + fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, (int)sizeof(entropy) * 2, entropy, 1102 + (unsigned long)sizeof(entropy)); 1103 + } 1104 + return entropy; 1105 + } 1106 + 1107 + static unsigned long generate_hash_secret_salt(XML_Parser parser) { 1108 + unsigned long entropy; 1109 + (void)parser; 1110 + 1111 + /* "Failproof" high quality providers: */ 1112 + #if defined(HAVE_ARC4RANDOM_BUF) 1113 + arc4random_buf(&entropy, sizeof(entropy)); 1114 + return ENTROPY_DEBUG("arc4random_buf", entropy); 1115 + #elif defined(HAVE_ARC4RANDOM) 1116 + writeRandomBytes_arc4random((void*)&entropy, sizeof(entropy)); 1117 + return ENTROPY_DEBUG("arc4random", entropy); 1118 + #else 1119 + /* Try high quality providers first .. */ 1120 + #ifdef _WIN32 1121 + if (writeRandomBytes_rand_s((void*)&entropy, sizeof(entropy))) { 1122 + return ENTROPY_DEBUG("rand_s", entropy); 1123 + } 1124 + #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 1125 + if (writeRandomBytes_getrandom_nonblock((void*)&entropy, sizeof(entropy))) { 1126 + return ENTROPY_DEBUG("getrandom", entropy); 1127 + } 1128 + #endif 1129 + #if !defined(_WIN32) && defined(XML_DEV_URANDOM) 1130 + if (writeRandomBytes_dev_urandom((void*)&entropy, sizeof(entropy))) { 1131 + return ENTROPY_DEBUG("/dev/urandom", entropy); 1132 + } 1133 + #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 1134 + /* .. 
and self-made low quality for backup: */ 1135 + 1136 + /* Process ID is 0 bits entropy if attacker has local access */ 1137 + entropy = gather_time_entropy() ^ getpid(); 1138 + 1139 + /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ 1140 + if (sizeof(unsigned long) == 4) { 1141 + return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); 1142 + } else { 1143 + return ENTROPY_DEBUG("fallback(8)", entropy * (unsigned long)2305843009213693951ULL); 1144 + } 1145 + #endif 1146 + } 1147 + 1148 + static unsigned long get_hash_secret_salt(XML_Parser parser) { 1149 + const XML_Parser rootParser = getRootParserOf(parser, NULL); 1150 + assert(!rootParser->m_parentParser); 1151 + 1152 + return rootParser->m_hash_secret_salt; 1153 + } 1154 + 1155 + static enum XML_Error callProcessor(XML_Parser parser, const char* start, const char* end, const char** endPtr) { 1156 + const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); 1157 + 1158 + if (parser->m_reparseDeferralEnabled && !parser->m_parsingStatus.finalBuffer) { 1159 + // Heuristic: don't try to parse a partial token again until the amount of 1160 + // available data has increased significantly. 1161 + const size_t had_before = parser->m_partialTokenBytesBefore; 1162 + // ...but *do* try anyway if we're close to causing a reallocation. 
1163 + size_t available_buffer = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 1164 + #if XML_CONTEXT_BYTES > 0 1165 + available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); 1166 + #endif 1167 + available_buffer += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); 1168 + // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok 1169 + const bool enough = (have_now >= 2 * had_before) || ((size_t)parser->m_lastBufferRequestSize > available_buffer); 1170 + 1171 + if (!enough) { 1172 + *endPtr = start; // callers may expect this to be set 1173 + return XML_ERROR_NONE; 1174 + } 1175 + } 1176 + #if defined(XML_TESTING) 1177 + g_bytesScanned += (unsigned)have_now; 1178 + #endif 1179 + // Run in a loop to eliminate dangerous recursion depths 1180 + enum XML_Error ret; 1181 + *endPtr = start; 1182 + while (1) { 1183 + // Use endPtr as the new start in each iteration, since it will 1184 + // be set to the next start point by m_processor. 1185 + ret = parser->m_processor(parser, *endPtr, end, endPtr); 1186 + 1187 + // Make parsing status (and in particular XML_SUSPENDED) take 1188 + // precedence over re-enter flag when they disagree 1189 + if (parser->m_parsingStatus.parsing != XML_PARSING) { 1190 + parser->m_reenter = XML_FALSE; 1191 + } 1192 + 1193 + if (!parser->m_reenter) { 1194 + break; 1195 + } 1196 + 1197 + parser->m_reenter = XML_FALSE; 1198 + if (ret != XML_ERROR_NONE) return ret; 1199 + } 1200 + 1201 + if (ret == XML_ERROR_NONE) { 1202 + // if we consumed nothing, remember what we had on this parse attempt. 
1203 + if (*endPtr == start) { 1204 + parser->m_partialTokenBytesBefore = have_now; 1205 + } else { 1206 + parser->m_partialTokenBytesBefore = 0; 1207 + } 1208 + } 1209 + return ret; 1210 + } 1211 + 1212 + static XML_Bool /* only valid for root parser */ 1213 + startParsing(XML_Parser parser) { 1214 + /* hash functions must be initialized before setContext() is called */ 1215 + if (parser->m_hash_secret_salt == 0) parser->m_hash_secret_salt = generate_hash_secret_salt(parser); 1216 + if (parser->m_ns) { 1217 + /* implicit context only set for root parser, since child 1218 + parsers (i.e. external entity parsers) will inherit it 1219 + */ 1220 + return setContext(parser, implicitContext); 1221 + } 1222 + return XML_TRUE; 1223 + } 1224 + 1225 + XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char* encodingName, const XML_Memory_Handling_Suite* memsuite, 1226 + const XML_Char* nameSep) { 1227 + return parserCreate(encodingName, memsuite, nameSep, NULL, NULL); 1228 + } 1229 + 1230 + static XML_Parser parserCreate(const XML_Char* encodingName, const XML_Memory_Handling_Suite* memsuite, 1231 + const XML_Char* nameSep, DTD* dtd, XML_Parser parentParser) { 1232 + XML_Parser parser = NULL; 1233 + 1234 + #if XML_GE == 1 1235 + const size_t increase = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct); 1236 + 1237 + if (parentParser != NULL) { 1238 + const XML_Parser rootParser = getRootParserOf(parentParser, NULL); 1239 + if (!expat_heap_increase_tolerable(rootParser, increase, __LINE__)) { 1240 + return NULL; 1241 + } 1242 + } 1243 + #else 1244 + UNUSED_P(parentParser); 1245 + #endif 1246 + 1247 + if (memsuite) { 1248 + XML_Memory_Handling_Suite* mtemp; 1249 + #if XML_GE == 1 1250 + void* const sizeAndParser = 1251 + memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct)); 1252 + if (sizeAndParser != NULL) { 1253 + *(size_t*)sizeAndParser = sizeof(struct XML_ParserStruct); 1254 + parser = 
(XML_Parser)((char*)sizeAndParser + sizeof(size_t) + EXPAT_MALLOC_PADDING); 1255 + #else 1256 + parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); 1257 + if (parser != NULL) { 1258 + #endif 1259 + mtemp = (XML_Memory_Handling_Suite*)&(parser->m_mem); 1260 + mtemp->malloc_fcn = memsuite->malloc_fcn; 1261 + mtemp->realloc_fcn = memsuite->realloc_fcn; 1262 + mtemp->free_fcn = memsuite->free_fcn; 1263 + } 1264 + } else { 1265 + XML_Memory_Handling_Suite* mtemp; 1266 + #if XML_GE == 1 1267 + void* const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct)); 1268 + if (sizeAndParser != NULL) { 1269 + *(size_t*)sizeAndParser = sizeof(struct XML_ParserStruct); 1270 + parser = (XML_Parser)((char*)sizeAndParser + sizeof(size_t) + EXPAT_MALLOC_PADDING); 1271 + #else 1272 + parser = malloc(sizeof(struct XML_ParserStruct)); 1273 + if (parser != NULL) { 1274 + #endif 1275 + mtemp = (XML_Memory_Handling_Suite*)&(parser->m_mem); 1276 + mtemp->malloc_fcn = malloc; 1277 + mtemp->realloc_fcn = realloc; 1278 + mtemp->free_fcn = free; 1279 + } 1280 + } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0 1281 + 1282 + if (!parser) return parser; 1283 + 1284 + #if XML_GE == 1 1285 + // Initialize .m_alloc_tracker 1286 + memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER)); 1287 + if (parentParser == NULL) { 1288 + parser->m_alloc_tracker.debugLevel = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u); 1289 + parser->m_alloc_tracker.maximumAmplificationFactor = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT; 1290 + parser->m_alloc_tracker.activationThresholdBytes = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT; 1291 + 1292 + // NOTE: This initialization needs to come this early because these fields 1293 + // are read by allocation tracking code 1294 + parser->m_parentParser = NULL; 1295 + parser->m_accounting.countBytesDirect = 0; 1296 + } else { 1297 + parser->m_parentParser = parentParser; 1298 + } 1299 + 
1300 + // Record XML_ParserStruct allocation we did a few lines up before 1301 + const XML_Parser rootParser = getRootParserOf(parser, NULL); 1302 + assert(rootParser->m_parentParser == NULL); 1303 + assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase); 1304 + rootParser->m_alloc_tracker.bytesAllocated += increase; 1305 + 1306 + // Report on allocation 1307 + if (rootParser->m_alloc_tracker.debugLevel >= 2) { 1308 + if (rootParser->m_alloc_tracker.bytesAllocated > rootParser->m_alloc_tracker.peakBytesAllocated) { 1309 + rootParser->m_alloc_tracker.peakBytesAllocated = rootParser->m_alloc_tracker.bytesAllocated; 1310 + } 1311 + 1312 + expat_heap_stat(rootParser, '+', increase, rootParser->m_alloc_tracker.bytesAllocated, 1313 + rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__); 1314 + } 1315 + #else 1316 + parser->m_parentParser = NULL; 1317 + #endif // XML_GE == 1 1318 + 1319 + parser->m_buffer = NULL; 1320 + parser->m_bufferLim = NULL; 1321 + 1322 + parser->m_attsSize = INIT_ATTS_SIZE; 1323 + parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); 1324 + if (parser->m_atts == NULL) { 1325 + FREE(parser, parser); 1326 + return NULL; 1327 + } 1328 + #ifdef XML_ATTR_INFO 1329 + parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo)); 1330 + if (parser->m_attInfo == NULL) { 1331 + FREE(parser, parser->m_atts); 1332 + FREE(parser, parser); 1333 + return NULL; 1334 + } 1335 + #endif 1336 + parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); 1337 + if (parser->m_dataBuf == NULL) { 1338 + FREE(parser, parser->m_atts); 1339 + #ifdef XML_ATTR_INFO 1340 + FREE(parser, parser->m_attInfo); 1341 + #endif 1342 + FREE(parser, parser); 1343 + return NULL; 1344 + } 1345 + parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; 1346 + 1347 + if (dtd) 1348 + parser->m_dtd = dtd; 1349 + else { 1350 + parser->m_dtd = dtdCreate(parser); 1351 + if (parser->m_dtd == NULL) { 1352 + FREE(parser, 
parser->m_dataBuf); 1353 + FREE(parser, parser->m_atts); 1354 + #ifdef XML_ATTR_INFO 1355 + FREE(parser, parser->m_attInfo); 1356 + #endif 1357 + FREE(parser, parser); 1358 + return NULL; 1359 + } 1360 + } 1361 + 1362 + parser->m_freeBindingList = NULL; 1363 + parser->m_freeTagList = NULL; 1364 + parser->m_freeInternalEntities = NULL; 1365 + parser->m_freeAttributeEntities = NULL; 1366 + parser->m_freeValueEntities = NULL; 1367 + 1368 + parser->m_groupSize = 0; 1369 + parser->m_groupConnector = NULL; 1370 + 1371 + parser->m_unknownEncodingHandler = NULL; 1372 + parser->m_unknownEncodingHandlerData = NULL; 1373 + 1374 + parser->m_namespaceSeparator = ASCII_EXCL; 1375 + parser->m_ns = XML_FALSE; 1376 + parser->m_ns_triplets = XML_FALSE; 1377 + 1378 + parser->m_nsAtts = NULL; 1379 + parser->m_nsAttsVersion = 0; 1380 + parser->m_nsAttsPower = 0; 1381 + 1382 + parser->m_protocolEncodingName = NULL; 1383 + 1384 + poolInit(&parser->m_tempPool, parser); 1385 + poolInit(&parser->m_temp2Pool, parser); 1386 + parserInit(parser, encodingName); 1387 + 1388 + if (encodingName && !parser->m_protocolEncodingName) { 1389 + if (dtd) { 1390 + // We need to stop the upcoming call to XML_ParserFree from happily 1391 + // destroying parser->m_dtd because the DTD is shared with the parent 1392 + // parser and the only guard that keeps XML_ParserFree from destroying 1393 + // parser->m_dtd is parser->m_isParamEntity but it will be set to 1394 + // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all). 
1395 + parser->m_dtd = NULL; 1396 + } 1397 + XML_ParserFree(parser); 1398 + return NULL; 1399 + } 1400 + 1401 + if (nameSep) { 1402 + parser->m_ns = XML_TRUE; 1403 + parser->m_internalEncoding = XmlGetInternalEncodingNS(); 1404 + parser->m_namespaceSeparator = *nameSep; 1405 + } else { 1406 + parser->m_internalEncoding = XmlGetInternalEncoding(); 1407 + } 1408 + 1409 + return parser; 1410 + } 1411 + 1412 + static void parserInit(XML_Parser parser, const XML_Char* encodingName) { 1413 + parser->m_processor = prologInitProcessor; 1414 + XmlPrologStateInit(&parser->m_prologState); 1415 + if (encodingName != NULL) { 1416 + parser->m_protocolEncodingName = copyString(encodingName, parser); 1417 + } 1418 + parser->m_curBase = NULL; 1419 + XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); 1420 + parser->m_userData = NULL; 1421 + parser->m_handlerArg = NULL; 1422 + parser->m_startElementHandler = NULL; 1423 + parser->m_endElementHandler = NULL; 1424 + parser->m_characterDataHandler = NULL; 1425 + parser->m_processingInstructionHandler = NULL; 1426 + parser->m_commentHandler = NULL; 1427 + parser->m_startCdataSectionHandler = NULL; 1428 + parser->m_endCdataSectionHandler = NULL; 1429 + parser->m_defaultHandler = NULL; 1430 + parser->m_startDoctypeDeclHandler = NULL; 1431 + parser->m_endDoctypeDeclHandler = NULL; 1432 + parser->m_unparsedEntityDeclHandler = NULL; 1433 + parser->m_notationDeclHandler = NULL; 1434 + parser->m_startNamespaceDeclHandler = NULL; 1435 + parser->m_endNamespaceDeclHandler = NULL; 1436 + parser->m_notStandaloneHandler = NULL; 1437 + parser->m_externalEntityRefHandler = NULL; 1438 + parser->m_externalEntityRefHandlerArg = parser; 1439 + parser->m_skippedEntityHandler = NULL; 1440 + parser->m_elementDeclHandler = NULL; 1441 + parser->m_attlistDeclHandler = NULL; 1442 + parser->m_entityDeclHandler = NULL; 1443 + parser->m_xmlDeclHandler = NULL; 1444 + parser->m_bufferPtr = parser->m_buffer; 1445 + parser->m_bufferEnd = parser->m_buffer; 
1446 + parser->m_parseEndByteIndex = 0; 1447 + parser->m_parseEndPtr = NULL; 1448 + parser->m_partialTokenBytesBefore = 0; 1449 + parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; 1450 + parser->m_lastBufferRequestSize = 0; 1451 + parser->m_declElementType = NULL; 1452 + parser->m_declAttributeId = NULL; 1453 + parser->m_declEntity = NULL; 1454 + parser->m_doctypeName = NULL; 1455 + parser->m_doctypeSysid = NULL; 1456 + parser->m_doctypePubid = NULL; 1457 + parser->m_declAttributeType = NULL; 1458 + parser->m_declNotationName = NULL; 1459 + parser->m_declNotationPublicId = NULL; 1460 + parser->m_declAttributeIsCdata = XML_FALSE; 1461 + parser->m_declAttributeIsId = XML_FALSE; 1462 + memset(&parser->m_position, 0, sizeof(POSITION)); 1463 + parser->m_errorCode = XML_ERROR_NONE; 1464 + parser->m_eventPtr = NULL; 1465 + parser->m_eventEndPtr = NULL; 1466 + parser->m_positionPtr = NULL; 1467 + parser->m_openInternalEntities = NULL; 1468 + parser->m_openAttributeEntities = NULL; 1469 + parser->m_openValueEntities = NULL; 1470 + parser->m_defaultExpandInternalEntities = XML_TRUE; 1471 + parser->m_tagLevel = 0; 1472 + parser->m_tagStack = NULL; 1473 + parser->m_inheritedBindings = NULL; 1474 + parser->m_nSpecifiedAtts = 0; 1475 + parser->m_unknownEncodingMem = NULL; 1476 + parser->m_unknownEncodingRelease = NULL; 1477 + parser->m_unknownEncodingData = NULL; 1478 + parser->m_parsingStatus.parsing = XML_INITIALIZED; 1479 + // Reentry can only be triggered inside m_processor calls 1480 + parser->m_reenter = XML_FALSE; 1481 + #ifdef XML_DTD 1482 + parser->m_isParamEntity = XML_FALSE; 1483 + parser->m_useForeignDTD = XML_FALSE; 1484 + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 1485 + #endif 1486 + parser->m_hash_secret_salt = 0; 1487 + 1488 + #if XML_GE == 1 1489 + memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); 1490 + parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); 1491 + 
parser->m_accounting.maximumAmplificationFactor = 1492 + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; 1493 + parser->m_accounting.activationThresholdBytes = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; 1494 + 1495 + memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); 1496 + parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); 1497 + #endif 1498 + } 1499 + 1500 + /* moves list of bindings to m_freeBindingList */ 1501 + static void FASTCALL moveToFreeBindingList(XML_Parser parser, BINDING* bindings) { 1502 + while (bindings) { 1503 + BINDING* b = bindings; 1504 + bindings = bindings->nextTagBinding; 1505 + b->nextTagBinding = parser->m_freeBindingList; 1506 + parser->m_freeBindingList = b; 1507 + } 1508 + } 1509 + 1510 + XML_Bool XMLCALL XML_ParserReset(XML_Parser parser, const XML_Char* encodingName) { 1511 + TAG* tStk; 1512 + OPEN_INTERNAL_ENTITY* openEntityList; 1513 + 1514 + if (parser == NULL) return XML_FALSE; 1515 + 1516 + if (parser->m_parentParser) return XML_FALSE; 1517 + /* move m_tagStack to m_freeTagList */ 1518 + tStk = parser->m_tagStack; 1519 + while (tStk) { 1520 + TAG* tag = tStk; 1521 + tStk = tStk->parent; 1522 + tag->parent = parser->m_freeTagList; 1523 + moveToFreeBindingList(parser, tag->bindings); 1524 + tag->bindings = NULL; 1525 + parser->m_freeTagList = tag; 1526 + } 1527 + /* move m_openInternalEntities to m_freeInternalEntities */ 1528 + openEntityList = parser->m_openInternalEntities; 1529 + while (openEntityList) { 1530 + OPEN_INTERNAL_ENTITY* openEntity = openEntityList; 1531 + openEntityList = openEntity->next; 1532 + openEntity->next = parser->m_freeInternalEntities; 1533 + parser->m_freeInternalEntities = openEntity; 1534 + } 1535 + /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. 
same task but 1536 + * for attributes) */ 1537 + openEntityList = parser->m_openAttributeEntities; 1538 + while (openEntityList) { 1539 + OPEN_INTERNAL_ENTITY* openEntity = openEntityList; 1540 + openEntityList = openEntity->next; 1541 + openEntity->next = parser->m_freeAttributeEntities; 1542 + parser->m_freeAttributeEntities = openEntity; 1543 + } 1544 + /* move m_openValueEntities to m_freeValueEntities (i.e. same task but 1545 + * for value entities) */ 1546 + openEntityList = parser->m_openValueEntities; 1547 + while (openEntityList) { 1548 + OPEN_INTERNAL_ENTITY* openEntity = openEntityList; 1549 + openEntityList = openEntity->next; 1550 + openEntity->next = parser->m_freeValueEntities; 1551 + parser->m_freeValueEntities = openEntity; 1552 + } 1553 + moveToFreeBindingList(parser, parser->m_inheritedBindings); 1554 + FREE(parser, parser->m_unknownEncodingMem); 1555 + if (parser->m_unknownEncodingRelease) parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1556 + poolClear(&parser->m_tempPool); 1557 + poolClear(&parser->m_temp2Pool); 1558 + FREE(parser, (void*)parser->m_protocolEncodingName); 1559 + parser->m_protocolEncodingName = NULL; 1560 + parserInit(parser, encodingName); 1561 + dtdReset(parser->m_dtd, parser); 1562 + return XML_TRUE; 1563 + } 1564 + 1565 + static XML_Bool parserBusy(XML_Parser parser) { 1566 + switch (parser->m_parsingStatus.parsing) { 1567 + case XML_PARSING: 1568 + case XML_SUSPENDED: 1569 + return XML_TRUE; 1570 + case XML_INITIALIZED: 1571 + case XML_FINISHED: 1572 + default: 1573 + return XML_FALSE; 1574 + } 1575 + } 1576 + 1577 + enum XML_Status XMLCALL XML_SetEncoding(XML_Parser parser, const XML_Char* encodingName) { 1578 + if (parser == NULL) return XML_STATUS_ERROR; 1579 + /* Block after XML_Parse()/XML_ParseBuffer() has been called. 1580 + XXX There's no way for the caller to determine which of the 1581 + XXX possible error cases caused the XML_STATUS_ERROR return. 
1582 + */ 1583 + if (parserBusy(parser)) return XML_STATUS_ERROR; 1584 + 1585 + /* Get rid of any previous encoding name */ 1586 + FREE(parser, (void*)parser->m_protocolEncodingName); 1587 + 1588 + if (encodingName == NULL) /* No new encoding name */ 1589 + parser->m_protocolEncodingName = NULL; 1590 + else { 1591 + /* Copy the new encoding name into allocated memory */ 1592 + parser->m_protocolEncodingName = copyString(encodingName, parser); 1593 + if (!parser->m_protocolEncodingName) return XML_STATUS_ERROR; 1594 + } 1595 + return XML_STATUS_OK; 1596 + } 1597 + 1598 + XML_Parser XMLCALL XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char* context, 1599 + const XML_Char* encodingName) { 1600 + XML_Parser parser = oldParser; 1601 + DTD* newDtd = NULL; 1602 + DTD* oldDtd; 1603 + XML_StartElementHandler oldStartElementHandler; 1604 + XML_EndElementHandler oldEndElementHandler; 1605 + XML_CharacterDataHandler oldCharacterDataHandler; 1606 + XML_ProcessingInstructionHandler oldProcessingInstructionHandler; 1607 + XML_CommentHandler oldCommentHandler; 1608 + XML_StartCdataSectionHandler oldStartCdataSectionHandler; 1609 + XML_EndCdataSectionHandler oldEndCdataSectionHandler; 1610 + XML_DefaultHandler oldDefaultHandler; 1611 + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; 1612 + XML_NotationDeclHandler oldNotationDeclHandler; 1613 + XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; 1614 + XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; 1615 + XML_NotStandaloneHandler oldNotStandaloneHandler; 1616 + XML_ExternalEntityRefHandler oldExternalEntityRefHandler; 1617 + XML_SkippedEntityHandler oldSkippedEntityHandler; 1618 + XML_UnknownEncodingHandler oldUnknownEncodingHandler; 1619 + XML_ElementDeclHandler oldElementDeclHandler; 1620 + XML_AttlistDeclHandler oldAttlistDeclHandler; 1621 + XML_EntityDeclHandler oldEntityDeclHandler; 1622 + XML_XmlDeclHandler oldXmlDeclHandler; 1623 + ELEMENT_TYPE* oldDeclElementType; 1624 + 1625 + 
void* oldUserData; 1626 + void* oldHandlerArg; 1627 + XML_Bool oldDefaultExpandInternalEntities; 1628 + XML_Parser oldExternalEntityRefHandlerArg; 1629 + #ifdef XML_DTD 1630 + enum XML_ParamEntityParsing oldParamEntityParsing; 1631 + int oldInEntityValue; 1632 + #endif 1633 + XML_Bool oldns_triplets; 1634 + /* Note that the new parser shares the same hash secret as the old 1635 + parser, so that dtdCopy and copyEntityTable can lookup values 1636 + from hash tables associated with either parser without us having 1637 + to worry which hash secrets each table has. 1638 + */ 1639 + unsigned long oldhash_secret_salt; 1640 + XML_Bool oldReparseDeferralEnabled; 1641 + 1642 + /* Validate the oldParser parameter before we pull everything out of it */ 1643 + if (oldParser == NULL) return NULL; 1644 + 1645 + /* Stash the original parser contents on the stack */ 1646 + oldDtd = parser->m_dtd; 1647 + oldStartElementHandler = parser->m_startElementHandler; 1648 + oldEndElementHandler = parser->m_endElementHandler; 1649 + oldCharacterDataHandler = parser->m_characterDataHandler; 1650 + oldProcessingInstructionHandler = parser->m_processingInstructionHandler; 1651 + oldCommentHandler = parser->m_commentHandler; 1652 + oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; 1653 + oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; 1654 + oldDefaultHandler = parser->m_defaultHandler; 1655 + oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; 1656 + oldNotationDeclHandler = parser->m_notationDeclHandler; 1657 + oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; 1658 + oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; 1659 + oldNotStandaloneHandler = parser->m_notStandaloneHandler; 1660 + oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; 1661 + oldSkippedEntityHandler = parser->m_skippedEntityHandler; 1662 + oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; 1663 + oldElementDeclHandler = 
parser->m_elementDeclHandler; 1664 + oldAttlistDeclHandler = parser->m_attlistDeclHandler; 1665 + oldEntityDeclHandler = parser->m_entityDeclHandler; 1666 + oldXmlDeclHandler = parser->m_xmlDeclHandler; 1667 + oldDeclElementType = parser->m_declElementType; 1668 + 1669 + oldUserData = parser->m_userData; 1670 + oldHandlerArg = parser->m_handlerArg; 1671 + oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; 1672 + oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; 1673 + #ifdef XML_DTD 1674 + oldParamEntityParsing = parser->m_paramEntityParsing; 1675 + oldInEntityValue = parser->m_prologState.inEntityValue; 1676 + #endif 1677 + oldns_triplets = parser->m_ns_triplets; 1678 + /* Note that the new parser shares the same hash secret as the old 1679 + parser, so that dtdCopy and copyEntityTable can lookup values 1680 + from hash tables associated with either parser without us having 1681 + to worry which hash secrets each table has. 1682 + */ 1683 + oldhash_secret_salt = parser->m_hash_secret_salt; 1684 + oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; 1685 + 1686 + #ifdef XML_DTD 1687 + if (!context) newDtd = oldDtd; 1688 + #endif /* XML_DTD */ 1689 + 1690 + /* Note that the magical uses of the pre-processor to make field 1691 + access look more like C++ require that `parser' be overwritten 1692 + here. This makes this function more painful to follow than it 1693 + would be otherwise. 
1694 + */ 1695 + if (parser->m_ns) { 1696 + XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; 1697 + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser); 1698 + } else { 1699 + parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser); 1700 + } 1701 + 1702 + if (!parser) return NULL; 1703 + 1704 + parser->m_startElementHandler = oldStartElementHandler; 1705 + parser->m_endElementHandler = oldEndElementHandler; 1706 + parser->m_characterDataHandler = oldCharacterDataHandler; 1707 + parser->m_processingInstructionHandler = oldProcessingInstructionHandler; 1708 + parser->m_commentHandler = oldCommentHandler; 1709 + parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; 1710 + parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; 1711 + parser->m_defaultHandler = oldDefaultHandler; 1712 + parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; 1713 + parser->m_notationDeclHandler = oldNotationDeclHandler; 1714 + parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; 1715 + parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; 1716 + parser->m_notStandaloneHandler = oldNotStandaloneHandler; 1717 + parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; 1718 + parser->m_skippedEntityHandler = oldSkippedEntityHandler; 1719 + parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; 1720 + parser->m_elementDeclHandler = oldElementDeclHandler; 1721 + parser->m_attlistDeclHandler = oldAttlistDeclHandler; 1722 + parser->m_entityDeclHandler = oldEntityDeclHandler; 1723 + parser->m_xmlDeclHandler = oldXmlDeclHandler; 1724 + parser->m_declElementType = oldDeclElementType; 1725 + parser->m_userData = oldUserData; 1726 + if (oldUserData == oldHandlerArg) 1727 + parser->m_handlerArg = parser->m_userData; 1728 + else 1729 + parser->m_handlerArg = parser; 1730 + if (oldExternalEntityRefHandlerArg != oldParser) 1731 + parser->m_externalEntityRefHandlerArg = 
oldExternalEntityRefHandlerArg; 1732 + parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; 1733 + parser->m_ns_triplets = oldns_triplets; 1734 + parser->m_hash_secret_salt = oldhash_secret_salt; 1735 + parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; 1736 + parser->m_parentParser = oldParser; 1737 + #ifdef XML_DTD 1738 + parser->m_paramEntityParsing = oldParamEntityParsing; 1739 + parser->m_prologState.inEntityValue = oldInEntityValue; 1740 + if (context) { 1741 + #endif /* XML_DTD */ 1742 + if (!dtdCopy(oldParser, parser->m_dtd, oldDtd, parser) || !setContext(parser, context)) { 1743 + XML_ParserFree(parser); 1744 + return NULL; 1745 + } 1746 + parser->m_processor = externalEntityInitProcessor; 1747 + #ifdef XML_DTD 1748 + } else { 1749 + /* The DTD instance referenced by parser->m_dtd is shared between the 1750 + document's root parser and external PE parsers, therefore one does not 1751 + need to call setContext. In addition, one also *must* not call 1752 + setContext, because this would overwrite existing prefix->binding 1753 + pointers in parser->m_dtd with ones that get destroyed with the external 1754 + PE parser. This would leave those prefixes with dangling pointers. 
1755 + */ 1756 + parser->m_isParamEntity = XML_TRUE; 1757 + XmlPrologStateInitExternalEntity(&parser->m_prologState); 1758 + parser->m_processor = externalParEntInitProcessor; 1759 + } 1760 + #endif /* XML_DTD */ 1761 + return parser; 1762 + } 1763 + 1764 + static void FASTCALL destroyBindings(BINDING* bindings, XML_Parser parser) { 1765 + for (;;) { 1766 + BINDING* b = bindings; 1767 + if (!b) break; 1768 + bindings = b->nextTagBinding; 1769 + FREE(parser, b->uri); 1770 + FREE(parser, b); 1771 + } 1772 + } 1773 + 1774 + void XMLCALL XML_ParserFree(XML_Parser parser) { 1775 + TAG* tagList; 1776 + OPEN_INTERNAL_ENTITY* entityList; 1777 + if (parser == NULL) return; 1778 + /* free m_tagStack and m_freeTagList */ 1779 + tagList = parser->m_tagStack; 1780 + for (;;) { 1781 + TAG* p; 1782 + if (tagList == NULL) { 1783 + if (parser->m_freeTagList == NULL) break; 1784 + tagList = parser->m_freeTagList; 1785 + parser->m_freeTagList = NULL; 1786 + } 1787 + p = tagList; 1788 + tagList = tagList->parent; 1789 + FREE(parser, p->buf); 1790 + destroyBindings(p->bindings, parser); 1791 + FREE(parser, p); 1792 + } 1793 + /* free m_openInternalEntities and m_freeInternalEntities */ 1794 + entityList = parser->m_openInternalEntities; 1795 + for (;;) { 1796 + OPEN_INTERNAL_ENTITY* openEntity; 1797 + if (entityList == NULL) { 1798 + if (parser->m_freeInternalEntities == NULL) break; 1799 + entityList = parser->m_freeInternalEntities; 1800 + parser->m_freeInternalEntities = NULL; 1801 + } 1802 + openEntity = entityList; 1803 + entityList = entityList->next; 1804 + FREE(parser, openEntity); 1805 + } 1806 + /* free m_openAttributeEntities and m_freeAttributeEntities */ 1807 + entityList = parser->m_openAttributeEntities; 1808 + for (;;) { 1809 + OPEN_INTERNAL_ENTITY* openEntity; 1810 + if (entityList == NULL) { 1811 + if (parser->m_freeAttributeEntities == NULL) break; 1812 + entityList = parser->m_freeAttributeEntities; 1813 + parser->m_freeAttributeEntities = NULL; 1814 + } 1815 + 
openEntity = entityList; 1816 + entityList = entityList->next; 1817 + FREE(parser, openEntity); 1818 + } 1819 + /* free m_openValueEntities and m_freeValueEntities */ 1820 + entityList = parser->m_openValueEntities; 1821 + for (;;) { 1822 + OPEN_INTERNAL_ENTITY* openEntity; 1823 + if (entityList == NULL) { 1824 + if (parser->m_freeValueEntities == NULL) break; 1825 + entityList = parser->m_freeValueEntities; 1826 + parser->m_freeValueEntities = NULL; 1827 + } 1828 + openEntity = entityList; 1829 + entityList = entityList->next; 1830 + FREE(parser, openEntity); 1831 + } 1832 + destroyBindings(parser->m_freeBindingList, parser); 1833 + destroyBindings(parser->m_inheritedBindings, parser); 1834 + poolDestroy(&parser->m_tempPool); 1835 + poolDestroy(&parser->m_temp2Pool); 1836 + FREE(parser, (void*)parser->m_protocolEncodingName); 1837 + #ifdef XML_DTD 1838 + /* external parameter entity parsers share the DTD structure 1839 + parser->m_dtd with the root parser, so we must not destroy it 1840 + */ 1841 + if (!parser->m_isParamEntity && parser->m_dtd) 1842 + #else 1843 + if (parser->m_dtd) 1844 + #endif /* XML_DTD */ 1845 + dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, parser); 1846 + FREE(parser, parser->m_atts); 1847 + #ifdef XML_ATTR_INFO 1848 + FREE(parser, parser->m_attInfo); 1849 + #endif 1850 + FREE(parser, parser->m_groupConnector); 1851 + // NOTE: We are avoiding FREE(..) here because parser->m_buffer 1852 + // is not being allocated with MALLOC(..) but with plain 1853 + // .malloc_fcn(..). 
1854 + parser->m_mem.free_fcn(parser->m_buffer); 1855 + FREE(parser, parser->m_dataBuf); 1856 + FREE(parser, parser->m_nsAtts); 1857 + FREE(parser, parser->m_unknownEncodingMem); 1858 + if (parser->m_unknownEncodingRelease) parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1859 + FREE(parser, parser); 1860 + } 1861 + 1862 + void XMLCALL XML_UseParserAsHandlerArg(XML_Parser parser) { 1863 + if (parser != NULL) parser->m_handlerArg = parser; 1864 + } 1865 + 1866 + enum XML_Error XMLCALL XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { 1867 + if (parser == NULL) return XML_ERROR_INVALID_ARGUMENT; 1868 + #ifdef XML_DTD 1869 + /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1870 + if (parserBusy(parser)) return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; 1871 + parser->m_useForeignDTD = useDTD; 1872 + return XML_ERROR_NONE; 1873 + #else 1874 + UNUSED_P(useDTD); 1875 + return XML_ERROR_FEATURE_REQUIRES_XML_DTD; 1876 + #endif 1877 + } 1878 + 1879 + void XMLCALL XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { 1880 + if (parser == NULL) return; 1881 + /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1882 + if (parserBusy(parser)) return; 1883 + parser->m_ns_triplets = do_nst ? 
XML_TRUE : XML_FALSE; 1884 + } 1885 + 1886 + void XMLCALL XML_SetUserData(XML_Parser parser, void* p) { 1887 + if (parser == NULL) return; 1888 + if (parser->m_handlerArg == parser->m_userData) 1889 + parser->m_handlerArg = parser->m_userData = p; 1890 + else 1891 + parser->m_userData = p; 1892 + } 1893 + 1894 + enum XML_Status XMLCALL XML_SetBase(XML_Parser parser, const XML_Char* p) { 1895 + if (parser == NULL) return XML_STATUS_ERROR; 1896 + if (p) { 1897 + p = poolCopyString(&parser->m_dtd->pool, p); 1898 + if (!p) return XML_STATUS_ERROR; 1899 + parser->m_curBase = p; 1900 + } else 1901 + parser->m_curBase = NULL; 1902 + return XML_STATUS_OK; 1903 + } 1904 + 1905 + const XML_Char* XMLCALL XML_GetBase(XML_Parser parser) { 1906 + if (parser == NULL) return NULL; 1907 + return parser->m_curBase; 1908 + } 1909 + 1910 + int XMLCALL XML_GetSpecifiedAttributeCount(XML_Parser parser) { 1911 + if (parser == NULL) return -1; 1912 + return parser->m_nSpecifiedAtts; 1913 + } 1914 + 1915 + int XMLCALL XML_GetIdAttributeIndex(XML_Parser parser) { 1916 + if (parser == NULL) return -1; 1917 + return parser->m_idAttIndex; 1918 + } 1919 + 1920 + #ifdef XML_ATTR_INFO 1921 + const XML_AttrInfo* XMLCALL XML_GetAttributeInfo(XML_Parser parser) { 1922 + if (parser == NULL) return NULL; 1923 + return parser->m_attInfo; 1924 + } 1925 + #endif 1926 + 1927 + void XMLCALL XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end) { 1928 + if (parser == NULL) return; 1929 + parser->m_startElementHandler = start; 1930 + parser->m_endElementHandler = end; 1931 + } 1932 + 1933 + void XMLCALL XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { 1934 + if (parser != NULL) parser->m_startElementHandler = start; 1935 + } 1936 + 1937 + void XMLCALL XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { 1938 + if (parser != NULL) parser->m_endElementHandler = end; 1939 + } 1940 + 1941 + void XMLCALL 
XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler) { 1942 + if (parser != NULL) parser->m_characterDataHandler = handler; 1943 + } 1944 + 1945 + void XMLCALL XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler) { 1946 + if (parser != NULL) parser->m_processingInstructionHandler = handler; 1947 + } 1948 + 1949 + void XMLCALL XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { 1950 + if (parser != NULL) parser->m_commentHandler = handler; 1951 + } 1952 + 1953 + void XMLCALL XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, 1954 + XML_EndCdataSectionHandler end) { 1955 + if (parser == NULL) return; 1956 + parser->m_startCdataSectionHandler = start; 1957 + parser->m_endCdataSectionHandler = end; 1958 + } 1959 + 1960 + void XMLCALL XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start) { 1961 + if (parser != NULL) parser->m_startCdataSectionHandler = start; 1962 + } 1963 + 1964 + void XMLCALL XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end) { 1965 + if (parser != NULL) parser->m_endCdataSectionHandler = end; 1966 + } 1967 + 1968 + void XMLCALL XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { 1969 + if (parser == NULL) return; 1970 + parser->m_defaultHandler = handler; 1971 + parser->m_defaultExpandInternalEntities = XML_FALSE; 1972 + } 1973 + 1974 + void XMLCALL XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { 1975 + if (parser == NULL) return; 1976 + parser->m_defaultHandler = handler; 1977 + parser->m_defaultExpandInternalEntities = XML_TRUE; 1978 + } 1979 + 1980 + void XMLCALL XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, 1981 + XML_EndDoctypeDeclHandler end) { 1982 + if (parser == NULL) return; 1983 + parser->m_startDoctypeDeclHandler = start; 1984 + parser->m_endDoctypeDeclHandler = end; 1985 + } 
1986 + 1987 + void XMLCALL XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start) { 1988 + if (parser != NULL) parser->m_startDoctypeDeclHandler = start; 1989 + } 1990 + 1991 + void XMLCALL XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { 1992 + if (parser != NULL) parser->m_endDoctypeDeclHandler = end; 1993 + } 1994 + 1995 + void XMLCALL XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { 1996 + if (parser != NULL) parser->m_unparsedEntityDeclHandler = handler; 1997 + } 1998 + 1999 + void XMLCALL XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { 2000 + if (parser != NULL) parser->m_notationDeclHandler = handler; 2001 + } 2002 + 2003 + void XMLCALL XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, 2004 + XML_EndNamespaceDeclHandler end) { 2005 + if (parser == NULL) return; 2006 + parser->m_startNamespaceDeclHandler = start; 2007 + parser->m_endNamespaceDeclHandler = end; 2008 + } 2009 + 2010 + void XMLCALL XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start) { 2011 + if (parser != NULL) parser->m_startNamespaceDeclHandler = start; 2012 + } 2013 + 2014 + void XMLCALL XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end) { 2015 + if (parser != NULL) parser->m_endNamespaceDeclHandler = end; 2016 + } 2017 + 2018 + void XMLCALL XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler) { 2019 + if (parser != NULL) parser->m_notStandaloneHandler = handler; 2020 + } 2021 + 2022 + void XMLCALL XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler) { 2023 + if (parser != NULL) parser->m_externalEntityRefHandler = handler; 2024 + } 2025 + 2026 + void XMLCALL XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void* arg) { 2027 + if (parser == NULL) return; 2028 + if (arg) 2029 + 
parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; 2030 + else 2031 + parser->m_externalEntityRefHandlerArg = parser; 2032 + } 2033 + 2034 + void XMLCALL XML_SetSkippedEntityHandler(XML_Parser parser, XML_SkippedEntityHandler handler) { 2035 + if (parser != NULL) parser->m_skippedEntityHandler = handler; 2036 + } 2037 + 2038 + void XMLCALL XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void* data) { 2039 + if (parser == NULL) return; 2040 + parser->m_unknownEncodingHandler = handler; 2041 + parser->m_unknownEncodingHandlerData = data; 2042 + } 2043 + 2044 + void XMLCALL XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { 2045 + if (parser != NULL) parser->m_elementDeclHandler = eldecl; 2046 + } 2047 + 2048 + void XMLCALL XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { 2049 + if (parser != NULL) parser->m_attlistDeclHandler = attdecl; 2050 + } 2051 + 2052 + void XMLCALL XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { 2053 + if (parser != NULL) parser->m_entityDeclHandler = handler; 2054 + } 2055 + 2056 + void XMLCALL XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { 2057 + if (parser != NULL) parser->m_xmlDeclHandler = handler; 2058 + } 2059 + 2060 + int XMLCALL XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing peParsing) { 2061 + if (parser == NULL) return 0; 2062 + /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2063 + if (parserBusy(parser)) return 0; 2064 + #ifdef XML_DTD 2065 + parser->m_paramEntityParsing = peParsing; 2066 + return 1; 2067 + #else 2068 + return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; 2069 + #endif 2070 + } 2071 + 2072 + int XMLCALL XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { 2073 + if (parser == NULL) return 0; 2074 + 2075 + const XML_Parser rootParser = getRootParserOf(parser, NULL); 2076 + assert(!rootParser->m_parentParser); 2077 + 
2078 + /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2079 + if (parserBusy(rootParser)) return 0; 2080 + rootParser->m_hash_secret_salt = hash_salt; 2081 + return 1; 2082 + } 2083 + 2084 + enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char* s, int len, int isFinal) { 2085 + if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { 2086 + if (parser != NULL) parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 2087 + return XML_STATUS_ERROR; 2088 + } 2089 + switch (parser->m_parsingStatus.parsing) { 2090 + case XML_SUSPENDED: 2091 + parser->m_errorCode = XML_ERROR_SUSPENDED; 2092 + return XML_STATUS_ERROR; 2093 + case XML_FINISHED: 2094 + parser->m_errorCode = XML_ERROR_FINISHED; 2095 + return XML_STATUS_ERROR; 2096 + case XML_INITIALIZED: 2097 + if (parser->m_parentParser == NULL && !startParsing(parser)) { 2098 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2099 + return XML_STATUS_ERROR; 2100 + } 2101 + /* fall through */ 2102 + default: 2103 + parser->m_parsingStatus.parsing = XML_PARSING; 2104 + } 2105 + 2106 + #if XML_CONTEXT_BYTES == 0 2107 + if (parser->m_bufferPtr == parser->m_bufferEnd) { 2108 + const char* end; 2109 + int nLeftOver; 2110 + enum XML_Status result; 2111 + /* Detect overflow (a+b > MAX <==> b > MAX-a) */ 2112 + if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { 2113 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2114 + parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2115 + parser->m_processor = errorProcessor; 2116 + return XML_STATUS_ERROR; 2117 + } 2118 + // though this isn't a buffer request, we assume that `len` is the app's 2119 + // preferred buffer fill size, and therefore save it here. 
2120 + parser->m_lastBufferRequestSize = len; 2121 + parser->m_parseEndByteIndex += len; 2122 + parser->m_positionPtr = s; 2123 + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 2124 + 2125 + parser->m_errorCode = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); 2126 + 2127 + if (parser->m_errorCode != XML_ERROR_NONE) { 2128 + parser->m_eventEndPtr = parser->m_eventPtr; 2129 + parser->m_processor = errorProcessor; 2130 + return XML_STATUS_ERROR; 2131 + } else { 2132 + switch (parser->m_parsingStatus.parsing) { 2133 + case XML_SUSPENDED: 2134 + result = XML_STATUS_SUSPENDED; 2135 + break; 2136 + case XML_INITIALIZED: 2137 + case XML_PARSING: 2138 + if (isFinal) { 2139 + parser->m_parsingStatus.parsing = XML_FINISHED; 2140 + return XML_STATUS_OK; 2141 + } 2142 + /* fall through */ 2143 + default: 2144 + result = XML_STATUS_OK; 2145 + } 2146 + } 2147 + 2148 + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position); 2149 + nLeftOver = s + len - end; 2150 + if (nLeftOver) { 2151 + // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED 2152 + // (and XML_ERROR_FINISHED) from XML_GetBuffer. 2153 + const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; 2154 + parser->m_parsingStatus.parsing = XML_PARSING; 2155 + void* const temp = XML_GetBuffer(parser, nLeftOver); 2156 + parser->m_parsingStatus.parsing = originalStatus; 2157 + // GetBuffer may have overwritten this, but we want to remember what the 2158 + // app requested, not how many bytes were left over after parsing. 2159 + parser->m_lastBufferRequestSize = len; 2160 + if (temp == NULL) { 2161 + // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
2162 + parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2163 + parser->m_processor = errorProcessor; 2164 + return XML_STATUS_ERROR; 2165 + } 2166 + // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we 2167 + // don't have any data to preserve, and can copy straight into the start 2168 + // of the buffer rather than the GetBuffer return pointer (which may be 2169 + // pointing further into the allocated buffer). 2170 + memcpy(parser->m_buffer, end, nLeftOver); 2171 + } 2172 + parser->m_bufferPtr = parser->m_buffer; 2173 + parser->m_bufferEnd = parser->m_buffer + nLeftOver; 2174 + parser->m_positionPtr = parser->m_bufferPtr; 2175 + parser->m_parseEndPtr = parser->m_bufferEnd; 2176 + parser->m_eventPtr = parser->m_bufferPtr; 2177 + parser->m_eventEndPtr = parser->m_bufferPtr; 2178 + return result; 2179 + } 2180 + #endif /* XML_CONTEXT_BYTES == 0 */ 2181 + void* buff = XML_GetBuffer(parser, len); 2182 + if (buff == NULL) return XML_STATUS_ERROR; 2183 + if (len > 0) { 2184 + assert(s != NULL); // make sure s==NULL && len!=0 was rejected above 2185 + memcpy(buff, s, len); 2186 + } 2187 + return XML_ParseBuffer(parser, len, isFinal); 2188 + } 2189 + 2190 + enum XML_Status XMLCALL XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { 2191 + const char* start; 2192 + enum XML_Status result = XML_STATUS_OK; 2193 + 2194 + if (parser == NULL) return XML_STATUS_ERROR; 2195 + 2196 + if (len < 0) { 2197 + parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 2198 + return XML_STATUS_ERROR; 2199 + } 2200 + 2201 + switch (parser->m_parsingStatus.parsing) { 2202 + case XML_SUSPENDED: 2203 + parser->m_errorCode = XML_ERROR_SUSPENDED; 2204 + return XML_STATUS_ERROR; 2205 + case XML_FINISHED: 2206 + parser->m_errorCode = XML_ERROR_FINISHED; 2207 + return XML_STATUS_ERROR; 2208 + case XML_INITIALIZED: 2209 + /* Has someone called XML_GetBuffer successfully before? 
*/ 2210 + if (!parser->m_bufferPtr) { 2211 + parser->m_errorCode = XML_ERROR_NO_BUFFER; 2212 + return XML_STATUS_ERROR; 2213 + } 2214 + 2215 + if (parser->m_parentParser == NULL && !startParsing(parser)) { 2216 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2217 + return XML_STATUS_ERROR; 2218 + } 2219 + /* fall through */ 2220 + default: 2221 + parser->m_parsingStatus.parsing = XML_PARSING; 2222 + } 2223 + 2224 + start = parser->m_bufferPtr; 2225 + parser->m_positionPtr = start; 2226 + parser->m_bufferEnd += len; 2227 + parser->m_parseEndPtr = parser->m_bufferEnd; 2228 + parser->m_parseEndByteIndex += len; 2229 + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 2230 + 2231 + parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); 2232 + 2233 + if (parser->m_errorCode != XML_ERROR_NONE) { 2234 + parser->m_eventEndPtr = parser->m_eventPtr; 2235 + parser->m_processor = errorProcessor; 2236 + return XML_STATUS_ERROR; 2237 + } else { 2238 + switch (parser->m_parsingStatus.parsing) { 2239 + case XML_SUSPENDED: 2240 + result = XML_STATUS_SUSPENDED; 2241 + break; 2242 + case XML_INITIALIZED: 2243 + case XML_PARSING: 2244 + if (isFinal) { 2245 + parser->m_parsingStatus.parsing = XML_FINISHED; 2246 + return result; 2247 + } 2248 + default:; /* should not happen */ 2249 + } 2250 + } 2251 + 2252 + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position); 2253 + parser->m_positionPtr = parser->m_bufferPtr; 2254 + return result; 2255 + } 2256 + 2257 + void* XMLCALL XML_GetBuffer(XML_Parser parser, int len) { 2258 + if (parser == NULL) return NULL; 2259 + if (len < 0) { 2260 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2261 + return NULL; 2262 + } 2263 + switch (parser->m_parsingStatus.parsing) { 2264 + case XML_SUSPENDED: 2265 + parser->m_errorCode = XML_ERROR_SUSPENDED; 2266 + return NULL; 2267 + case XML_FINISHED: 2268 + parser->m_errorCode = XML_ERROR_FINISHED; 2269 + return NULL; 
2270 + default:; 2271 + } 2272 + 2273 + // whether or not the request succeeds, `len` seems to be the app's preferred 2274 + // buffer fill size; remember it. 2275 + parser->m_lastBufferRequestSize = len; 2276 + if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) || parser->m_buffer == NULL) { 2277 + #if XML_CONTEXT_BYTES > 0 2278 + int keep; 2279 + #endif /* XML_CONTEXT_BYTES > 0 */ 2280 + /* Do not invoke signed arithmetic overflow: */ 2281 + int neededSize = (int)((unsigned)len + (unsigned)EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2282 + if (neededSize < 0) { 2283 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2284 + return NULL; 2285 + } 2286 + #if XML_CONTEXT_BYTES > 0 2287 + keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 2288 + if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; 2289 + /* Detect and prevent integer overflow */ 2290 + if (keep > INT_MAX - neededSize) { 2291 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2292 + return NULL; 2293 + } 2294 + neededSize += keep; 2295 + #endif /* XML_CONTEXT_BYTES > 0 */ 2296 + if (parser->m_buffer && parser->m_bufferPtr && 2297 + neededSize <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { 2298 + #if XML_CONTEXT_BYTES > 0 2299 + if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { 2300 + int offset = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) - keep; 2301 + /* The buffer pointers cannot be NULL here; we have at least some bytes 2302 + * in the buffer */ 2303 + memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep); 2304 + parser->m_bufferEnd -= offset; 2305 + parser->m_bufferPtr -= offset; 2306 + } 2307 + #else 2308 + memmove(parser->m_buffer, parser->m_bufferPtr, EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2309 + parser->m_bufferEnd = parser->m_buffer + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2310 + 
parser->m_bufferPtr = parser->m_buffer; 2311 + #endif /* XML_CONTEXT_BYTES > 0 */ 2312 + } else { 2313 + char* newBuf; 2314 + int bufferSize = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); 2315 + if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; 2316 + do { 2317 + /* Do not invoke signed arithmetic overflow: */ 2318 + bufferSize = (int)(2U * (unsigned)bufferSize); 2319 + } while (bufferSize < neededSize && bufferSize > 0); 2320 + if (bufferSize <= 0) { 2321 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2322 + return NULL; 2323 + } 2324 + // NOTE: We are avoiding MALLOC(..) here to leave limiting 2325 + // the input size to the application using Expat. 2326 + newBuf = parser->m_mem.malloc_fcn(bufferSize); 2327 + if (newBuf == 0) { 2328 + parser->m_errorCode = XML_ERROR_NO_MEMORY; 2329 + return NULL; 2330 + } 2331 + parser->m_bufferLim = newBuf + bufferSize; 2332 + #if XML_CONTEXT_BYTES > 0 2333 + if (parser->m_bufferPtr) { 2334 + memcpy(newBuf, &parser->m_bufferPtr[-keep], 2335 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep); 2336 + // NOTE: We are avoiding FREE(..) here because parser->m_buffer 2337 + // is not being allocated with MALLOC(..) but with plain 2338 + // .malloc_fcn(..). 2339 + parser->m_mem.free_fcn(parser->m_buffer); 2340 + parser->m_buffer = newBuf; 2341 + parser->m_bufferEnd = parser->m_buffer + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep; 2342 + parser->m_bufferPtr = parser->m_buffer + keep; 2343 + } else { 2344 + /* This must be a brand new buffer with no data in it yet */ 2345 + parser->m_bufferEnd = newBuf; 2346 + parser->m_bufferPtr = parser->m_buffer = newBuf; 2347 + } 2348 + #else 2349 + if (parser->m_bufferPtr) { 2350 + memcpy(newBuf, parser->m_bufferPtr, EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2351 + // NOTE: We are avoiding FREE(..) here because parser->m_buffer 2352 + // is not being allocated with MALLOC(..) 
but with plain 2353 + // .malloc_fcn(..). 2354 + parser->m_mem.free_fcn(parser->m_buffer); 2355 + parser->m_bufferEnd = newBuf + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2356 + } else { 2357 + /* This must be a brand new buffer with no data in it yet */ 2358 + parser->m_bufferEnd = newBuf; 2359 + } 2360 + parser->m_bufferPtr = parser->m_buffer = newBuf; 2361 + #endif /* XML_CONTEXT_BYTES > 0 */ 2362 + } 2363 + parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2364 + parser->m_positionPtr = NULL; 2365 + } 2366 + return parser->m_bufferEnd; 2367 + } 2368 + 2369 + static void triggerReenter(XML_Parser parser) { parser->m_reenter = XML_TRUE; } 2370 + 2371 + enum XML_Status XMLCALL XML_StopParser(XML_Parser parser, XML_Bool resumable) { 2372 + if (parser == NULL) return XML_STATUS_ERROR; 2373 + switch (parser->m_parsingStatus.parsing) { 2374 + case XML_INITIALIZED: 2375 + parser->m_errorCode = XML_ERROR_NOT_STARTED; 2376 + return XML_STATUS_ERROR; 2377 + case XML_SUSPENDED: 2378 + if (resumable) { 2379 + parser->m_errorCode = XML_ERROR_SUSPENDED; 2380 + return XML_STATUS_ERROR; 2381 + } 2382 + parser->m_parsingStatus.parsing = XML_FINISHED; 2383 + break; 2384 + case XML_FINISHED: 2385 + parser->m_errorCode = XML_ERROR_FINISHED; 2386 + return XML_STATUS_ERROR; 2387 + case XML_PARSING: 2388 + if (resumable) { 2389 + #ifdef XML_DTD 2390 + if (parser->m_isParamEntity) { 2391 + parser->m_errorCode = XML_ERROR_SUSPEND_PE; 2392 + return XML_STATUS_ERROR; 2393 + } 2394 + #endif 2395 + parser->m_parsingStatus.parsing = XML_SUSPENDED; 2396 + } else 2397 + parser->m_parsingStatus.parsing = XML_FINISHED; 2398 + break; 2399 + default: 2400 + assert(0); 2401 + } 2402 + return XML_STATUS_OK; 2403 + } 2404 + 2405 + enum XML_Status XMLCALL XML_ResumeParser(XML_Parser parser) { 2406 + enum XML_Status result = XML_STATUS_OK; 2407 + 2408 + if (parser == NULL) return XML_STATUS_ERROR; 2409 + if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { 2410 + 
parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; 2411 + return XML_STATUS_ERROR; 2412 + } 2413 + parser->m_parsingStatus.parsing = XML_PARSING; 2414 + 2415 + parser->m_errorCode = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); 2416 + 2417 + if (parser->m_errorCode != XML_ERROR_NONE) { 2418 + parser->m_eventEndPtr = parser->m_eventPtr; 2419 + parser->m_processor = errorProcessor; 2420 + return XML_STATUS_ERROR; 2421 + } else { 2422 + switch (parser->m_parsingStatus.parsing) { 2423 + case XML_SUSPENDED: 2424 + result = XML_STATUS_SUSPENDED; 2425 + break; 2426 + case XML_INITIALIZED: 2427 + case XML_PARSING: 2428 + if (parser->m_parsingStatus.finalBuffer) { 2429 + parser->m_parsingStatus.parsing = XML_FINISHED; 2430 + return result; 2431 + } 2432 + default:; 2433 + } 2434 + } 2435 + 2436 + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position); 2437 + parser->m_positionPtr = parser->m_bufferPtr; 2438 + return result; 2439 + } 2440 + 2441 + void XMLCALL XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus* status) { 2442 + if (parser == NULL) return; 2443 + assert(status != NULL); 2444 + *status = parser->m_parsingStatus; 2445 + } 2446 + 2447 + enum XML_Error XMLCALL XML_GetErrorCode(XML_Parser parser) { 2448 + if (parser == NULL) return XML_ERROR_INVALID_ARGUMENT; 2449 + return parser->m_errorCode; 2450 + } 2451 + 2452 + XML_Index XMLCALL XML_GetCurrentByteIndex(XML_Parser parser) { 2453 + if (parser == NULL) return -1; 2454 + if (parser->m_eventPtr) 2455 + return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr)); 2456 + return -1; 2457 + } 2458 + 2459 + int XMLCALL XML_GetCurrentByteCount(XML_Parser parser) { 2460 + if (parser == NULL) return 0; 2461 + if (parser->m_eventEndPtr && parser->m_eventPtr) return (int)(parser->m_eventEndPtr - parser->m_eventPtr); 2462 + return 0; 2463 + } 2464 + 2465 + const char* XMLCALL 
XML_GetInputContext(XML_Parser parser, int* offset, int* size) { 2466 + #if XML_CONTEXT_BYTES > 0 2467 + if (parser == NULL) return NULL; 2468 + if (parser->m_eventPtr && parser->m_buffer) { 2469 + if (offset != NULL) *offset = (int)(parser->m_eventPtr - parser->m_buffer); 2470 + if (size != NULL) *size = (int)(parser->m_bufferEnd - parser->m_buffer); 2471 + return parser->m_buffer; 2472 + } 2473 + #else 2474 + (void)parser; 2475 + (void)offset; 2476 + (void)size; 2477 + #endif /* XML_CONTEXT_BYTES > 0 */ 2478 + return (const char*)0; 2479 + } 2480 + 2481 + XML_Size XMLCALL XML_GetCurrentLineNumber(XML_Parser parser) { 2482 + if (parser == NULL) return 0; 2483 + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2484 + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position); 2485 + parser->m_positionPtr = parser->m_eventPtr; 2486 + } 2487 + return parser->m_position.lineNumber + 1; 2488 + } 2489 + 2490 + XML_Size XMLCALL XML_GetCurrentColumnNumber(XML_Parser parser) { 2491 + if (parser == NULL) return 0; 2492 + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2493 + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position); 2494 + parser->m_positionPtr = parser->m_eventPtr; 2495 + } 2496 + return parser->m_position.columnNumber; 2497 + } 2498 + 2499 + void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content* model) { 2500 + if (parser == NULL) return; 2501 + 2502 + // NOTE: We are avoiding FREE(..) here because the content model 2503 + // has been created using plain .malloc_fcn(..) rather than MALLOC(..). 2504 + parser->m_mem.free_fcn(model); 2505 + } 2506 + 2507 + void* XMLCALL XML_MemMalloc(XML_Parser parser, size_t size) { 2508 + if (parser == NULL) return NULL; 2509 + 2510 + // NOTE: We are avoiding MALLOC(..) here to not include 2511 + // user allocations with allocation tracking and limiting. 
2512 + return parser->m_mem.malloc_fcn(size); 2513 + } 2514 + 2515 + void* XMLCALL XML_MemRealloc(XML_Parser parser, void* ptr, size_t size) { 2516 + if (parser == NULL) return NULL; 2517 + 2518 + // NOTE: We are avoiding REALLOC(..) here to not include 2519 + // user allocations with allocation tracking and limiting. 2520 + return parser->m_mem.realloc_fcn(ptr, size); 2521 + } 2522 + 2523 + void XMLCALL XML_MemFree(XML_Parser parser, void* ptr) { 2524 + if (parser == NULL) return; 2525 + 2526 + // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and 2527 + // XML_MemRealloc are not using MALLOC(..) and REALLOC(..) 2528 + // but plain .malloc_fcn(..) and .realloc_fcn(..), internally. 2529 + parser->m_mem.free_fcn(ptr); 2530 + } 2531 + 2532 + void XMLCALL XML_DefaultCurrent(XML_Parser parser) { 2533 + if (parser == NULL) return; 2534 + if (parser->m_defaultHandler) { 2535 + if (parser->m_openInternalEntities) 2536 + reportDefault(parser, parser->m_internalEncoding, parser->m_openInternalEntities->internalEventPtr, 2537 + parser->m_openInternalEntities->internalEventEndPtr); 2538 + else 2539 + reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr); 2540 + } 2541 + } 2542 + 2543 + const XML_LChar* XMLCALL XML_ErrorString(enum XML_Error code) { 2544 + switch (code) { 2545 + case XML_ERROR_NONE: 2546 + return NULL; 2547 + case XML_ERROR_NO_MEMORY: 2548 + return XML_L("out of memory"); 2549 + case XML_ERROR_SYNTAX: 2550 + return XML_L("syntax error"); 2551 + case XML_ERROR_NO_ELEMENTS: 2552 + return XML_L("no element found"); 2553 + case XML_ERROR_INVALID_TOKEN: 2554 + return XML_L("not well-formed (invalid token)"); 2555 + case XML_ERROR_UNCLOSED_TOKEN: 2556 + return XML_L("unclosed token"); 2557 + case XML_ERROR_PARTIAL_CHAR: 2558 + return XML_L("partial character"); 2559 + case XML_ERROR_TAG_MISMATCH: 2560 + return XML_L("mismatched tag"); 2561 + case XML_ERROR_DUPLICATE_ATTRIBUTE: 2562 + return XML_L("duplicate attribute"); 2563 
+ case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: 2564 + return XML_L("junk after document element"); 2565 + case XML_ERROR_PARAM_ENTITY_REF: 2566 + return XML_L("illegal parameter entity reference"); 2567 + case XML_ERROR_UNDEFINED_ENTITY: 2568 + return XML_L("undefined entity"); 2569 + case XML_ERROR_RECURSIVE_ENTITY_REF: 2570 + return XML_L("recursive entity reference"); 2571 + case XML_ERROR_ASYNC_ENTITY: 2572 + return XML_L("asynchronous entity"); 2573 + case XML_ERROR_BAD_CHAR_REF: 2574 + return XML_L("reference to invalid character number"); 2575 + case XML_ERROR_BINARY_ENTITY_REF: 2576 + return XML_L("reference to binary entity"); 2577 + case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: 2578 + return XML_L("reference to external entity in attribute"); 2579 + case XML_ERROR_MISPLACED_XML_PI: 2580 + return XML_L("XML or text declaration not at start of entity"); 2581 + case XML_ERROR_UNKNOWN_ENCODING: 2582 + return XML_L("unknown encoding"); 2583 + case XML_ERROR_INCORRECT_ENCODING: 2584 + return XML_L("encoding specified in XML declaration is incorrect"); 2585 + case XML_ERROR_UNCLOSED_CDATA_SECTION: 2586 + return XML_L("unclosed CDATA section"); 2587 + case XML_ERROR_EXTERNAL_ENTITY_HANDLING: 2588 + return XML_L("error in processing external entity reference"); 2589 + case XML_ERROR_NOT_STANDALONE: 2590 + return XML_L("document is not standalone"); 2591 + case XML_ERROR_UNEXPECTED_STATE: 2592 + return XML_L("unexpected parser state - please send a bug report"); 2593 + case XML_ERROR_ENTITY_DECLARED_IN_PE: 2594 + return XML_L("entity declared in parameter entity"); 2595 + case XML_ERROR_FEATURE_REQUIRES_XML_DTD: 2596 + return XML_L("requested feature requires XML_DTD support in Expat"); 2597 + case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: 2598 + return XML_L("cannot change setting once parsing has begun"); 2599 + /* Added in 1.95.7. */ 2600 + case XML_ERROR_UNBOUND_PREFIX: 2601 + return XML_L("unbound prefix"); 2602 + /* Added in 1.95.8. 
*/ 2603 + case XML_ERROR_UNDECLARING_PREFIX: 2604 + return XML_L("must not undeclare prefix"); 2605 + case XML_ERROR_INCOMPLETE_PE: 2606 + return XML_L("incomplete markup in parameter entity"); 2607 + case XML_ERROR_XML_DECL: 2608 + return XML_L("XML declaration not well-formed"); 2609 + case XML_ERROR_TEXT_DECL: 2610 + return XML_L("text declaration not well-formed"); 2611 + case XML_ERROR_PUBLICID: 2612 + return XML_L("illegal character(s) in public id"); 2613 + case XML_ERROR_SUSPENDED: 2614 + return XML_L("parser suspended"); 2615 + case XML_ERROR_NOT_SUSPENDED: 2616 + return XML_L("parser not suspended"); 2617 + case XML_ERROR_ABORTED: 2618 + return XML_L("parsing aborted"); 2619 + case XML_ERROR_FINISHED: 2620 + return XML_L("parsing finished"); 2621 + case XML_ERROR_SUSPEND_PE: 2622 + return XML_L("cannot suspend in external parameter entity"); 2623 + /* Added in 2.0.0. */ 2624 + case XML_ERROR_RESERVED_PREFIX_XML: 2625 + return XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"); 2626 + case XML_ERROR_RESERVED_PREFIX_XMLNS: 2627 + return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); 2628 + case XML_ERROR_RESERVED_NAMESPACE_URI: 2629 + return XML_L("prefix must not be bound to one of the reserved namespace names"); 2630 + /* Added in 2.2.5. */ 2631 + case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ 2632 + return XML_L("invalid argument"); 2633 + /* Added in 2.3.0. */ 2634 + case XML_ERROR_NO_BUFFER: 2635 + return XML_L("a successful prior call to function XML_GetBuffer is required"); 2636 + /* Added in 2.4.0. */ 2637 + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: 2638 + return XML_L("limit on input amplification factor (from DTD and entities) breached"); 2639 + /* Added in 2.6.4. 
*/ 2640 + case XML_ERROR_NOT_STARTED: 2641 + return XML_L("parser not started"); 2642 + } 2643 + return NULL; 2644 + } 2645 + 2646 + const XML_LChar* XMLCALL XML_ExpatVersion(void) { 2647 + /* V1 is used to string-ize the version number. However, it would 2648 + string-ize the actual version macro *names* unless we get them 2649 + substituted before being passed to V1. CPP is defined to expand 2650 + a macro, then rescan for more expansions. Thus, we use V2 to expand 2651 + the version macros, then CPP will expand the resulting V1() macro 2652 + with the correct numerals. */ 2653 + /* ### I'm assuming cpp is portable in this respect... */ 2654 + 2655 + #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) 2656 + #define V2(a, b, c) XML_L("expat_") V1(a, b, c) 2657 + 2658 + return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); 2659 + 2660 + #undef V1 2661 + #undef V2 2662 + } 2663 + 2664 + XML_Expat_Version XMLCALL XML_ExpatVersionInfo(void) { 2665 + XML_Expat_Version version; 2666 + 2667 + version.major = XML_MAJOR_VERSION; 2668 + version.minor = XML_MINOR_VERSION; 2669 + version.micro = XML_MICRO_VERSION; 2670 + 2671 + return version; 2672 + } 2673 + 2674 + const XML_Feature* XMLCALL XML_GetFeatureList(void) { 2675 + static const XML_Feature features[] = { 2676 + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), sizeof(XML_Char)}, 2677 + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), sizeof(XML_LChar)}, 2678 + #ifdef XML_UNICODE 2679 + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, 2680 + #endif 2681 + #ifdef XML_UNICODE_WCHAR_T 2682 + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, 2683 + #endif 2684 + #ifdef XML_DTD 2685 + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, 2686 + #endif 2687 + #if XML_CONTEXT_BYTES > 0 2688 + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), XML_CONTEXT_BYTES}, 2689 + #endif 2690 + #ifdef XML_MIN_SIZE 2691 + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, 2692 + 
#endif 2693 + #ifdef XML_NS 2694 + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, 2695 + #endif 2696 + #ifdef XML_LARGE_SIZE 2697 + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, 2698 + #endif 2699 + #ifdef XML_ATTR_INFO 2700 + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, 2701 + #endif 2702 + #if XML_GE == 1 2703 + /* Added in Expat 2.4.0 for XML_DTD defined and 2704 + * added in Expat 2.6.0 for XML_GE == 1. */ 2705 + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_L("XML_BLAP_MAX_AMP"), 2706 + (long int)EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, 2707 + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, XML_L("XML_BLAP_ACT_THRES"), 2708 + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, 2709 + /* Added in Expat 2.6.0. */ 2710 + {XML_FEATURE_GE, XML_L("XML_GE"), 0}, 2711 + /* Added in Expat 2.7.2. */ 2712 + {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, XML_L("XML_AT_MAX_AMP"), 2713 + (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT}, 2714 + {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, XML_L("XML_AT_ACT_THRES"), 2715 + (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT}, 2716 + #endif 2717 + {XML_FEATURE_END, NULL, 0}}; 2718 + 2719 + return features; 2720 + } 2721 + 2722 + #if XML_GE == 1 2723 + XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser, 2724 + float maximumAmplificationFactor) { 2725 + if ((parser == NULL) || (parser->m_parentParser != NULL) || isnan(maximumAmplificationFactor) || 2726 + (maximumAmplificationFactor < 1.0f)) { 2727 + return XML_FALSE; 2728 + } 2729 + parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; 2730 + return XML_TRUE; 2731 + } 2732 + 2733 + XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser, 2734 + unsigned long long activationThresholdBytes) { 2735 + if ((parser == NULL) || 
(parser->m_parentParser != NULL)) { 2736 + return XML_FALSE; 2737 + } 2738 + parser->m_accounting.activationThresholdBytes = activationThresholdBytes; 2739 + return XML_TRUE; 2740 + } 2741 + 2742 + XML_Bool XMLCALL XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, float maximumAmplificationFactor) { 2743 + if ((parser == NULL) || (parser->m_parentParser != NULL) || isnan(maximumAmplificationFactor) || 2744 + (maximumAmplificationFactor < 1.0f)) { 2745 + return XML_FALSE; 2746 + } 2747 + parser->m_alloc_tracker.maximumAmplificationFactor = maximumAmplificationFactor; 2748 + return XML_TRUE; 2749 + } 2750 + 2751 + XML_Bool XMLCALL XML_SetAllocTrackerActivationThreshold(XML_Parser parser, 2752 + unsigned long long activationThresholdBytes) { 2753 + if ((parser == NULL) || (parser->m_parentParser != NULL)) { 2754 + return XML_FALSE; 2755 + } 2756 + parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes; 2757 + return XML_TRUE; 2758 + } 2759 + #endif /* XML_GE == 1 */ 2760 + 2761 + XML_Bool XMLCALL XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { 2762 + if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { 2763 + parser->m_reparseDeferralEnabled = enabled; 2764 + return XML_TRUE; 2765 + } 2766 + return XML_FALSE; 2767 + } 2768 + 2769 + /* Initially tag->rawName always points into the parse buffer; 2770 + for those TAG instances opened while the current parse buffer was 2771 + processed, and not yet closed, we need to store tag->rawName in a more 2772 + permanent location, since the parse buffer is about to be discarded. 2773 + */ 2774 + static XML_Bool storeRawNames(XML_Parser parser) { 2775 + TAG* tag = parser->m_tagStack; 2776 + while (tag) { 2777 + size_t bufSize; 2778 + size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); 2779 + size_t rawNameLen; 2780 + char* rawNameBuf = tag->buf + nameLen; 2781 + /* Stop if already stored. 
Since m_tagStack is a stack, we can stop 2782 + at the first entry that has already been copied; everything 2783 + below it in the stack is already been accounted for in a 2784 + previous call to this function. 2785 + */ 2786 + if (tag->rawName == rawNameBuf) break; 2787 + /* For reuse purposes we need to ensure that the 2788 + size of tag->buf is a multiple of sizeof(XML_Char). 2789 + */ 2790 + rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); 2791 + /* Detect and prevent integer overflow. */ 2792 + if (rawNameLen > (size_t)INT_MAX - nameLen) return XML_FALSE; 2793 + bufSize = nameLen + rawNameLen; 2794 + if (bufSize > (size_t)(tag->bufEnd - tag->buf)) { 2795 + char* temp = REALLOC(parser, tag->buf, bufSize); 2796 + if (temp == NULL) return XML_FALSE; 2797 + /* if tag->name.str points to tag->buf (only when namespace 2798 + processing is off) then we have to update it 2799 + */ 2800 + if (tag->name.str == (XML_Char*)tag->buf) tag->name.str = (XML_Char*)temp; 2801 + /* if tag->name.localPart is set (when namespace processing is on) 2802 + then update it as well, since it will always point into tag->buf 2803 + */ 2804 + if (tag->name.localPart) tag->name.localPart = (XML_Char*)temp + (tag->name.localPart - (XML_Char*)tag->buf); 2805 + tag->buf = temp; 2806 + tag->bufEnd = temp + bufSize; 2807 + rawNameBuf = temp + nameLen; 2808 + } 2809 + memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); 2810 + tag->rawName = rawNameBuf; 2811 + tag = tag->parent; 2812 + } 2813 + return XML_TRUE; 2814 + } 2815 + 2816 + static enum XML_Error PTRCALL contentProcessor(XML_Parser parser, const char* start, const char* end, 2817 + const char** endPtr) { 2818 + enum XML_Error result = doContent(parser, parser->m_parentParser ? 
1 : 0, parser->m_encoding, start, end, endPtr, 2819 + (XML_Bool)!parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 2820 + if (result == XML_ERROR_NONE) { 2821 + if (!storeRawNames(parser)) return XML_ERROR_NO_MEMORY; 2822 + } 2823 + return result; 2824 + } 2825 + 2826 + static enum XML_Error PTRCALL externalEntityInitProcessor(XML_Parser parser, const char* start, const char* end, 2827 + const char** endPtr) { 2828 + enum XML_Error result = initializeEncoding(parser); 2829 + if (result != XML_ERROR_NONE) return result; 2830 + parser->m_processor = externalEntityInitProcessor2; 2831 + return externalEntityInitProcessor2(parser, start, end, endPtr); 2832 + } 2833 + 2834 + static enum XML_Error PTRCALL externalEntityInitProcessor2(XML_Parser parser, const char* start, const char* end, 2835 + const char** endPtr) { 2836 + const char* next = start; /* XmlContentTok doesn't always set the last arg */ 2837 + int tok = XmlContentTok(parser->m_encoding, start, end, &next); 2838 + switch (tok) { 2839 + case XML_TOK_BOM: 2840 + #if XML_GE == 1 2841 + if (!accountingDiffTolerated(parser, tok, start, next, __LINE__, XML_ACCOUNT_DIRECT)) { 2842 + accountingOnAbort(parser); 2843 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 2844 + } 2845 + #endif /* XML_GE == 1 */ 2846 + 2847 + /* If we are at the end of the buffer, this would cause the next stage, 2848 + i.e. externalEntityInitProcessor3, to pass control directly to 2849 + doContent (by detecting XML_TOK_NONE) without processing any xml text 2850 + declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. 
2851 + */ 2852 + if (next == end && !parser->m_parsingStatus.finalBuffer) { 2853 + *endPtr = next; 2854 + return XML_ERROR_NONE; 2855 + } 2856 + start = next; 2857 + break; 2858 + case XML_TOK_PARTIAL: 2859 + if (!parser->m_parsingStatus.finalBuffer) { 2860 + *endPtr = start; 2861 + return XML_ERROR_NONE; 2862 + } 2863 + parser->m_eventPtr = start; 2864 + return XML_ERROR_UNCLOSED_TOKEN; 2865 + case XML_TOK_PARTIAL_CHAR: 2866 + if (!parser->m_parsingStatus.finalBuffer) { 2867 + *endPtr = start; 2868 + return XML_ERROR_NONE; 2869 + } 2870 + parser->m_eventPtr = start; 2871 + return XML_ERROR_PARTIAL_CHAR; 2872 + } 2873 + parser->m_processor = externalEntityInitProcessor3; 2874 + return externalEntityInitProcessor3(parser, start, end, endPtr); 2875 + } 2876 + 2877 + static enum XML_Error PTRCALL externalEntityInitProcessor3(XML_Parser parser, const char* start, const char* end, 2878 + const char** endPtr) { 2879 + int tok; 2880 + const char* next = start; /* XmlContentTok doesn't always set the last arg */ 2881 + parser->m_eventPtr = start; 2882 + tok = XmlContentTok(parser->m_encoding, start, end, &next); 2883 + /* Note: These bytes are accounted later in: 2884 + - processXmlDecl 2885 + - externalEntityContentProcessor 2886 + */ 2887 + parser->m_eventEndPtr = next; 2888 + 2889 + switch (tok) { 2890 + case XML_TOK_XML_DECL: { 2891 + enum XML_Error result; 2892 + result = processXmlDecl(parser, 1, start, next); 2893 + if (result != XML_ERROR_NONE) return result; 2894 + switch (parser->m_parsingStatus.parsing) { 2895 + case XML_SUSPENDED: 2896 + *endPtr = next; 2897 + return XML_ERROR_NONE; 2898 + case XML_FINISHED: 2899 + return XML_ERROR_ABORTED; 2900 + case XML_PARSING: 2901 + if (parser->m_reenter) { 2902 + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE 2903 + } 2904 + /* Fall through */ 2905 + default: 2906 + start = next; 2907 + } 2908 + } break; 2909 + case XML_TOK_PARTIAL: 2910 + if (!parser->m_parsingStatus.finalBuffer) { 2911 + *endPtr = start; 2912 + 
return XML_ERROR_NONE; 2913 + } 2914 + return XML_ERROR_UNCLOSED_TOKEN; 2915 + case XML_TOK_PARTIAL_CHAR: 2916 + if (!parser->m_parsingStatus.finalBuffer) { 2917 + *endPtr = start; 2918 + return XML_ERROR_NONE; 2919 + } 2920 + return XML_ERROR_PARTIAL_CHAR; 2921 + } 2922 + parser->m_processor = externalEntityContentProcessor; 2923 + parser->m_tagLevel = 1; 2924 + return externalEntityContentProcessor(parser, start, end, endPtr); 2925 + } 2926 + 2927 + static enum XML_Error PTRCALL externalEntityContentProcessor(XML_Parser parser, const char* start, const char* end, 2928 + const char** endPtr) { 2929 + enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end, endPtr, 2930 + (XML_Bool)!parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_ENTITY_EXPANSION); 2931 + if (result == XML_ERROR_NONE) { 2932 + if (!storeRawNames(parser)) return XML_ERROR_NO_MEMORY; 2933 + } 2934 + return result; 2935 + } 2936 + 2937 + static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING* enc, const char* s, 2938 + const char* end, const char** nextPtr, XML_Bool haveMore, enum XML_Account account) { 2939 + /* save one level of indirection */ 2940 + DTD* const dtd = parser->m_dtd; 2941 + 2942 + const char** eventPP; 2943 + const char** eventEndPP; 2944 + if (enc == parser->m_encoding) { 2945 + eventPP = &parser->m_eventPtr; 2946 + eventEndPP = &parser->m_eventEndPtr; 2947 + } else { 2948 + eventPP = &(parser->m_openInternalEntities->internalEventPtr); 2949 + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 2950 + } 2951 + *eventPP = s; 2952 + 2953 + for (;;) { 2954 + const char* next = s; /* XmlContentTok doesn't always set the last arg */ 2955 + int tok = XmlContentTok(enc, s, end, &next); 2956 + #if XML_GE == 1 2957 + const char* accountAfter = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) 2958 + ? (haveMore ? s /* i.e. 
0 bytes */ : end) 2959 + : next; 2960 + if (!accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, account)) { 2961 + accountingOnAbort(parser); 2962 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 2963 + } 2964 + #endif 2965 + *eventEndPP = next; 2966 + switch (tok) { 2967 + case XML_TOK_TRAILING_CR: 2968 + if (haveMore) { 2969 + *nextPtr = s; 2970 + return XML_ERROR_NONE; 2971 + } 2972 + *eventEndPP = end; 2973 + if (parser->m_characterDataHandler) { 2974 + XML_Char c = 0xA; 2975 + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 2976 + } else if (parser->m_defaultHandler) 2977 + reportDefault(parser, enc, s, end); 2978 + /* We are at the end of the final buffer, should we check for 2979 + XML_SUSPENDED, XML_FINISHED? 2980 + */ 2981 + if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; 2982 + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; 2983 + *nextPtr = end; 2984 + return XML_ERROR_NONE; 2985 + case XML_TOK_NONE: 2986 + if (haveMore) { 2987 + *nextPtr = s; 2988 + return XML_ERROR_NONE; 2989 + } 2990 + if (startTagLevel > 0) { 2991 + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; 2992 + *nextPtr = s; 2993 + return XML_ERROR_NONE; 2994 + } 2995 + return XML_ERROR_NO_ELEMENTS; 2996 + case XML_TOK_INVALID: 2997 + *eventPP = next; 2998 + return XML_ERROR_INVALID_TOKEN; 2999 + case XML_TOK_PARTIAL: 3000 + if (haveMore) { 3001 + *nextPtr = s; 3002 + return XML_ERROR_NONE; 3003 + } 3004 + return XML_ERROR_UNCLOSED_TOKEN; 3005 + case XML_TOK_PARTIAL_CHAR: 3006 + if (haveMore) { 3007 + *nextPtr = s; 3008 + return XML_ERROR_NONE; 3009 + } 3010 + return XML_ERROR_PARTIAL_CHAR; 3011 + case XML_TOK_ENTITY_REF: { 3012 + const XML_Char* name; 3013 + ENTITY* entity; 3014 + XML_Char ch = (XML_Char)XmlPredefinedEntityName(enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 3015 + if (ch) { 3016 + #if XML_GE == 1 3017 + /* NOTE: We are replacing 4-6 characters original input for 1 character 3018 + * 
so there is no amplification and hence recording without 3019 + * protection. */ 3020 + accountingDiffTolerated(parser, tok, (char*)&ch, ((char*)&ch) + sizeof(XML_Char), __LINE__, 3021 + XML_ACCOUNT_ENTITY_EXPANSION); 3022 + #endif /* XML_GE == 1 */ 3023 + if (parser->m_characterDataHandler) 3024 + parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); 3025 + else if (parser->m_defaultHandler) 3026 + reportDefault(parser, enc, s, next); 3027 + break; 3028 + } 3029 + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 3030 + if (!name) return XML_ERROR_NO_MEMORY; 3031 + entity = (ENTITY*)lookup(parser, &dtd->generalEntities, name, 0); 3032 + poolDiscard(&dtd->pool); 3033 + /* First, determine if a check for an existing declaration is needed; 3034 + if yes, check that the entity exists, and that it is internal, 3035 + otherwise call the skipped entity or default handler. 3036 + */ 3037 + if (!dtd->hasParamEntityRefs || dtd->standalone) { 3038 + if (!entity) 3039 + return XML_ERROR_UNDEFINED_ENTITY; 3040 + else if (!entity->is_internal) 3041 + return XML_ERROR_ENTITY_DECLARED_IN_PE; 3042 + } else if (!entity) { 3043 + if (parser->m_skippedEntityHandler) 3044 + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 3045 + else if (parser->m_defaultHandler) 3046 + reportDefault(parser, enc, s, next); 3047 + break; 3048 + } 3049 + if (entity->open) return XML_ERROR_RECURSIVE_ENTITY_REF; 3050 + if (entity->notation) return XML_ERROR_BINARY_ENTITY_REF; 3051 + if (entity->textPtr) { 3052 + enum XML_Error result; 3053 + if (!parser->m_defaultExpandInternalEntities) { 3054 + if (parser->m_skippedEntityHandler) 3055 + parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0); 3056 + else if (parser->m_defaultHandler) 3057 + reportDefault(parser, enc, s, next); 3058 + break; 3059 + } 3060 + result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL); 3061 + if (result != XML_ERROR_NONE) return result; 
3062 + } else if (parser->m_externalEntityRefHandler) { 3063 + const XML_Char* context; 3064 + entity->open = XML_TRUE; 3065 + context = getContext(parser); 3066 + entity->open = XML_FALSE; 3067 + if (!context) return XML_ERROR_NO_MEMORY; 3068 + if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg, context, entity->base, 3069 + entity->systemId, entity->publicId)) 3070 + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 3071 + poolDiscard(&parser->m_tempPool); 3072 + } else if (parser->m_defaultHandler) 3073 + reportDefault(parser, enc, s, next); 3074 + break; 3075 + } 3076 + case XML_TOK_START_TAG_NO_ATTS: 3077 + /* fall through */ 3078 + case XML_TOK_START_TAG_WITH_ATTS: { 3079 + TAG* tag; 3080 + enum XML_Error result; 3081 + XML_Char* toPtr; 3082 + if (parser->m_freeTagList) { 3083 + tag = parser->m_freeTagList; 3084 + parser->m_freeTagList = parser->m_freeTagList->parent; 3085 + } else { 3086 + tag = MALLOC(parser, sizeof(TAG)); 3087 + if (!tag) return XML_ERROR_NO_MEMORY; 3088 + tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE); 3089 + if (!tag->buf) { 3090 + FREE(parser, tag); 3091 + return XML_ERROR_NO_MEMORY; 3092 + } 3093 + tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; 3094 + } 3095 + tag->bindings = NULL; 3096 + tag->parent = parser->m_tagStack; 3097 + parser->m_tagStack = tag; 3098 + tag->name.localPart = NULL; 3099 + tag->name.prefix = NULL; 3100 + tag->rawName = s + enc->minBytesPerChar; 3101 + tag->rawNameLength = XmlNameLength(enc, tag->rawName); 3102 + ++parser->m_tagLevel; 3103 + { 3104 + const char* rawNameEnd = tag->rawName + tag->rawNameLength; 3105 + const char* fromPtr = tag->rawName; 3106 + toPtr = (XML_Char*)tag->buf; 3107 + for (;;) { 3108 + int convLen; 3109 + const enum XML_Convert_Result convert_res = 3110 + XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR**)&toPtr, (ICHAR*)tag->bufEnd - 1); 3111 + convLen = (int)(toPtr - (XML_Char*)tag->buf); 3112 + if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { 
3113 + tag->name.strLen = convLen; 3114 + break; 3115 + } 3116 + if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf)) return XML_ERROR_NO_MEMORY; 3117 + const size_t bufSize = (size_t)(tag->bufEnd - tag->buf) * 2; 3118 + { 3119 + char* temp = REALLOC(parser, tag->buf, bufSize); 3120 + if (temp == NULL) return XML_ERROR_NO_MEMORY; 3121 + tag->buf = temp; 3122 + tag->bufEnd = temp + bufSize; 3123 + toPtr = (XML_Char*)temp + convLen; 3124 + } 3125 + } 3126 + } 3127 + tag->name.str = (XML_Char*)tag->buf; 3128 + *toPtr = XML_T('\0'); 3129 + result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); 3130 + if (result) return result; 3131 + if (parser->m_startElementHandler) 3132 + parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, (const XML_Char**)parser->m_atts); 3133 + else if (parser->m_defaultHandler) 3134 + reportDefault(parser, enc, s, next); 3135 + poolClear(&parser->m_tempPool); 3136 + break; 3137 + } 3138 + case XML_TOK_EMPTY_ELEMENT_NO_ATTS: 3139 + /* fall through */ 3140 + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { 3141 + const char* rawName = s + enc->minBytesPerChar; 3142 + enum XML_Error result; 3143 + BINDING* bindings = NULL; 3144 + XML_Bool noElmHandlers = XML_TRUE; 3145 + TAG_NAME name; 3146 + name.str = poolStoreString(&parser->m_tempPool, enc, rawName, rawName + XmlNameLength(enc, rawName)); 3147 + if (!name.str) return XML_ERROR_NO_MEMORY; 3148 + poolFinish(&parser->m_tempPool); 3149 + result = storeAtts(parser, enc, s, &name, &bindings, XML_ACCOUNT_NONE /* token spans whole start tag */); 3150 + if (result != XML_ERROR_NONE) { 3151 + freeBindings(parser, bindings); 3152 + return result; 3153 + } 3154 + poolFinish(&parser->m_tempPool); 3155 + if (parser->m_startElementHandler) { 3156 + parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char**)parser->m_atts); 3157 + noElmHandlers = XML_FALSE; 3158 + } 3159 + if (parser->m_endElementHandler) { 3160 + if (parser->m_startElementHandler) *eventPP = 
*eventEndPP; 3161 + parser->m_endElementHandler(parser->m_handlerArg, name.str); 3162 + noElmHandlers = XML_FALSE; 3163 + } 3164 + if (noElmHandlers && parser->m_defaultHandler) reportDefault(parser, enc, s, next); 3165 + poolClear(&parser->m_tempPool); 3166 + freeBindings(parser, bindings); 3167 + } 3168 + if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3169 + if (parser->m_parsingStatus.parsing == XML_SUSPENDED || 3170 + (parser->m_parsingStatus.parsing == XML_PARSING && parser->m_reenter)) 3171 + parser->m_processor = epilogProcessor; 3172 + else 3173 + return epilogProcessor(parser, next, end, nextPtr); 3174 + } 3175 + break; 3176 + case XML_TOK_END_TAG: 3177 + if (parser->m_tagLevel == startTagLevel) 3178 + return XML_ERROR_ASYNC_ENTITY; 3179 + else { 3180 + int len; 3181 + const char* rawName; 3182 + TAG* tag = parser->m_tagStack; 3183 + rawName = s + enc->minBytesPerChar * 2; 3184 + len = XmlNameLength(enc, rawName); 3185 + if (len != tag->rawNameLength || memcmp(tag->rawName, rawName, len) != 0) { 3186 + *eventPP = rawName; 3187 + return XML_ERROR_TAG_MISMATCH; 3188 + } 3189 + parser->m_tagStack = tag->parent; 3190 + tag->parent = parser->m_freeTagList; 3191 + parser->m_freeTagList = tag; 3192 + --parser->m_tagLevel; 3193 + if (parser->m_endElementHandler) { 3194 + const XML_Char* localPart; 3195 + const XML_Char* prefix; 3196 + XML_Char* uri; 3197 + localPart = tag->name.localPart; 3198 + if (parser->m_ns && localPart) { 3199 + /* localPart and prefix may have been overwritten in 3200 + tag->name.str, since this points to the binding->uri 3201 + buffer which gets reused; so we have to add them again 3202 + */ 3203 + uri = (XML_Char*)tag->name.str + tag->name.uriLen; 3204 + /* don't need to check for space - already done in storeAtts() */ 3205 + while (*localPart) *uri++ = *localPart++; 3206 + prefix = tag->name.prefix; 3207 + if (parser->m_ns_triplets && prefix) { 3208 + *uri++ = parser->m_namespaceSeparator; 3209 + 
while (*prefix) *uri++ = *prefix++; 3210 + } 3211 + *uri = XML_T('\0'); 3212 + } 3213 + parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); 3214 + } else if (parser->m_defaultHandler) 3215 + reportDefault(parser, enc, s, next); 3216 + while (tag->bindings) { 3217 + BINDING* b = tag->bindings; 3218 + if (parser->m_endNamespaceDeclHandler) 3219 + parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); 3220 + tag->bindings = tag->bindings->nextTagBinding; 3221 + b->nextTagBinding = parser->m_freeBindingList; 3222 + parser->m_freeBindingList = b; 3223 + b->prefix->binding = b->prevPrefixBinding; 3224 + } 3225 + if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3226 + if (parser->m_parsingStatus.parsing == XML_SUSPENDED || 3227 + (parser->m_parsingStatus.parsing == XML_PARSING && parser->m_reenter)) 3228 + parser->m_processor = epilogProcessor; 3229 + else 3230 + return epilogProcessor(parser, next, end, nextPtr); 3231 + } 3232 + } 3233 + break; 3234 + case XML_TOK_CHAR_REF: { 3235 + int n = XmlCharRefNumber(enc, s); 3236 + if (n < 0) return XML_ERROR_BAD_CHAR_REF; 3237 + if (parser->m_characterDataHandler) { 3238 + XML_Char buf[XML_ENCODE_MAX]; 3239 + parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR*)buf)); 3240 + } else if (parser->m_defaultHandler) 3241 + reportDefault(parser, enc, s, next); 3242 + } break; 3243 + case XML_TOK_XML_DECL: 3244 + return XML_ERROR_MISPLACED_XML_PI; 3245 + case XML_TOK_DATA_NEWLINE: 3246 + if (parser->m_characterDataHandler) { 3247 + XML_Char c = 0xA; 3248 + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 3249 + } else if (parser->m_defaultHandler) 3250 + reportDefault(parser, enc, s, next); 3251 + break; 3252 + case XML_TOK_CDATA_SECT_OPEN: { 3253 + enum XML_Error result; 3254 + if (parser->m_startCdataSectionHandler) parser->m_startCdataSectionHandler(parser->m_handlerArg); 3255 + /* BEGIN disabled code */ 3256 + /* Suppose you are
doing a transformation on a document that involves 3257 + changing only the character data. You set up a defaultHandler 3258 + and a characterDataHandler. The defaultHandler simply copies 3259 + characters through. The characterDataHandler does the 3260 + transformation and writes the characters out escaping them as 3261 + necessary. This case will fail to work if we leave out the 3262 + following two lines (because & and < inside CDATA sections will 3263 + be incorrectly escaped). 3264 + 3265 + However, now we have a start/endCdataSectionHandler, so it seems 3266 + easier to let the user deal with this. 3267 + */ 3268 + else if ((0) && parser->m_characterDataHandler) 3269 + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); 3270 + /* END disabled code */ 3271 + else if (parser->m_defaultHandler) 3272 + reportDefault(parser, enc, s, next); 3273 + result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); 3274 + if (result != XML_ERROR_NONE) 3275 + return result; 3276 + else if (!next) { 3277 + parser->m_processor = cdataSectionProcessor; 3278 + return result; 3279 + } 3280 + } break; 3281 + case XML_TOK_TRAILING_RSQB: 3282 + if (haveMore) { 3283 + *nextPtr = s; 3284 + return XML_ERROR_NONE; 3285 + } 3286 + if (parser->m_characterDataHandler) { 3287 + if (MUST_CONVERT(enc, s)) { 3288 + ICHAR* dataPtr = (ICHAR*)parser->m_dataBuf; 3289 + XmlConvert(enc, &s, end, &dataPtr, (ICHAR*)parser->m_dataBufEnd); 3290 + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 3291 + (int)(dataPtr - (ICHAR*)parser->m_dataBuf)); 3292 + } else 3293 + parser->m_characterDataHandler(parser->m_handlerArg, (const XML_Char*)s, 3294 + (int)((const XML_Char*)end - (const XML_Char*)s)); 3295 + } else if (parser->m_defaultHandler) 3296 + reportDefault(parser, enc, s, end); 3297 + /* We are at the end of the final buffer, should we check for 3298 + XML_SUSPENDED, XML_FINISHED? 
3299 + */ 3300 + if (startTagLevel == 0) { 3301 + *eventPP = end; 3302 + return XML_ERROR_NO_ELEMENTS; 3303 + } 3304 + if (parser->m_tagLevel != startTagLevel) { 3305 + *eventPP = end; 3306 + return XML_ERROR_ASYNC_ENTITY; 3307 + } 3308 + *nextPtr = end; 3309 + return XML_ERROR_NONE; 3310 + case XML_TOK_DATA_CHARS: { 3311 + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 3312 + if (charDataHandler) { 3313 + if (MUST_CONVERT(enc, s)) { 3314 + for (;;) { 3315 + ICHAR* dataPtr = (ICHAR*)parser->m_dataBuf; 3316 + const enum XML_Convert_Result convert_res = 3317 + XmlConvert(enc, &s, next, &dataPtr, (ICHAR*)parser->m_dataBufEnd); 3318 + *eventEndPP = s; 3319 + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR*)parser->m_dataBuf)); 3320 + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; 3321 + *eventPP = s; 3322 + } 3323 + } else 3324 + charDataHandler(parser->m_handlerArg, (const XML_Char*)s, 3325 + (int)((const XML_Char*)next - (const XML_Char*)s)); 3326 + } else if (parser->m_defaultHandler) 3327 + reportDefault(parser, enc, s, next); 3328 + } break; 3329 + case XML_TOK_PI: 3330 + if (!reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; 3331 + break; 3332 + case XML_TOK_COMMENT: 3333 + if (!reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; 3334 + break; 3335 + default: 3336 + /* All of the tokens produced by XmlContentTok() have their own 3337 + * explicit cases, so this default is not strictly necessary. 3338 + * However it is a useful safety net, so we retain the code and 3339 + * simply exclude it from the coverage tests. 
3340 + * 3341 + * LCOV_EXCL_START 3342 + */ 3343 + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); 3344 + break; 3345 + /* LCOV_EXCL_STOP */ 3346 + } 3347 + switch (parser->m_parsingStatus.parsing) { 3348 + case XML_SUSPENDED: 3349 + *eventPP = next; 3350 + *nextPtr = next; 3351 + return XML_ERROR_NONE; 3352 + case XML_FINISHED: 3353 + *eventPP = next; 3354 + return XML_ERROR_ABORTED; 3355 + case XML_PARSING: 3356 + if (parser->m_reenter) { 3357 + *nextPtr = next; 3358 + return XML_ERROR_NONE; 3359 + } 3360 + /* Fall through */ 3361 + default:; 3362 + *eventPP = s = next; 3363 + } 3364 + } 3365 + /* not reached */ 3366 + } 3367 + 3368 + /* This function does not call free() on the allocated memory, merely 3369 + * moving it to the parser's m_freeBindingList where it can be freed or 3370 + * reused as appropriate. 3371 + */ 3372 + static void freeBindings(XML_Parser parser, BINDING* bindings) { 3373 + while (bindings) { 3374 + BINDING* b = bindings; 3375 + 3376 + /* m_startNamespaceDeclHandler will have been called for this 3377 + * binding in addBindings(), so call the end handler now. 
3378 + */ 3379 + if (parser->m_endNamespaceDeclHandler) parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); 3380 + 3381 + bindings = bindings->nextTagBinding; 3382 + b->nextTagBinding = parser->m_freeBindingList; 3383 + parser->m_freeBindingList = b; 3384 + b->prefix->binding = b->prevPrefixBinding; 3385 + } 3386 + } 3387 + 3388 + /* Precondition: all arguments must be non-NULL; 3389 + Purpose: 3390 + - normalize attributes 3391 + - check attributes for well-formedness 3392 + - generate namespace aware attribute names (URI, prefix) 3393 + - build list of attributes for startElementHandler 3394 + - default attributes 3395 + - process namespace declarations (check and report them) 3396 + - generate namespace aware element name (URI, prefix) 3397 + */ 3398 + static enum XML_Error storeAtts(XML_Parser parser, const ENCODING* enc, const char* attStr, TAG_NAME* tagNamePtr, 3399 + BINDING** bindingsPtr, enum XML_Account account) { 3400 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 3401 + ELEMENT_TYPE* elementType; 3402 + int nDefaultAtts; 3403 + const XML_Char** appAtts; /* the attribute list for the application */ 3404 + int attIndex = 0; 3405 + int prefixLen; 3406 + int i; 3407 + int n; 3408 + XML_Char* uri; 3409 + int nPrefixes = 0; 3410 + BINDING* binding; 3411 + const XML_Char* localPart; 3412 + 3413 + /* lookup the element type name */ 3414 + elementType = (ELEMENT_TYPE*)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); 3415 + if (!elementType) { 3416 + const XML_Char* name = poolCopyString(&dtd->pool, tagNamePtr->str); 3417 + if (!name) return XML_ERROR_NO_MEMORY; 3418 + elementType = (ELEMENT_TYPE*)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); 3419 + if (!elementType) return XML_ERROR_NO_MEMORY; 3420 + if (parser->m_ns && !setElementTypePrefix(parser, elementType)) return XML_ERROR_NO_MEMORY; 3421 + } 3422 + nDefaultAtts = elementType->nDefaultAtts; 3423 + 3424 + /* get the attributes from the 
tokenizer */ 3425 + n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); 3426 + 3427 + /* Detect and prevent integer overflow */ 3428 + if (n > INT_MAX - nDefaultAtts) { 3429 + return XML_ERROR_NO_MEMORY; 3430 + } 3431 + 3432 + if (n + nDefaultAtts > parser->m_attsSize) { 3433 + int oldAttsSize = parser->m_attsSize; 3434 + ATTRIBUTE* temp; 3435 + #ifdef XML_ATTR_INFO 3436 + XML_AttrInfo* temp2; 3437 + #endif 3438 + 3439 + /* Detect and prevent integer overflow */ 3440 + if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { 3441 + return XML_ERROR_NO_MEMORY; 3442 + } 3443 + 3444 + parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; 3445 + 3446 + /* Detect and prevent integer overflow. 3447 + * The preprocessor guard addresses the "always false" warning 3448 + * from -Wtype-limits on platforms where 3449 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3450 + #if UINT_MAX >= SIZE_MAX 3451 + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) { 3452 + parser->m_attsSize = oldAttsSize; 3453 + return XML_ERROR_NO_MEMORY; 3454 + } 3455 + #endif 3456 + 3457 + temp = REALLOC(parser, parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE)); 3458 + if (temp == NULL) { 3459 + parser->m_attsSize = oldAttsSize; 3460 + return XML_ERROR_NO_MEMORY; 3461 + } 3462 + parser->m_atts = temp; 3463 + #ifdef XML_ATTR_INFO 3464 + /* Detect and prevent integer overflow. 3465 + * The preprocessor guard addresses the "always false" warning 3466 + * from -Wtype-limits on platforms where 3467 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 3468 + #if UINT_MAX >= SIZE_MAX 3469 + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) { 3470 + parser->m_attsSize = oldAttsSize; 3471 + return XML_ERROR_NO_MEMORY; 3472 + } 3473 + #endif 3474 + 3475 + temp2 = REALLOC(parser, parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo)); 3476 + if (temp2 == NULL) { 3477 + parser->m_attsSize = oldAttsSize; 3478 + return XML_ERROR_NO_MEMORY; 3479 + } 3480 + parser->m_attInfo = temp2; 3481 + #endif 3482 + if (n > oldAttsSize) XmlGetAttributes(enc, attStr, n, parser->m_atts); 3483 + } 3484 + 3485 + appAtts = (const XML_Char**)parser->m_atts; 3486 + for (i = 0; i < n; i++) { 3487 + ATTRIBUTE* currAtt = &parser->m_atts[i]; 3488 + #ifdef XML_ATTR_INFO 3489 + XML_AttrInfo* currAttInfo = &parser->m_attInfo[i]; 3490 + #endif 3491 + /* add the name and value to the attribute list */ 3492 + ATTRIBUTE_ID* attId = getAttributeId(parser, enc, currAtt->name, currAtt->name + XmlNameLength(enc, currAtt->name)); 3493 + if (!attId) return XML_ERROR_NO_MEMORY; 3494 + #ifdef XML_ATTR_INFO 3495 + currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name); 3496 + currAttInfo->nameEnd = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name); 3497 + currAttInfo->valueStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valuePtr); 3498 + currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd); 3499 + #endif 3500 + /* Detect duplicate attributes by their QNames. This does not work when 3501 + namespace processing is turned on and different prefixes for the same 3502 + namespace are used. For this case we have a check further down. 
3503 + */ 3504 + if ((attId->name)[-1]) { 3505 + if (enc == parser->m_encoding) parser->m_eventPtr = parser->m_atts[i].name; 3506 + return XML_ERROR_DUPLICATE_ATTRIBUTE; 3507 + } 3508 + (attId->name)[-1] = 1; 3509 + appAtts[attIndex++] = attId->name; 3510 + if (!parser->m_atts[i].normalized) { 3511 + enum XML_Error result; 3512 + XML_Bool isCdata = XML_TRUE; 3513 + 3514 + /* figure out whether declared as other than CDATA */ 3515 + if (attId->maybeTokenized) { 3516 + int j; 3517 + for (j = 0; j < nDefaultAtts; j++) { 3518 + if (attId == elementType->defaultAtts[j].id) { 3519 + isCdata = elementType->defaultAtts[j].isCdata; 3520 + break; 3521 + } 3522 + } 3523 + } 3524 + 3525 + /* normalize the attribute value */ 3526 + result = storeAttributeValue(parser, enc, isCdata, parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd, 3527 + &parser->m_tempPool, account); 3528 + if (result) return result; 3529 + appAtts[attIndex] = poolStart(&parser->m_tempPool); 3530 + poolFinish(&parser->m_tempPool); 3531 + } else { 3532 + /* the value did not need normalizing */ 3533 + appAtts[attIndex] = 3534 + poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd); 3535 + if (appAtts[attIndex] == 0) return XML_ERROR_NO_MEMORY; 3536 + poolFinish(&parser->m_tempPool); 3537 + } 3538 + /* handle prefixed attribute names */ 3539 + if (attId->prefix) { 3540 + if (attId->xmlns) { 3541 + /* deal with namespace declarations here */ 3542 + enum XML_Error result = addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr); 3543 + if (result) return result; 3544 + --attIndex; 3545 + } else { 3546 + /* deal with other prefixed names later */ 3547 + attIndex++; 3548 + nPrefixes++; 3549 + (attId->name)[-1] = 2; 3550 + } 3551 + } else 3552 + attIndex++; 3553 + } 3554 + 3555 + /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ 3556 + parser->m_nSpecifiedAtts = attIndex; 3557 + if (elementType->idAtt && 
(elementType->idAtt->name)[-1]) { 3558 + for (i = 0; i < attIndex; i += 2) 3559 + if (appAtts[i] == elementType->idAtt->name) { 3560 + parser->m_idAttIndex = i; 3561 + break; 3562 + } 3563 + } else 3564 + parser->m_idAttIndex = -1; 3565 + 3566 + /* do attribute defaulting */ 3567 + for (i = 0; i < nDefaultAtts; i++) { 3568 + const DEFAULT_ATTRIBUTE* da = elementType->defaultAtts + i; 3569 + if (!(da->id->name)[-1] && da->value) { 3570 + if (da->id->prefix) { 3571 + if (da->id->xmlns) { 3572 + enum XML_Error result = addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr); 3573 + if (result) return result; 3574 + } else { 3575 + (da->id->name)[-1] = 2; 3576 + nPrefixes++; 3577 + appAtts[attIndex++] = da->id->name; 3578 + appAtts[attIndex++] = da->value; 3579 + } 3580 + } else { 3581 + (da->id->name)[-1] = 1; 3582 + appAtts[attIndex++] = da->id->name; 3583 + appAtts[attIndex++] = da->value; 3584 + } 3585 + } 3586 + } 3587 + appAtts[attIndex] = 0; 3588 + 3589 + /* expand prefixed attribute names, check for duplicates, 3590 + and clear flags that say whether attributes were specified */ 3591 + i = 0; 3592 + if (nPrefixes) { 3593 + unsigned int j; /* hash table index */ 3594 + unsigned long version = parser->m_nsAttsVersion; 3595 + 3596 + /* Detect and prevent invalid shift */ 3597 + if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) { 3598 + return XML_ERROR_NO_MEMORY; 3599 + } 3600 + 3601 + unsigned int nsAttsSize = 1u << parser->m_nsAttsPower; 3602 + unsigned char oldNsAttsPower = parser->m_nsAttsPower; 3603 + /* size of hash table must be at least 2 * (# of prefixed attributes) */ 3604 + if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ 3605 + NS_ATT* temp; 3606 + /* hash table size must also be a power of 2 and >= 8 */ 3607 + while (nPrefixes >> parser->m_nsAttsPower++); 3608 + if (parser->m_nsAttsPower < 3) parser->m_nsAttsPower = 3; 3609 + 3610 + /* Detect and prevent invalid shift */ 3611 + if 
(parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) { 3612 + /* Restore actual size of memory in m_nsAtts */ 3613 + parser->m_nsAttsPower = oldNsAttsPower; 3614 + return XML_ERROR_NO_MEMORY; 3615 + } 3616 + 3617 + nsAttsSize = 1u << parser->m_nsAttsPower; 3618 + 3619 + /* Detect and prevent integer overflow. 3620 + * The preprocessor guard addresses the "always false" warning 3621 + * from -Wtype-limits on platforms where 3622 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3623 + #if UINT_MAX >= SIZE_MAX 3624 + if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) { 3625 + /* Restore actual size of memory in m_nsAtts */ 3626 + parser->m_nsAttsPower = oldNsAttsPower; 3627 + return XML_ERROR_NO_MEMORY; 3628 + } 3629 + #endif 3630 + 3631 + temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT)); 3632 + if (!temp) { 3633 + /* Restore actual size of memory in m_nsAtts */ 3634 + parser->m_nsAttsPower = oldNsAttsPower; 3635 + return XML_ERROR_NO_MEMORY; 3636 + } 3637 + parser->m_nsAtts = temp; 3638 + version = 0; /* force re-initialization of m_nsAtts hash table */ 3639 + } 3640 + /* using a version flag saves us from initializing m_nsAtts every time */ 3641 + if (!version) { /* initialize version flags when version wraps around */ 3642 + version = INIT_ATTS_VERSION; 3643 + for (j = nsAttsSize; j != 0;) parser->m_nsAtts[--j].version = version; 3644 + } 3645 + parser->m_nsAttsVersion = --version; 3646 + 3647 + /* expand prefixed names and check for duplicates */ 3648 + for (; i < attIndex; i += 2) { 3649 + const XML_Char* s = appAtts[i]; 3650 + if (s[-1] == 2) { /* prefixed */ 3651 + ATTRIBUTE_ID* id; 3652 + const BINDING* b; 3653 + unsigned long uriHash; 3654 + struct siphash sip_state; 3655 + struct sipkey sip_key; 3656 + 3657 + copy_salt_to_sipkey(parser, &sip_key); 3658 + sip24_init(&sip_state, &sip_key); 3659 + 3660 + ((XML_Char*)s)[-1] = 0; /* clear flag */ 3661 + id = (ATTRIBUTE_ID*)lookup(parser, &dtd->attributeIds, s, 0); 3662 
+ if (!id || !id->prefix) { 3663 + /* This code is walking through the appAtts array, dealing 3664 + * with (in this case) a prefixed attribute name. To be in 3665 + * the array, the attribute must have already been bound, so 3666 + * has to have passed through the hash table lookup once 3667 + * already. That implies that an entry for it already 3668 + * exists, so the lookup above will return a pointer to 3669 + * already allocated memory. There is no opportunity for 3670 + * the allocator to fail, so the condition above cannot be 3671 + * fulfilled. 3672 + * 3673 + * Since it is difficult to be certain that the above 3674 + * analysis is complete, we retain the test and merely 3675 + * remove the code from coverage tests. 3676 + */ 3677 + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ 3678 + } 3679 + b = id->prefix->binding; 3680 + if (!b) return XML_ERROR_UNBOUND_PREFIX; 3681 + 3682 + for (j = 0; j < (unsigned int)b->uriLen; j++) { 3683 + const XML_Char c = b->uri[j]; 3684 + if (!poolAppendChar(&parser->m_tempPool, c)) return XML_ERROR_NO_MEMORY; 3685 + } 3686 + 3687 + sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); 3688 + 3689 + while (*s++ != XML_T(ASCII_COLON)); 3690 + 3691 + sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); 3692 + 3693 + do { /* copies null terminator */ 3694 + if (!poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; 3695 + } while (*s++); 3696 + 3697 + uriHash = (unsigned long)sip24_final(&sip_state); 3698 + 3699 + { /* Check hash table for duplicate of expanded name (uriName). 3700 + Derived from code in lookup(parser, HASH_TABLE *table, ...).
3701 + */ 3702 + unsigned char step = 0; 3703 + unsigned long mask = nsAttsSize - 1; 3704 + j = uriHash & mask; /* index into hash table */ 3705 + while (parser->m_nsAtts[j].version == version) { 3706 + /* for speed we compare stored hash values first */ 3707 + if (uriHash == parser->m_nsAtts[j].hash) { 3708 + const XML_Char* s1 = poolStart(&parser->m_tempPool); 3709 + const XML_Char* s2 = parser->m_nsAtts[j].uriName; 3710 + /* s1 is null terminated, but not s2 */ 3711 + for (; *s1 == *s2 && *s1 != 0; s1++, s2++); 3712 + if (*s1 == 0) return XML_ERROR_DUPLICATE_ATTRIBUTE; 3713 + } 3714 + if (!step) step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower); 3715 + j < step ? (j += nsAttsSize - step) : (j -= step); 3716 + } 3717 + } 3718 + 3719 + if (parser->m_ns_triplets) { /* append namespace separator and prefix */ 3720 + parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; 3721 + s = b->prefix->name; 3722 + do { 3723 + if (!poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; 3724 + } while (*s++); 3725 + } 3726 + 3727 + /* store expanded name in attribute list */ 3728 + s = poolStart(&parser->m_tempPool); 3729 + poolFinish(&parser->m_tempPool); 3730 + appAtts[i] = s; 3731 + 3732 + /* fill empty slot with new version, uriName and hash value */ 3733 + parser->m_nsAtts[j].version = version; 3734 + parser->m_nsAtts[j].hash = uriHash; 3735 + parser->m_nsAtts[j].uriName = s; 3736 + 3737 + if (!--nPrefixes) { 3738 + i += 2; 3739 + break; 3740 + } 3741 + } else /* not prefixed */ 3742 + ((XML_Char*)s)[-1] = 0; /* clear flag */ 3743 + } 3744 + } 3745 + /* clear flags for the remaining attributes */ 3746 + for (; i < attIndex; i += 2) ((XML_Char*)(appAtts[i]))[-1] = 0; 3747 + for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; 3748 + 3749 + if (!parser->m_ns) return XML_ERROR_NONE; 3750 + 3751 + /* expand the element type name */ 3752 + if (elementType->prefix) { 3753 + binding = 
elementType->prefix->binding; 3754 + if (!binding) return XML_ERROR_UNBOUND_PREFIX; 3755 + localPart = tagNamePtr->str; 3756 + while (*localPart++ != XML_T(ASCII_COLON)); 3757 + } else if (dtd->defaultPrefix.binding) { 3758 + binding = dtd->defaultPrefix.binding; 3759 + localPart = tagNamePtr->str; 3760 + } else 3761 + return XML_ERROR_NONE; 3762 + prefixLen = 0; 3763 + if (parser->m_ns_triplets && binding->prefix->name) { 3764 + while (binding->prefix->name[prefixLen++]); /* prefixLen includes null terminator */ 3765 + } 3766 + tagNamePtr->localPart = localPart; 3767 + tagNamePtr->uriLen = binding->uriLen; 3768 + tagNamePtr->prefix = binding->prefix->name; 3769 + tagNamePtr->prefixLen = prefixLen; 3770 + for (i = 0; localPart[i++];); /* i includes null terminator */ 3771 + 3772 + /* Detect and prevent integer overflow */ 3773 + if (binding->uriLen > INT_MAX - prefixLen || i > INT_MAX - (binding->uriLen + prefixLen)) { 3774 + return XML_ERROR_NO_MEMORY; 3775 + } 3776 + 3777 + n = i + binding->uriLen + prefixLen; 3778 + if (n > binding->uriAlloc) { 3779 + TAG* p; 3780 + 3781 + /* Detect and prevent integer overflow */ 3782 + if (n > INT_MAX - EXPAND_SPARE) { 3783 + return XML_ERROR_NO_MEMORY; 3784 + } 3785 + /* Detect and prevent integer overflow. 3786 + * The preprocessor guard addresses the "always false" warning 3787 + * from -Wtype-limits on platforms where 3788 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 3789 + #if UINT_MAX >= SIZE_MAX 3790 + if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { 3791 + return XML_ERROR_NO_MEMORY; 3792 + } 3793 + #endif 3794 + 3795 + uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); 3796 + if (!uri) return XML_ERROR_NO_MEMORY; 3797 + binding->uriAlloc = n + EXPAND_SPARE; 3798 + memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); 3799 + for (p = parser->m_tagStack; p; p = p->parent) 3800 + if (p->name.str == binding->uri) p->name.str = uri; 3801 + FREE(parser, binding->uri); 3802 + binding->uri = uri; 3803 + } 3804 + /* if m_namespaceSeparator != '\0' then uri includes it already */ 3805 + uri = binding->uri + binding->uriLen; 3806 + memcpy(uri, localPart, i * sizeof(XML_Char)); 3807 + /* we always have a namespace separator between localPart and prefix */ 3808 + if (prefixLen) { 3809 + uri += i - 1; 3810 + *uri = parser->m_namespaceSeparator; /* replace null terminator */ 3811 + memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); 3812 + } 3813 + tagNamePtr->str = binding->uri; 3814 + return XML_ERROR_NONE; 3815 + } 3816 + 3817 + static XML_Bool is_rfc3986_uri_char(XML_Char candidate) { 3818 + // For the RFC 3986 ANBF grammar see 3819 + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A 3820 + 3821 + switch (candidate) { 3822 + // From rule "ALPHA" (uppercase half) 3823 + case 'A': 3824 + case 'B': 3825 + case 'C': 3826 + case 'D': 3827 + case 'E': 3828 + case 'F': 3829 + case 'G': 3830 + case 'H': 3831 + case 'I': 3832 + case 'J': 3833 + case 'K': 3834 + case 'L': 3835 + case 'M': 3836 + case 'N': 3837 + case 'O': 3838 + case 'P': 3839 + case 'Q': 3840 + case 'R': 3841 + case 'S': 3842 + case 'T': 3843 + case 'U': 3844 + case 'V': 3845 + case 'W': 3846 + case 'X': 3847 + case 'Y': 3848 + case 'Z': 3849 + 3850 + // From rule "ALPHA" (lowercase half) 3851 + case 'a': 3852 + case 'b': 3853 + case 'c': 3854 + case 'd': 3855 + case 'e': 3856 + case 'f': 3857 + case 'g': 3858 
+ case 'h': 3859 + case 'i': 3860 + case 'j': 3861 + case 'k': 3862 + case 'l': 3863 + case 'm': 3864 + case 'n': 3865 + case 'o': 3866 + case 'p': 3867 + case 'q': 3868 + case 'r': 3869 + case 's': 3870 + case 't': 3871 + case 'u': 3872 + case 'v': 3873 + case 'w': 3874 + case 'x': 3875 + case 'y': 3876 + case 'z': 3877 + 3878 + // From rule "DIGIT" 3879 + case '0': 3880 + case '1': 3881 + case '2': 3882 + case '3': 3883 + case '4': 3884 + case '5': 3885 + case '6': 3886 + case '7': 3887 + case '8': 3888 + case '9': 3889 + 3890 + // From rule "pct-encoded" 3891 + case '%': 3892 + 3893 + // From rule "unreserved" 3894 + case '-': 3895 + case '.': 3896 + case '_': 3897 + case '~': 3898 + 3899 + // From rule "gen-delims" 3900 + case ':': 3901 + case '/': 3902 + case '?': 3903 + case '#': 3904 + case '[': 3905 + case ']': 3906 + case '@': 3907 + 3908 + // From rule "sub-delims" 3909 + case '!': 3910 + case '$': 3911 + case '&': 3912 + case '\'': 3913 + case '(': 3914 + case ')': 3915 + case '*': 3916 + case '+': 3917 + case ',': 3918 + case ';': 3919 + case '=': 3920 + return XML_TRUE; 3921 + 3922 + default: 3923 + return XML_FALSE; 3924 + } 3925 + } 3926 + 3927 + /* addBinding() overwrites the value of prefix->binding without checking. 3928 + Therefore one must keep track of the old value outside of addBinding(). 
3929 + */ 3930 + static enum XML_Error addBinding(XML_Parser parser, PREFIX* prefix, const ATTRIBUTE_ID* attId, const XML_Char* uri, 3931 + BINDING** bindingsPtr) { 3932 + // "http://www.w3.org/XML/1998/namespace" 3933 + static const XML_Char xmlNamespace[] = { 3934 + ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, 3935 + ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, 3936 + ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, 3937 + ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, 3938 + ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; 3939 + static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; 3940 + // "http://www.w3.org/2000/xmlns/" 3941 + static const XML_Char xmlnsNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 3942 + ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, 3943 + ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, 3944 + ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, 3945 + ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; 3946 + static const int xmlnsLen = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; 3947 + 3948 + XML_Bool mustBeXML = XML_FALSE; 3949 + XML_Bool isXML = XML_TRUE; 3950 + XML_Bool isXMLNS = XML_TRUE; 3951 + 3952 + BINDING* b; 3953 + int len; 3954 + 3955 + /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ 3956 + if (*uri == XML_T('\0') && prefix->name) return XML_ERROR_UNDECLARING_PREFIX; 3957 + 3958 + if (prefix->name && prefix->name[0] == XML_T(ASCII_x) && prefix->name[1] == XML_T(ASCII_m) && 3959 + prefix->name[2] == XML_T(ASCII_l)) { 3960 + /* Not allowed to bind xmlns */ 3961 + if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) && prefix->name[5] == XML_T('\0')) 3962 + return XML_ERROR_RESERVED_PREFIX_XMLNS; 3963 + 3964 + if (prefix->name[3] == XML_T('\0')) 
mustBeXML = XML_TRUE; 3965 + } 3966 + 3967 + for (len = 0; uri[len]; len++) { 3968 + if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) isXML = XML_FALSE; 3969 + 3970 + if (!mustBeXML && isXMLNS && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; 3971 + 3972 + // NOTE: While Expat does not validate namespace URIs against RFC 3986 3973 + // today (and is not REQUIRED to do so with regard to the XML 1.0 3974 + // namespaces specification) we have to at least make sure, that 3975 + // the application on top of Expat (that is likely splitting expanded 3976 + // element names ("qualified names") of form 3977 + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces 3978 + // in its element handler code) cannot be confused by an attacker 3979 + // putting additional namespace separator characters into namespace 3980 + // declarations. That would be ambiguous and not to be expected. 3981 + // 3982 + // While the HTML API docs of function XML_ParserCreateNS have been 3983 + // advising against use of a namespace separator character that can 3984 + // appear in a URI for >20 years now, some widespread applications 3985 + // are using URI characters (':' (colon) in particular) for a 3986 + // namespace separator, in practice. To keep these applications 3987 + // functional, we only reject namespaces URIs containing the 3988 + // application-chosen namespace separator if the chosen separator 3989 + // is a non-URI character with regard to RFC 3986. 3990 + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) && !is_rfc3986_uri_char(uri[len])) { 3991 + return XML_ERROR_SYNTAX; 3992 + } 3993 + } 3994 + isXML = isXML && len == xmlLen; 3995 + isXMLNS = isXMLNS && len == xmlnsLen; 3996 + 3997 + if (mustBeXML != isXML) return mustBeXML ? 
XML_ERROR_RESERVED_PREFIX_XML : XML_ERROR_RESERVED_NAMESPACE_URI; 3998 + 3999 + if (isXMLNS) return XML_ERROR_RESERVED_NAMESPACE_URI; 4000 + 4001 + if (parser->m_namespaceSeparator) len++; 4002 + if (parser->m_freeBindingList) { 4003 + b = parser->m_freeBindingList; 4004 + if (len > b->uriAlloc) { 4005 + /* Detect and prevent integer overflow */ 4006 + if (len > INT_MAX - EXPAND_SPARE) { 4007 + return XML_ERROR_NO_MEMORY; 4008 + } 4009 + 4010 + /* Detect and prevent integer overflow. 4011 + * The preprocessor guard addresses the "always false" warning 4012 + * from -Wtype-limits on platforms where 4013 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 4014 + #if UINT_MAX >= SIZE_MAX 4015 + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { 4016 + return XML_ERROR_NO_MEMORY; 4017 + } 4018 + #endif 4019 + 4020 + XML_Char* temp = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4021 + if (temp == NULL) return XML_ERROR_NO_MEMORY; 4022 + b->uri = temp; 4023 + b->uriAlloc = len + EXPAND_SPARE; 4024 + } 4025 + parser->m_freeBindingList = b->nextTagBinding; 4026 + } else { 4027 + b = MALLOC(parser, sizeof(BINDING)); 4028 + if (!b) return XML_ERROR_NO_MEMORY; 4029 + 4030 + /* Detect and prevent integer overflow */ 4031 + if (len > INT_MAX - EXPAND_SPARE) { 4032 + return XML_ERROR_NO_MEMORY; 4033 + } 4034 + /* Detect and prevent integer overflow. 4035 + * The preprocessor guard addresses the "always false" warning 4036 + * from -Wtype-limits on platforms where 4037 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 4038 + #if UINT_MAX >= SIZE_MAX 4039 + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { 4040 + return XML_ERROR_NO_MEMORY; 4041 + } 4042 + #endif 4043 + 4044 + b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4045 + if (!b->uri) { 4046 + FREE(parser, b); 4047 + return XML_ERROR_NO_MEMORY; 4048 + } 4049 + b->uriAlloc = len + EXPAND_SPARE; 4050 + } 4051 + b->uriLen = len; 4052 + memcpy(b->uri, uri, len * sizeof(XML_Char)); 4053 + if (parser->m_namespaceSeparator) b->uri[len - 1] = parser->m_namespaceSeparator; 4054 + b->prefix = prefix; 4055 + b->attId = attId; 4056 + b->prevPrefixBinding = prefix->binding; 4057 + /* NULL binding when default namespace undeclared */ 4058 + if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) 4059 + prefix->binding = NULL; 4060 + else 4061 + prefix->binding = b; 4062 + b->nextTagBinding = *bindingsPtr; 4063 + *bindingsPtr = b; 4064 + /* if attId == NULL then we are not starting a namespace scope */ 4065 + if (attId && parser->m_startNamespaceDeclHandler) 4066 + parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, prefix->binding ? uri : 0); 4067 + return XML_ERROR_NONE; 4068 + } 4069 + 4070 + /* The idea here is to avoid using stack for each CDATA section when 4071 + the whole file is parsed with one call. 
4072 + */ 4073 + static enum XML_Error PTRCALL cdataSectionProcessor(XML_Parser parser, const char* start, const char* end, 4074 + const char** endPtr) { 4075 + enum XML_Error result = doCdataSection(parser, parser->m_encoding, &start, end, endPtr, 4076 + (XML_Bool)!parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 4077 + if (result != XML_ERROR_NONE) return result; 4078 + if (start) { 4079 + if (parser->m_parentParser) { /* we are parsing an external entity */ 4080 + parser->m_processor = externalEntityContentProcessor; 4081 + return externalEntityContentProcessor(parser, start, end, endPtr); 4082 + } else { 4083 + parser->m_processor = contentProcessor; 4084 + return contentProcessor(parser, start, end, endPtr); 4085 + } 4086 + } 4087 + return result; 4088 + } 4089 + 4090 + /* startPtr gets set to non-null if the section is closed, and to null if 4091 + the section is not yet closed. 4092 + */ 4093 + static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING* enc, const char** startPtr, const char* end, 4094 + const char** nextPtr, XML_Bool haveMore, enum XML_Account account) { 4095 + const char* s = *startPtr; 4096 + const char** eventPP; 4097 + const char** eventEndPP; 4098 + if (enc == parser->m_encoding) { 4099 + eventPP = &parser->m_eventPtr; 4100 + *eventPP = s; 4101 + eventEndPP = &parser->m_eventEndPtr; 4102 + } else { 4103 + eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4104 + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4105 + } 4106 + *eventPP = s; 4107 + *startPtr = NULL; 4108 + 4109 + for (;;) { 4110 + const char* next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4111 + int tok = XmlCdataSectionTok(enc, s, end, &next); 4112 + #if XML_GE == 1 4113 + if (!accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 4114 + accountingOnAbort(parser); 4115 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4116 + } 4117 + #else 4118 + UNUSED_P(account); 4119 + #endif 4120 + 
*eventEndPP = next; 4121 + switch (tok) { 4122 + case XML_TOK_CDATA_SECT_CLOSE: 4123 + if (parser->m_endCdataSectionHandler) parser->m_endCdataSectionHandler(parser->m_handlerArg); 4124 + /* BEGIN disabled code */ 4125 + /* see comment under XML_TOK_CDATA_SECT_OPEN */ 4126 + else if ((0) && parser->m_characterDataHandler) 4127 + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); 4128 + /* END disabled code */ 4129 + else if (parser->m_defaultHandler) 4130 + reportDefault(parser, enc, s, next); 4131 + *startPtr = next; 4132 + *nextPtr = next; 4133 + if (parser->m_parsingStatus.parsing == XML_FINISHED) 4134 + return XML_ERROR_ABORTED; 4135 + else 4136 + return XML_ERROR_NONE; 4137 + case XML_TOK_DATA_NEWLINE: 4138 + if (parser->m_characterDataHandler) { 4139 + XML_Char c = 0xA; 4140 + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 4141 + } else if (parser->m_defaultHandler) 4142 + reportDefault(parser, enc, s, next); 4143 + break; 4144 + case XML_TOK_DATA_CHARS: { 4145 + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 4146 + if (charDataHandler) { 4147 + if (MUST_CONVERT(enc, s)) { 4148 + for (;;) { 4149 + ICHAR* dataPtr = (ICHAR*)parser->m_dataBuf; 4150 + const enum XML_Convert_Result convert_res = 4151 + XmlConvert(enc, &s, next, &dataPtr, (ICHAR*)parser->m_dataBufEnd); 4152 + *eventEndPP = next; 4153 + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR*)parser->m_dataBuf)); 4154 + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; 4155 + *eventPP = s; 4156 + } 4157 + } else 4158 + charDataHandler(parser->m_handlerArg, (const XML_Char*)s, 4159 + (int)((const XML_Char*)next - (const XML_Char*)s)); 4160 + } else if (parser->m_defaultHandler) 4161 + reportDefault(parser, enc, s, next); 4162 + } break; 4163 + case XML_TOK_INVALID: 4164 + *eventPP = next; 4165 + return XML_ERROR_INVALID_TOKEN; 4166 + case XML_TOK_PARTIAL_CHAR: 
4167 + if (haveMore) { 4168 + *nextPtr = s; 4169 + return XML_ERROR_NONE; 4170 + } 4171 + return XML_ERROR_PARTIAL_CHAR; 4172 + case XML_TOK_PARTIAL: 4173 + case XML_TOK_NONE: 4174 + if (haveMore) { 4175 + *nextPtr = s; 4176 + return XML_ERROR_NONE; 4177 + } 4178 + return XML_ERROR_UNCLOSED_CDATA_SECTION; 4179 + default: 4180 + /* Every token returned by XmlCdataSectionTok() has its own 4181 + * explicit case, so this default case will never be executed. 4182 + * We retain it as a safety net and exclude it from the coverage 4183 + * statistics. 4184 + * 4185 + * LCOV_EXCL_START 4186 + */ 4187 + *eventPP = next; 4188 + return XML_ERROR_UNEXPECTED_STATE; 4189 + /* LCOV_EXCL_STOP */ 4190 + } 4191 + 4192 + switch (parser->m_parsingStatus.parsing) { 4193 + case XML_SUSPENDED: 4194 + *eventPP = next; 4195 + *nextPtr = next; 4196 + return XML_ERROR_NONE; 4197 + case XML_FINISHED: 4198 + *eventPP = next; 4199 + return XML_ERROR_ABORTED; 4200 + case XML_PARSING: 4201 + if (parser->m_reenter) { 4202 + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE 4203 + } 4204 + /* Fall through */ 4205 + default:; 4206 + *eventPP = s = next; 4207 + } 4208 + } 4209 + /* not reached */ 4210 + } 4211 + 4212 + #ifdef XML_DTD 4213 + 4214 + /* The idea here is to avoid using stack for each IGNORE section when 4215 + the whole file is parsed with one call. 
4216 + */ 4217 + static enum XML_Error PTRCALL ignoreSectionProcessor(XML_Parser parser, const char* start, const char* end, 4218 + const char** endPtr) { 4219 + enum XML_Error result = 4220 + doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer); 4221 + if (result != XML_ERROR_NONE) return result; 4222 + if (start) { 4223 + parser->m_processor = prologProcessor; 4224 + return prologProcessor(parser, start, end, endPtr); 4225 + } 4226 + return result; 4227 + } 4228 + 4229 + /* startPtr gets set to non-null is the section is closed, and to null 4230 + if the section is not yet closed. 4231 + */ 4232 + static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING* enc, const char** startPtr, const char* end, 4233 + const char** nextPtr, XML_Bool haveMore) { 4234 + const char* next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4235 + int tok; 4236 + const char* s = *startPtr; 4237 + const char** eventPP; 4238 + const char** eventEndPP; 4239 + if (enc == parser->m_encoding) { 4240 + eventPP = &parser->m_eventPtr; 4241 + *eventPP = s; 4242 + eventEndPP = &parser->m_eventEndPtr; 4243 + } else { 4244 + /* It's not entirely clear, but it seems the following two lines 4245 + * of code cannot be executed. The only occasions on which 'enc' 4246 + * is not 'encoding' are when this function is called 4247 + * from the internal entity processing, and IGNORE sections are an 4248 + * error in internal entities. 4249 + * 4250 + * Since it really isn't clear that this is true, we keep the code 4251 + * and just remove it from our coverage tests. 
4252 + * 4253 + * LCOV_EXCL_START 4254 + */ 4255 + eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4256 + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4257 + /* LCOV_EXCL_STOP */ 4258 + } 4259 + *eventPP = s; 4260 + *startPtr = NULL; 4261 + tok = XmlIgnoreSectionTok(enc, s, end, &next); 4262 + #if XML_GE == 1 4263 + if (!accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { 4264 + accountingOnAbort(parser); 4265 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4266 + } 4267 + #endif 4268 + *eventEndPP = next; 4269 + switch (tok) { 4270 + case XML_TOK_IGNORE_SECT: 4271 + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); 4272 + *startPtr = next; 4273 + *nextPtr = next; 4274 + if (parser->m_parsingStatus.parsing == XML_FINISHED) 4275 + return XML_ERROR_ABORTED; 4276 + else 4277 + return XML_ERROR_NONE; 4278 + case XML_TOK_INVALID: 4279 + *eventPP = next; 4280 + return XML_ERROR_INVALID_TOKEN; 4281 + case XML_TOK_PARTIAL_CHAR: 4282 + if (haveMore) { 4283 + *nextPtr = s; 4284 + return XML_ERROR_NONE; 4285 + } 4286 + return XML_ERROR_PARTIAL_CHAR; 4287 + case XML_TOK_PARTIAL: 4288 + case XML_TOK_NONE: 4289 + if (haveMore) { 4290 + *nextPtr = s; 4291 + return XML_ERROR_NONE; 4292 + } 4293 + return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ 4294 + default: 4295 + /* All of the tokens that XmlIgnoreSectionTok() returns have 4296 + * explicit cases to handle them, so this default case is never 4297 + * executed. We keep it as a safety net anyway, and remove it 4298 + * from our test coverage statistics. 
4299 + * 4300 + * LCOV_EXCL_START 4301 + */ 4302 + *eventPP = next; 4303 + return XML_ERROR_UNEXPECTED_STATE; 4304 + /* LCOV_EXCL_STOP */ 4305 + } 4306 + /* not reached */ 4307 + } 4308 + 4309 + #endif /* XML_DTD */ 4310 + 4311 + static enum XML_Error initializeEncoding(XML_Parser parser) { 4312 + const char* s; 4313 + #ifdef XML_UNICODE 4314 + char encodingBuf[128]; 4315 + /* See comments about `protocolEncodingName` in parserInit() */ 4316 + if (!parser->m_protocolEncodingName) 4317 + s = NULL; 4318 + else { 4319 + int i; 4320 + for (i = 0; parser->m_protocolEncodingName[i]; i++) { 4321 + if (i == sizeof(encodingBuf) - 1 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { 4322 + encodingBuf[0] = '\0'; 4323 + break; 4324 + } 4325 + encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; 4326 + } 4327 + encodingBuf[i] = '\0'; 4328 + s = encodingBuf; 4329 + } 4330 + #else 4331 + s = parser->m_protocolEncodingName; 4332 + #endif 4333 + if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(&parser->m_initEncoding, &parser->m_encoding, s)) 4334 + return XML_ERROR_NONE; 4335 + return handleUnknownEncoding(parser, parser->m_protocolEncodingName); 4336 + } 4337 + 4338 + static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char* s, const char* next) { 4339 + const char* encodingName = NULL; 4340 + const XML_Char* storedEncName = NULL; 4341 + const ENCODING* newEncoding = NULL; 4342 + const char* version = NULL; 4343 + const char* versionend = NULL; 4344 + const XML_Char* storedversion = NULL; 4345 + int standalone = -1; 4346 + 4347 + #if XML_GE == 1 4348 + if (!accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { 4349 + accountingOnAbort(parser); 4350 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4351 + } 4352 + #endif 4353 + 4354 + if (!(parser->m_ns ? 
XmlParseXmlDeclNS : XmlParseXmlDecl)(isGeneralTextEntity, parser->m_encoding, s, next, 4355 + &parser->m_eventPtr, &version, &versionend, &encodingName, 4356 + &newEncoding, &standalone)) { 4357 + if (isGeneralTextEntity) 4358 + return XML_ERROR_TEXT_DECL; 4359 + else 4360 + return XML_ERROR_XML_DECL; 4361 + } 4362 + if (!isGeneralTextEntity && standalone == 1) { 4363 + parser->m_dtd->standalone = XML_TRUE; 4364 + #ifdef XML_DTD 4365 + if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) 4366 + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 4367 + #endif /* XML_DTD */ 4368 + } 4369 + if (parser->m_xmlDeclHandler) { 4370 + if (encodingName != NULL) { 4371 + storedEncName = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, encodingName, 4372 + encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4373 + if (!storedEncName) return XML_ERROR_NO_MEMORY; 4374 + poolFinish(&parser->m_temp2Pool); 4375 + } 4376 + if (version) { 4377 + storedversion = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, 4378 + versionend - parser->m_encoding->minBytesPerChar); 4379 + if (!storedversion) return XML_ERROR_NO_MEMORY; 4380 + } 4381 + parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone); 4382 + } else if (parser->m_defaultHandler) 4383 + reportDefault(parser, parser->m_encoding, s, next); 4384 + if (parser->m_protocolEncodingName == NULL) { 4385 + if (newEncoding) { 4386 + /* Check that the specified encoding does not conflict with what 4387 + * the parser has already deduced. Do we have the same number 4388 + * of bytes in the smallest representation of a character? If 4389 + * this is UTF-16, is it the same endianness? 
4390 + */ 4391 + if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar || 4392 + (newEncoding->minBytesPerChar == 2 && newEncoding != parser->m_encoding)) { 4393 + parser->m_eventPtr = encodingName; 4394 + return XML_ERROR_INCORRECT_ENCODING; 4395 + } 4396 + parser->m_encoding = newEncoding; 4397 + } else if (encodingName) { 4398 + enum XML_Error result; 4399 + if (!storedEncName) { 4400 + storedEncName = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, encodingName, 4401 + encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4402 + if (!storedEncName) return XML_ERROR_NO_MEMORY; 4403 + } 4404 + result = handleUnknownEncoding(parser, storedEncName); 4405 + poolClear(&parser->m_temp2Pool); 4406 + if (result == XML_ERROR_UNKNOWN_ENCODING) parser->m_eventPtr = encodingName; 4407 + return result; 4408 + } 4409 + } 4410 + 4411 + if (storedEncName || storedversion) poolClear(&parser->m_temp2Pool); 4412 + 4413 + return XML_ERROR_NONE; 4414 + } 4415 + 4416 + static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char* encodingName) { 4417 + if (parser->m_unknownEncodingHandler) { 4418 + XML_Encoding info; 4419 + int i; 4420 + for (i = 0; i < 256; i++) info.map[i] = -1; 4421 + info.convert = NULL; 4422 + info.data = NULL; 4423 + info.release = NULL; 4424 + if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName, &info)) { 4425 + ENCODING* enc; 4426 + parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); 4427 + if (!parser->m_unknownEncodingMem) { 4428 + if (info.release) info.release(info.data); 4429 + return XML_ERROR_NO_MEMORY; 4430 + } 4431 + enc = (parser->m_ns ? 
XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(parser->m_unknownEncodingMem, info.map, 4432 + info.convert, info.data); 4433 + if (enc) { 4434 + parser->m_unknownEncodingData = info.data; 4435 + parser->m_unknownEncodingRelease = info.release; 4436 + parser->m_encoding = enc; 4437 + return XML_ERROR_NONE; 4438 + } 4439 + } 4440 + if (info.release != NULL) info.release(info.data); 4441 + } 4442 + return XML_ERROR_UNKNOWN_ENCODING; 4443 + } 4444 + 4445 + static enum XML_Error PTRCALL prologInitProcessor(XML_Parser parser, const char* s, const char* end, 4446 + const char** nextPtr) { 4447 + enum XML_Error result = initializeEncoding(parser); 4448 + if (result != XML_ERROR_NONE) return result; 4449 + parser->m_processor = prologProcessor; 4450 + return prologProcessor(parser, s, end, nextPtr); 4451 + } 4452 + 4453 + #ifdef XML_DTD 4454 + 4455 + static enum XML_Error PTRCALL externalParEntInitProcessor(XML_Parser parser, const char* s, const char* end, 4456 + const char** nextPtr) { 4457 + enum XML_Error result = initializeEncoding(parser); 4458 + if (result != XML_ERROR_NONE) return result; 4459 + 4460 + /* we know now that XML_Parse(Buffer) has been called, 4461 + so we consider the external parameter entity read */ 4462 + parser->m_dtd->paramEntityRead = XML_TRUE; 4463 + 4464 + if (parser->m_prologState.inEntityValue) { 4465 + parser->m_processor = entityValueInitProcessor; 4466 + return entityValueInitProcessor(parser, s, end, nextPtr); 4467 + } else { 4468 + parser->m_processor = externalParEntProcessor; 4469 + return externalParEntProcessor(parser, s, end, nextPtr); 4470 + } 4471 + } 4472 + 4473 + static enum XML_Error PTRCALL entityValueInitProcessor(XML_Parser parser, const char* s, const char* end, 4474 + const char** nextPtr) { 4475 + int tok; 4476 + const char* start = s; 4477 + const char* next = start; 4478 + parser->m_eventPtr = start; 4479 + 4480 + for (;;) { 4481 + tok = XmlPrologTok(parser->m_encoding, start, end, &next); 4482 + /* Note: Except for 
XML_TOK_BOM below, these bytes are accounted later in: 4483 + - storeEntityValue 4484 + - processXmlDecl 4485 + */ 4486 + parser->m_eventEndPtr = next; 4487 + if (tok <= 0) { 4488 + if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4489 + *nextPtr = s; 4490 + return XML_ERROR_NONE; 4491 + } 4492 + switch (tok) { 4493 + case XML_TOK_INVALID: 4494 + return XML_ERROR_INVALID_TOKEN; 4495 + case XML_TOK_PARTIAL: 4496 + return XML_ERROR_UNCLOSED_TOKEN; 4497 + case XML_TOK_PARTIAL_CHAR: 4498 + return XML_ERROR_PARTIAL_CHAR; 4499 + case XML_TOK_NONE: /* start == end */ 4500 + default: 4501 + break; 4502 + } 4503 + /* found end of entity value - can store it now */ 4504 + return storeEntityValue(parser, parser->m_encoding, s, end, XML_ACCOUNT_DIRECT, NULL); 4505 + } else if (tok == XML_TOK_XML_DECL) { 4506 + enum XML_Error result; 4507 + result = processXmlDecl(parser, 0, start, next); 4508 + if (result != XML_ERROR_NONE) return result; 4509 + /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For 4510 + * that to happen, a parameter entity parsing handler must have attempted 4511 + * to suspend the parser, which fails and raises an error. The parser can 4512 + * be aborted, but can't be suspended. 4513 + */ 4514 + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; 4515 + *nextPtr = next; 4516 + /* stop scanning for text declaration - we found one */ 4517 + parser->m_processor = entityValueProcessor; 4518 + return entityValueProcessor(parser, next, end, nextPtr); 4519 + } 4520 + /* XmlPrologTok has now set the encoding based on the BOM it found, and we 4521 + must move s and nextPtr forward to consume the BOM. 4522 + 4523 + If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we 4524 + would leave the BOM in the buffer and return. On the next call to this 4525 + function, our XmlPrologTok call would return XML_TOK_INVALID, since it 4526 + is not valid to have multiple BOMs. 
4527 + */ 4528 + else if (tok == XML_TOK_BOM) { 4529 + #if XML_GE == 1 4530 + if (!accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { 4531 + accountingOnAbort(parser); 4532 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4533 + } 4534 + #endif 4535 + 4536 + *nextPtr = next; 4537 + s = next; 4538 + } 4539 + /* If we get this token, we have the start of what might be a 4540 + normal tag, but not a declaration (i.e. it doesn't begin with 4541 + "<!"). In a DTD context, that isn't legal. 4542 + */ 4543 + else if (tok == XML_TOK_INSTANCE_START) { 4544 + *nextPtr = next; 4545 + return XML_ERROR_SYNTAX; 4546 + } 4547 + start = next; 4548 + parser->m_eventPtr = start; 4549 + } 4550 + } 4551 + 4552 + static enum XML_Error PTRCALL externalParEntProcessor(XML_Parser parser, const char* s, const char* end, 4553 + const char** nextPtr) { 4554 + const char* next = s; 4555 + int tok; 4556 + 4557 + tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4558 + if (tok <= 0) { 4559 + if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4560 + *nextPtr = s; 4561 + return XML_ERROR_NONE; 4562 + } 4563 + switch (tok) { 4564 + case XML_TOK_INVALID: 4565 + return XML_ERROR_INVALID_TOKEN; 4566 + case XML_TOK_PARTIAL: 4567 + return XML_ERROR_UNCLOSED_TOKEN; 4568 + case XML_TOK_PARTIAL_CHAR: 4569 + return XML_ERROR_PARTIAL_CHAR; 4570 + case XML_TOK_NONE: /* start == end */ 4571 + default: 4572 + break; 4573 + } 4574 + } 4575 + /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. 4576 + However, when parsing an external subset, doProlog will not accept a BOM 4577 + as valid, and report a syntax error, so we have to skip the BOM, and 4578 + account for the BOM bytes. 
4579 + */ 4580 + else if (tok == XML_TOK_BOM) { 4581 + if (!accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { 4582 + accountingOnAbort(parser); 4583 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4584 + } 4585 + 4586 + s = next; 4587 + tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4588 + } 4589 + 4590 + parser->m_processor = prologProcessor; 4591 + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 4592 + (XML_Bool)!parser->m_parsingStatus.finalBuffer, XML_TRUE, XML_ACCOUNT_DIRECT); 4593 + } 4594 + 4595 + static enum XML_Error PTRCALL entityValueProcessor(XML_Parser parser, const char* s, const char* end, 4596 + const char** nextPtr) { 4597 + const char* start = s; 4598 + const char* next = s; 4599 + const ENCODING* enc = parser->m_encoding; 4600 + int tok; 4601 + 4602 + for (;;) { 4603 + tok = XmlPrologTok(enc, start, end, &next); 4604 + /* Note: These bytes are accounted later in: 4605 + - storeEntityValue 4606 + */ 4607 + if (tok <= 0) { 4608 + if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4609 + *nextPtr = s; 4610 + return XML_ERROR_NONE; 4611 + } 4612 + switch (tok) { 4613 + case XML_TOK_INVALID: 4614 + return XML_ERROR_INVALID_TOKEN; 4615 + case XML_TOK_PARTIAL: 4616 + return XML_ERROR_UNCLOSED_TOKEN; 4617 + case XML_TOK_PARTIAL_CHAR: 4618 + return XML_ERROR_PARTIAL_CHAR; 4619 + case XML_TOK_NONE: /* start == end */ 4620 + default: 4621 + break; 4622 + } 4623 + /* found end of entity value - can store it now */ 4624 + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL); 4625 + } 4626 + start = next; 4627 + } 4628 + } 4629 + 4630 + #endif /* XML_DTD */ 4631 + 4632 + static enum XML_Error PTRCALL prologProcessor(XML_Parser parser, const char* s, const char* end, const char** nextPtr) { 4633 + const char* next = s; 4634 + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4635 + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 4636 
+ (XML_Bool)!parser->m_parsingStatus.finalBuffer, XML_TRUE, XML_ACCOUNT_DIRECT); 4637 + } 4638 + 4639 + static enum XML_Error doProlog(XML_Parser parser, const ENCODING* enc, const char* s, const char* end, int tok, 4640 + const char* next, const char** nextPtr, XML_Bool haveMore, XML_Bool allowClosingDoctype, 4641 + enum XML_Account account) { 4642 + #ifdef XML_DTD 4643 + static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; 4644 + #endif /* XML_DTD */ 4645 + static const XML_Char atypeCDATA[] = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; 4646 + static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'}; 4647 + static const XML_Char atypeIDREF[] = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; 4648 + static const XML_Char atypeIDREFS[] = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; 4649 + static const XML_Char atypeENTITY[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; 4650 + static const XML_Char atypeENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, 4651 + ASCII_I, ASCII_E, ASCII_S, '\0'}; 4652 + static const XML_Char atypeNMTOKEN[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; 4653 + static const XML_Char atypeNMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, 4654 + ASCII_E, ASCII_N, ASCII_S, '\0'}; 4655 + static const XML_Char notationPrefix[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, 4656 + ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; 4657 + static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; 4658 + static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; 4659 + 4660 + #ifndef XML_DTD 4661 + UNUSED_P(account); 4662 + #endif 4663 + 4664 + /* save one level of indirection */ 4665 + DTD* const dtd = parser->m_dtd; 4666 + 4667 + const char** eventPP; 4668 + const char** eventEndPP; 4669 + enum XML_Content_Quant quant; 4670 + 4671 + if (enc == parser->m_encoding) { 4672 + eventPP = &parser->m_eventPtr; 4673 + eventEndPP = 
&parser->m_eventEndPtr; 4674 + } else { 4675 + eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4676 + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4677 + } 4678 + 4679 + for (;;) { 4680 + int role; 4681 + XML_Bool handleDefault = XML_TRUE; 4682 + *eventPP = s; 4683 + *eventEndPP = next; 4684 + if (tok <= 0) { 4685 + if (haveMore && tok != XML_TOK_INVALID) { 4686 + *nextPtr = s; 4687 + return XML_ERROR_NONE; 4688 + } 4689 + switch (tok) { 4690 + case XML_TOK_INVALID: 4691 + *eventPP = next; 4692 + return XML_ERROR_INVALID_TOKEN; 4693 + case XML_TOK_PARTIAL: 4694 + return XML_ERROR_UNCLOSED_TOKEN; 4695 + case XML_TOK_PARTIAL_CHAR: 4696 + return XML_ERROR_PARTIAL_CHAR; 4697 + case -XML_TOK_PROLOG_S: 4698 + tok = -tok; 4699 + break; 4700 + case XML_TOK_NONE: 4701 + #ifdef XML_DTD 4702 + /* for internal PE NOT referenced between declarations */ 4703 + if (enc != parser->m_encoding && !parser->m_openInternalEntities->betweenDecl) { 4704 + *nextPtr = s; 4705 + return XML_ERROR_NONE; 4706 + } 4707 + /* WFC: PE Between Declarations - must check that PE contains 4708 + complete markup, not only for external PEs, but also for 4709 + internal PEs if the reference occurs between declarations. 
4710 + */ 4711 + if (parser->m_isParamEntity || enc != parser->m_encoding) { 4712 + if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) == XML_ROLE_ERROR) 4713 + return XML_ERROR_INCOMPLETE_PE; 4714 + *nextPtr = s; 4715 + return XML_ERROR_NONE; 4716 + } 4717 + #endif /* XML_DTD */ 4718 + return XML_ERROR_NO_ELEMENTS; 4719 + default: 4720 + tok = -tok; 4721 + next = end; 4722 + break; 4723 + } 4724 + } 4725 + role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); 4726 + #if XML_GE == 1 4727 + switch (role) { 4728 + case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor 4729 + case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl 4730 + #ifdef XML_DTD 4731 + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl 4732 + #endif 4733 + break; 4734 + default: 4735 + if (!accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 4736 + accountingOnAbort(parser); 4737 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4738 + } 4739 + } 4740 + #endif 4741 + switch (role) { 4742 + case XML_ROLE_XML_DECL: { 4743 + enum XML_Error result = processXmlDecl(parser, 0, s, next); 4744 + if (result != XML_ERROR_NONE) return result; 4745 + enc = parser->m_encoding; 4746 + handleDefault = XML_FALSE; 4747 + } break; 4748 + case XML_ROLE_DOCTYPE_NAME: 4749 + if (parser->m_startDoctypeDeclHandler) { 4750 + parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next); 4751 + if (!parser->m_doctypeName) return XML_ERROR_NO_MEMORY; 4752 + poolFinish(&parser->m_tempPool); 4753 + parser->m_doctypePubid = NULL; 4754 + handleDefault = XML_FALSE; 4755 + } 4756 + parser->m_doctypeSysid = NULL; /* always initialize to NULL */ 4757 + break; 4758 + case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: 4759 + if (parser->m_startDoctypeDeclHandler) { 4760 + parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 4761 + parser->m_doctypePubid, 1); 4762 + parser->m_doctypeName = NULL; 4763 + 
poolClear(&parser->m_tempPool); 4764 + handleDefault = XML_FALSE; 4765 + } 4766 + break; 4767 + #ifdef XML_DTD 4768 + case XML_ROLE_TEXT_DECL: { 4769 + enum XML_Error result = processXmlDecl(parser, 1, s, next); 4770 + if (result != XML_ERROR_NONE) return result; 4771 + enc = parser->m_encoding; 4772 + handleDefault = XML_FALSE; 4773 + } break; 4774 + #endif /* XML_DTD */ 4775 + case XML_ROLE_DOCTYPE_PUBLIC_ID: 4776 + #ifdef XML_DTD 4777 + parser->m_useForeignDTD = XML_FALSE; 4778 + parser->m_declEntity = (ENTITY*)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 4779 + if (!parser->m_declEntity) return XML_ERROR_NO_MEMORY; 4780 + #endif /* XML_DTD */ 4781 + dtd->hasParamEntityRefs = XML_TRUE; 4782 + if (parser->m_startDoctypeDeclHandler) { 4783 + XML_Char* pubId; 4784 + if (!XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; 4785 + pubId = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 4786 + if (!pubId) return XML_ERROR_NO_MEMORY; 4787 + normalizePublicId(pubId); 4788 + poolFinish(&parser->m_tempPool); 4789 + parser->m_doctypePubid = pubId; 4790 + handleDefault = XML_FALSE; 4791 + goto alreadyChecked; 4792 + } 4793 + /* fall through */ 4794 + case XML_ROLE_ENTITY_PUBLIC_ID: 4795 + if (!XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; 4796 + alreadyChecked: 4797 + if (dtd->keepProcessing && parser->m_declEntity) { 4798 + XML_Char* tem = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 4799 + if (!tem) return XML_ERROR_NO_MEMORY; 4800 + normalizePublicId(tem); 4801 + parser->m_declEntity->publicId = tem; 4802 + poolFinish(&dtd->pool); 4803 + /* Don't suppress the default handler if we fell through from 4804 + * the XML_ROLE_DOCTYPE_PUBLIC_ID case. 
4805 + */ 4806 + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) handleDefault = XML_FALSE; 4807 + } 4808 + break; 4809 + case XML_ROLE_DOCTYPE_CLOSE: 4810 + if (allowClosingDoctype != XML_TRUE) { 4811 + /* Must not close doctype from within expanded parameter entities */ 4812 + return XML_ERROR_INVALID_TOKEN; 4813 + } 4814 + 4815 + if (parser->m_doctypeName) { 4816 + parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 4817 + parser->m_doctypePubid, 0); 4818 + poolClear(&parser->m_tempPool); 4819 + handleDefault = XML_FALSE; 4820 + } 4821 + /* parser->m_doctypeSysid will be non-NULL in the case of a previous 4822 + XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler 4823 + was not set, indicating an external subset 4824 + */ 4825 + #ifdef XML_DTD 4826 + if (parser->m_doctypeSysid || parser->m_useForeignDTD) { 4827 + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 4828 + dtd->hasParamEntityRefs = XML_TRUE; 4829 + if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) { 4830 + ENTITY* entity = (ENTITY*)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 4831 + if (!entity) { 4832 + /* The external subset name "#" will have already been 4833 + * inserted into the hash table at the start of the 4834 + * external entity parsing, so no allocation will happen 4835 + * and lookup() cannot fail. 
4836 + */ 4837 + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ 4838 + } 4839 + if (parser->m_useForeignDTD) entity->base = parser->m_curBase; 4840 + dtd->paramEntityRead = XML_FALSE; 4841 + if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg, 0, entity->base, 4842 + entity->systemId, entity->publicId)) 4843 + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 4844 + if (dtd->paramEntityRead) { 4845 + if (!dtd->standalone && parser->m_notStandaloneHandler && 4846 + !parser->m_notStandaloneHandler(parser->m_handlerArg)) 4847 + return XML_ERROR_NOT_STANDALONE; 4848 + } 4849 + /* if we didn't read the foreign DTD then this means that there 4850 + is no external subset and we must reset dtd->hasParamEntityRefs 4851 + */ 4852 + else if (!parser->m_doctypeSysid) 4853 + dtd->hasParamEntityRefs = hadParamEntityRefs; 4854 + /* end of DTD - no need to update dtd->keepProcessing */ 4855 + } 4856 + parser->m_useForeignDTD = XML_FALSE; 4857 + } 4858 + #endif /* XML_DTD */ 4859 + if (parser->m_endDoctypeDeclHandler) { 4860 + parser->m_endDoctypeDeclHandler(parser->m_handlerArg); 4861 + handleDefault = XML_FALSE; 4862 + } 4863 + break; 4864 + case XML_ROLE_INSTANCE_START: 4865 + #ifdef XML_DTD 4866 + /* if there is no DOCTYPE declaration then now is the 4867 + last chance to read the foreign DTD 4868 + */ 4869 + if (parser->m_useForeignDTD) { 4870 + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 4871 + dtd->hasParamEntityRefs = XML_TRUE; 4872 + if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) { 4873 + ENTITY* entity = (ENTITY*)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 4874 + if (!entity) return XML_ERROR_NO_MEMORY; 4875 + entity->base = parser->m_curBase; 4876 + dtd->paramEntityRead = XML_FALSE; 4877 + if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg, 0, entity->base, 4878 + entity->systemId, entity->publicId)) 4879 + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 4880 + if 
(dtd->paramEntityRead) { 4881 + if (!dtd->standalone && parser->m_notStandaloneHandler && 4882 + !parser->m_notStandaloneHandler(parser->m_handlerArg)) 4883 + return XML_ERROR_NOT_STANDALONE; 4884 + } 4885 + /* if we didn't read the foreign DTD then this means that there 4886 + is no external subset and we must reset dtd->hasParamEntityRefs 4887 + */ 4888 + else 4889 + dtd->hasParamEntityRefs = hadParamEntityRefs; 4890 + /* end of DTD - no need to update dtd->keepProcessing */ 4891 + } 4892 + } 4893 + #endif /* XML_DTD */ 4894 + parser->m_processor = contentProcessor; 4895 + return contentProcessor(parser, s, end, nextPtr); 4896 + case XML_ROLE_ATTLIST_ELEMENT_NAME: 4897 + parser->m_declElementType = getElementType(parser, enc, s, next); 4898 + if (!parser->m_declElementType) return XML_ERROR_NO_MEMORY; 4899 + goto checkAttListDeclHandler; 4900 + case XML_ROLE_ATTRIBUTE_NAME: 4901 + parser->m_declAttributeId = getAttributeId(parser, enc, s, next); 4902 + if (!parser->m_declAttributeId) return XML_ERROR_NO_MEMORY; 4903 + parser->m_declAttributeIsCdata = XML_FALSE; 4904 + parser->m_declAttributeType = NULL; 4905 + parser->m_declAttributeIsId = XML_FALSE; 4906 + goto checkAttListDeclHandler; 4907 + case XML_ROLE_ATTRIBUTE_TYPE_CDATA: 4908 + parser->m_declAttributeIsCdata = XML_TRUE; 4909 + parser->m_declAttributeType = atypeCDATA; 4910 + goto checkAttListDeclHandler; 4911 + case XML_ROLE_ATTRIBUTE_TYPE_ID: 4912 + parser->m_declAttributeIsId = XML_TRUE; 4913 + parser->m_declAttributeType = atypeID; 4914 + goto checkAttListDeclHandler; 4915 + case XML_ROLE_ATTRIBUTE_TYPE_IDREF: 4916 + parser->m_declAttributeType = atypeIDREF; 4917 + goto checkAttListDeclHandler; 4918 + case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: 4919 + parser->m_declAttributeType = atypeIDREFS; 4920 + goto checkAttListDeclHandler; 4921 + case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: 4922 + parser->m_declAttributeType = atypeENTITY; 4923 + goto checkAttListDeclHandler; 4924 + case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: 4925 
+ parser->m_declAttributeType = atypeENTITIES; 4926 + goto checkAttListDeclHandler; 4927 + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: 4928 + parser->m_declAttributeType = atypeNMTOKEN; 4929 + goto checkAttListDeclHandler; 4930 + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: 4931 + parser->m_declAttributeType = atypeNMTOKENS; 4932 + checkAttListDeclHandler: 4933 + if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; 4934 + break; 4935 + case XML_ROLE_ATTRIBUTE_ENUM_VALUE: 4936 + case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: 4937 + if (dtd->keepProcessing && parser->m_attlistDeclHandler) { 4938 + const XML_Char* prefix; 4939 + if (parser->m_declAttributeType) { 4940 + prefix = enumValueSep; 4941 + } else { 4942 + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix : enumValueStart); 4943 + } 4944 + if (!poolAppendString(&parser->m_tempPool, prefix)) return XML_ERROR_NO_MEMORY; 4945 + if (!poolAppend(&parser->m_tempPool, enc, s, next)) return XML_ERROR_NO_MEMORY; 4946 + parser->m_declAttributeType = parser->m_tempPool.start; 4947 + handleDefault = XML_FALSE; 4948 + } 4949 + break; 4950 + case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: 4951 + case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: 4952 + if (dtd->keepProcessing) { 4953 + if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId, parser->m_declAttributeIsCdata, 4954 + parser->m_declAttributeIsId, 0, parser)) 4955 + return XML_ERROR_NO_MEMORY; 4956 + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 4957 + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) || 4958 + (*parser->m_declAttributeType == XML_T(ASCII_N) && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 4959 + /* Enumerated or Notation type */ 4960 + if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) || 4961 + !poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 4962 + return XML_ERROR_NO_MEMORY; 4963 + parser->m_declAttributeType = parser->m_tempPool.start; 4964 + 
poolFinish(&parser->m_tempPool); 4965 + } 4966 + *eventEndPP = s; 4967 + parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, 4968 + parser->m_declAttributeId->name, parser->m_declAttributeType, 0, 4969 + role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); 4970 + handleDefault = XML_FALSE; 4971 + } 4972 + } 4973 + poolClear(&parser->m_tempPool); 4974 + break; 4975 + case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: 4976 + case XML_ROLE_FIXED_ATTRIBUTE_VALUE: 4977 + if (dtd->keepProcessing) { 4978 + const XML_Char* attVal; 4979 + enum XML_Error result = 4980 + storeAttributeValue(parser, enc, parser->m_declAttributeIsCdata, s + enc->minBytesPerChar, 4981 + next - enc->minBytesPerChar, &dtd->pool, XML_ACCOUNT_NONE); 4982 + if (result) return result; 4983 + attVal = poolStart(&dtd->pool); 4984 + poolFinish(&dtd->pool); 4985 + /* ID attributes aren't allowed to have a default */ 4986 + if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId, parser->m_declAttributeIsCdata, 4987 + XML_FALSE, attVal, parser)) 4988 + return XML_ERROR_NO_MEMORY; 4989 + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 4990 + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) || 4991 + (*parser->m_declAttributeType == XML_T(ASCII_N) && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 4992 + /* Enumerated or Notation type */ 4993 + if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) || 4994 + !poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 4995 + return XML_ERROR_NO_MEMORY; 4996 + parser->m_declAttributeType = parser->m_tempPool.start; 4997 + poolFinish(&parser->m_tempPool); 4998 + } 4999 + *eventEndPP = s; 5000 + parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, 5001 + parser->m_declAttributeId->name, parser->m_declAttributeType, attVal, 5002 + role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); 5003 + poolClear(&parser->m_tempPool); 5004 + handleDefault = XML_FALSE; 5005 + } 5006 + } 5007 + break; 
5008 + case XML_ROLE_ENTITY_VALUE: 5009 + if (dtd->keepProcessing) { 5010 + #if XML_GE == 1 5011 + // This will store the given replacement text in 5012 + // parser->m_declEntity->textPtr. 5013 + enum XML_Error result = callStoreEntityValue(parser, enc, s + enc->minBytesPerChar, 5014 + next - enc->minBytesPerChar, XML_ACCOUNT_NONE); 5015 + if (parser->m_declEntity) { 5016 + parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); 5017 + parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool)); 5018 + poolFinish(&dtd->entityValuePool); 5019 + if (parser->m_entityDeclHandler) { 5020 + *eventEndPP = s; 5021 + parser->m_entityDeclHandler(parser->m_handlerArg, parser->m_declEntity->name, 5022 + parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5023 + parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5024 + handleDefault = XML_FALSE; 5025 + } 5026 + } else 5027 + poolDiscard(&dtd->entityValuePool); 5028 + if (result != XML_ERROR_NONE) return result; 5029 + #else 5030 + // This will store "&amp;entity123;" in parser->m_declEntity->textPtr 5031 + // to end up as "&entity123;" in the handler. 
5032 + if (parser->m_declEntity != NULL) { 5033 + const enum XML_Error result = storeSelfEntityValue(parser, parser->m_declEntity); 5034 + if (result != XML_ERROR_NONE) return result; 5035 + 5036 + if (parser->m_entityDeclHandler) { 5037 + *eventEndPP = s; 5038 + parser->m_entityDeclHandler(parser->m_handlerArg, parser->m_declEntity->name, 5039 + parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5040 + parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5041 + handleDefault = XML_FALSE; 5042 + } 5043 + } 5044 + #endif 5045 + } 5046 + break; 5047 + case XML_ROLE_DOCTYPE_SYSTEM_ID: 5048 + #ifdef XML_DTD 5049 + parser->m_useForeignDTD = XML_FALSE; 5050 + #endif /* XML_DTD */ 5051 + dtd->hasParamEntityRefs = XML_TRUE; 5052 + if (parser->m_startDoctypeDeclHandler) { 5053 + parser->m_doctypeSysid = 5054 + poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 5055 + if (parser->m_doctypeSysid == NULL) return XML_ERROR_NO_MEMORY; 5056 + poolFinish(&parser->m_tempPool); 5057 + handleDefault = XML_FALSE; 5058 + } 5059 + #ifdef XML_DTD 5060 + else 5061 + /* use externalSubsetName to make parser->m_doctypeSysid non-NULL 5062 + for the case where no parser->m_startDoctypeDeclHandler is set */ 5063 + parser->m_doctypeSysid = externalSubsetName; 5064 + #endif /* XML_DTD */ 5065 + if (!dtd->standalone 5066 + #ifdef XML_DTD 5067 + && !parser->m_paramEntityParsing 5068 + #endif /* XML_DTD */ 5069 + && parser->m_notStandaloneHandler && !parser->m_notStandaloneHandler(parser->m_handlerArg)) 5070 + return XML_ERROR_NOT_STANDALONE; 5071 + #ifndef XML_DTD 5072 + break; 5073 + #else /* XML_DTD */ 5074 + if (!parser->m_declEntity) { 5075 + parser->m_declEntity = (ENTITY*)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 5076 + if (!parser->m_declEntity) return XML_ERROR_NO_MEMORY; 5077 + parser->m_declEntity->publicId = NULL; 5078 + } 5079 + #endif /* XML_DTD */ 5080 + /* fall through */ 5081 + case 
XML_ROLE_ENTITY_SYSTEM_ID: 5082 + if (dtd->keepProcessing && parser->m_declEntity) { 5083 + parser->m_declEntity->systemId = 5084 + poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 5085 + if (!parser->m_declEntity->systemId) return XML_ERROR_NO_MEMORY; 5086 + parser->m_declEntity->base = parser->m_curBase; 5087 + poolFinish(&dtd->pool); 5088 + /* Don't suppress the default handler if we fell through from 5089 + * the XML_ROLE_DOCTYPE_SYSTEM_ID case. 5090 + */ 5091 + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) handleDefault = XML_FALSE; 5092 + } 5093 + break; 5094 + case XML_ROLE_ENTITY_COMPLETE: 5095 + #if XML_GE == 0 5096 + // This will store "&amp;entity123;" in entity->textPtr 5097 + // to end up as "&entity123;" in the handler. 5098 + if (parser->m_declEntity != NULL) { 5099 + const enum XML_Error result = storeSelfEntityValue(parser, parser->m_declEntity); 5100 + if (result != XML_ERROR_NONE) return result; 5101 + } 5102 + #endif 5103 + if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) { 5104 + *eventEndPP = s; 5105 + parser->m_entityDeclHandler(parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->is_param, 5106 + 0, 0, parser->m_declEntity->base, parser->m_declEntity->systemId, 5107 + parser->m_declEntity->publicId, 0); 5108 + handleDefault = XML_FALSE; 5109 + } 5110 + break; 5111 + case XML_ROLE_ENTITY_NOTATION_NAME: 5112 + if (dtd->keepProcessing && parser->m_declEntity) { 5113 + parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next); 5114 + if (!parser->m_declEntity->notation) return XML_ERROR_NO_MEMORY; 5115 + poolFinish(&dtd->pool); 5116 + if (parser->m_unparsedEntityDeclHandler) { 5117 + *eventEndPP = s; 5118 + parser->m_unparsedEntityDeclHandler(parser->m_handlerArg, parser->m_declEntity->name, 5119 + parser->m_declEntity->base, parser->m_declEntity->systemId, 5120 + parser->m_declEntity->publicId, 
parser->m_declEntity->notation); 5121 + handleDefault = XML_FALSE; 5122 + } else if (parser->m_entityDeclHandler) { 5123 + *eventEndPP = s; 5124 + parser->m_entityDeclHandler(parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, 5125 + parser->m_declEntity->base, parser->m_declEntity->systemId, 5126 + parser->m_declEntity->publicId, parser->m_declEntity->notation); 5127 + handleDefault = XML_FALSE; 5128 + } 5129 + } 5130 + break; 5131 + case XML_ROLE_GENERAL_ENTITY_NAME: { 5132 + if (XmlPredefinedEntityName(enc, s, next)) { 5133 + parser->m_declEntity = NULL; 5134 + break; 5135 + } 5136 + if (dtd->keepProcessing) { 5137 + const XML_Char* name = poolStoreString(&dtd->pool, enc, s, next); 5138 + if (!name) return XML_ERROR_NO_MEMORY; 5139 + parser->m_declEntity = (ENTITY*)lookup(parser, &dtd->generalEntities, name, sizeof(ENTITY)); 5140 + if (!parser->m_declEntity) return XML_ERROR_NO_MEMORY; 5141 + if (parser->m_declEntity->name != name) { 5142 + poolDiscard(&dtd->pool); 5143 + parser->m_declEntity = NULL; 5144 + } else { 5145 + poolFinish(&dtd->pool); 5146 + parser->m_declEntity->publicId = NULL; 5147 + parser->m_declEntity->is_param = XML_FALSE; 5148 + /* if we have a parent parser or are reading an internal parameter 5149 + entity, then the entity declaration is not considered "internal" 5150 + */ 5151 + parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities); 5152 + if (parser->m_entityDeclHandler) handleDefault = XML_FALSE; 5153 + } 5154 + } else { 5155 + poolDiscard(&dtd->pool); 5156 + parser->m_declEntity = NULL; 5157 + } 5158 + } break; 5159 + case XML_ROLE_PARAM_ENTITY_NAME: 5160 + #ifdef XML_DTD 5161 + if (dtd->keepProcessing) { 5162 + const XML_Char* name = poolStoreString(&dtd->pool, enc, s, next); 5163 + if (!name) return XML_ERROR_NO_MEMORY; 5164 + parser->m_declEntity = (ENTITY*)lookup(parser, &dtd->paramEntities, name, sizeof(ENTITY)); 5165 + if (!parser->m_declEntity) return XML_ERROR_NO_MEMORY; 5166 + if 
(parser->m_declEntity->name != name) { 5167 + poolDiscard(&dtd->pool); 5168 + parser->m_declEntity = NULL; 5169 + } else { 5170 + poolFinish(&dtd->pool); 5171 + parser->m_declEntity->publicId = NULL; 5172 + parser->m_declEntity->is_param = XML_TRUE; 5173 + /* if we have a parent parser or are reading an internal parameter 5174 + entity, then the entity declaration is not considered "internal" 5175 + */ 5176 + parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities); 5177 + if (parser->m_entityDeclHandler) handleDefault = XML_FALSE; 5178 + } 5179 + } else { 5180 + poolDiscard(&dtd->pool); 5181 + parser->m_declEntity = NULL; 5182 + } 5183 + #else /* not XML_DTD */ 5184 + parser->m_declEntity = NULL; 5185 + #endif /* XML_DTD */ 5186 + break; 5187 + case XML_ROLE_NOTATION_NAME: 5188 + parser->m_declNotationPublicId = NULL; 5189 + parser->m_declNotationName = NULL; 5190 + if (parser->m_notationDeclHandler) { 5191 + parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next); 5192 + if (!parser->m_declNotationName) return XML_ERROR_NO_MEMORY; 5193 + poolFinish(&parser->m_tempPool); 5194 + handleDefault = XML_FALSE; 5195 + } 5196 + break; 5197 + case XML_ROLE_NOTATION_PUBLIC_ID: 5198 + if (!XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; 5199 + if (parser->m_declNotationName) { /* means m_notationDeclHandler != NULL */ 5200 + XML_Char* tem = 5201 + poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 5202 + if (!tem) return XML_ERROR_NO_MEMORY; 5203 + normalizePublicId(tem); 5204 + parser->m_declNotationPublicId = tem; 5205 + poolFinish(&parser->m_tempPool); 5206 + handleDefault = XML_FALSE; 5207 + } 5208 + break; 5209 + case XML_ROLE_NOTATION_SYSTEM_ID: 5210 + if (parser->m_declNotationName && parser->m_notationDeclHandler) { 5211 + const XML_Char* systemId = 5212 + poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - 
enc->minBytesPerChar); 5213 + if (!systemId) return XML_ERROR_NO_MEMORY; 5214 + *eventEndPP = s; 5215 + parser->m_notationDeclHandler(parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, systemId, 5216 + parser->m_declNotationPublicId); 5217 + handleDefault = XML_FALSE; 5218 + } 5219 + poolClear(&parser->m_tempPool); 5220 + break; 5221 + case XML_ROLE_NOTATION_NO_SYSTEM_ID: 5222 + if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { 5223 + *eventEndPP = s; 5224 + parser->m_notationDeclHandler(parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 0, 5225 + parser->m_declNotationPublicId); 5226 + handleDefault = XML_FALSE; 5227 + } 5228 + poolClear(&parser->m_tempPool); 5229 + break; 5230 + case XML_ROLE_ERROR: 5231 + switch (tok) { 5232 + case XML_TOK_PARAM_ENTITY_REF: 5233 + /* PE references in internal subset are 5234 + not allowed within declarations. */ 5235 + return XML_ERROR_PARAM_ENTITY_REF; 5236 + case XML_TOK_XML_DECL: 5237 + return XML_ERROR_MISPLACED_XML_PI; 5238 + default: 5239 + return XML_ERROR_SYNTAX; 5240 + } 5241 + #ifdef XML_DTD 5242 + case XML_ROLE_IGNORE_SECT: { 5243 + enum XML_Error result; 5244 + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); 5245 + handleDefault = XML_FALSE; 5246 + result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); 5247 + if (result != XML_ERROR_NONE) 5248 + return result; 5249 + else if (!next) { 5250 + parser->m_processor = ignoreSectionProcessor; 5251 + return result; 5252 + } 5253 + } break; 5254 + #endif /* XML_DTD */ 5255 + case XML_ROLE_GROUP_OPEN: 5256 + if (parser->m_prologState.level >= parser->m_groupSize) { 5257 + if (parser->m_groupSize) { 5258 + { 5259 + /* Detect and prevent integer overflow */ 5260 + if (parser->m_groupSize > (unsigned int)(-1) / 2u) { 5261 + return XML_ERROR_NO_MEMORY; 5262 + } 5263 + 5264 + char* const new_connector = REALLOC(parser, parser->m_groupConnector, parser->m_groupSize *= 2); 5265 + if 
(new_connector == NULL) { 5266 + parser->m_groupSize /= 2; 5267 + return XML_ERROR_NO_MEMORY; 5268 + } 5269 + parser->m_groupConnector = new_connector; 5270 + } 5271 + 5272 + if (dtd->scaffIndex) { 5273 + /* Detect and prevent integer overflow. 5274 + * The preprocessor guard addresses the "always false" warning 5275 + * from -Wtype-limits on platforms where 5276 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 5277 + #if UINT_MAX >= SIZE_MAX 5278 + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { 5279 + parser->m_groupSize /= 2; 5280 + return XML_ERROR_NO_MEMORY; 5281 + } 5282 + #endif 5283 + 5284 + int* const new_scaff_index = REALLOC(parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); 5285 + if (new_scaff_index == NULL) { 5286 + parser->m_groupSize /= 2; 5287 + return XML_ERROR_NO_MEMORY; 5288 + } 5289 + dtd->scaffIndex = new_scaff_index; 5290 + } 5291 + } else { 5292 + parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32); 5293 + if (!parser->m_groupConnector) { 5294 + parser->m_groupSize = 0; 5295 + return XML_ERROR_NO_MEMORY; 5296 + } 5297 + } 5298 + } 5299 + parser->m_groupConnector[parser->m_prologState.level] = 0; 5300 + if (dtd->in_eldecl) { 5301 + int myindex = nextScaffoldPart(parser); 5302 + if (myindex < 0) return XML_ERROR_NO_MEMORY; 5303 + assert(dtd->scaffIndex != NULL); 5304 + dtd->scaffIndex[dtd->scaffLevel] = myindex; 5305 + dtd->scaffLevel++; 5306 + dtd->scaffold[myindex].type = XML_CTYPE_SEQ; 5307 + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; 5308 + } 5309 + break; 5310 + case XML_ROLE_GROUP_SEQUENCE: 5311 + if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) return XML_ERROR_SYNTAX; 5312 + parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; 5313 + if (dtd->in_eldecl && parser->m_elementDeclHandler) handleDefault = XML_FALSE; 5314 + break; 5315 + case XML_ROLE_GROUP_CHOICE: 5316 + if (parser->m_groupConnector[parser->m_prologState.level] == 
ASCII_COMMA) return XML_ERROR_SYNTAX; 5317 + if (dtd->in_eldecl && !parser->m_groupConnector[parser->m_prologState.level] && 5318 + (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type != XML_CTYPE_MIXED)) { 5319 + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_CHOICE; 5320 + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; 5321 + } 5322 + parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; 5323 + break; 5324 + case XML_ROLE_PARAM_ENTITY_REF: 5325 + #ifdef XML_DTD 5326 + case XML_ROLE_INNER_PARAM_ENTITY_REF: 5327 + dtd->hasParamEntityRefs = XML_TRUE; 5328 + if (!parser->m_paramEntityParsing) 5329 + dtd->keepProcessing = dtd->standalone; 5330 + else { 5331 + const XML_Char* name; 5332 + ENTITY* entity; 5333 + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 5334 + if (!name) return XML_ERROR_NO_MEMORY; 5335 + entity = (ENTITY*)lookup(parser, &dtd->paramEntities, name, 0); 5336 + poolDiscard(&dtd->pool); 5337 + /* first, determine if a check for an existing declaration is needed; 5338 + if yes, check that the entity exists, and that it is internal, 5339 + otherwise call the skipped entity handler 5340 + */ 5341 + if (parser->m_prologState.documentEntity && 5342 + (dtd->standalone ? !parser->m_openInternalEntities : !dtd->hasParamEntityRefs)) { 5343 + if (!entity) 5344 + return XML_ERROR_UNDEFINED_ENTITY; 5345 + else if (!entity->is_internal) { 5346 + /* It's hard to exhaustively search the code to be sure, 5347 + * but there doesn't seem to be a way of executing the 5348 + * following line. There are two cases: 5349 + * 5350 + * If 'standalone' is false, the DTD must have no 5351 + * parameter entities or we wouldn't have passed the outer 5352 + * 'if' statement. 
That means the only entity in the hash 5353 + * table is the external subset name "#" which cannot be 5354 + * given as a parameter entity name in XML syntax, so the 5355 + * lookup must have returned NULL and we don't even reach 5356 + * the test for an internal entity. 5357 + * 5358 + * If 'standalone' is true, it does not seem to be 5359 + * possible to create entities taking this code path that 5360 + * are not internal entities, so fail the test above. 5361 + * 5362 + * Because this analysis is very uncertain, the code is 5363 + * being left in place and merely removed from the 5364 + * coverage test statistics. 5365 + */ 5366 + return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ 5367 + } 5368 + } else if (!entity) { 5369 + dtd->keepProcessing = dtd->standalone; 5370 + /* cannot report skipped entities in declarations */ 5371 + if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) { 5372 + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); 5373 + handleDefault = XML_FALSE; 5374 + } 5375 + break; 5376 + } 5377 + if (entity->open) return XML_ERROR_RECURSIVE_ENTITY_REF; 5378 + if (entity->textPtr) { 5379 + enum XML_Error result; 5380 + XML_Bool betweenDecl = (role == XML_ROLE_PARAM_ENTITY_REF ? 
XML_TRUE : XML_FALSE); 5381 + result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL); 5382 + if (result != XML_ERROR_NONE) return result; 5383 + handleDefault = XML_FALSE; 5384 + break; 5385 + } 5386 + if (parser->m_externalEntityRefHandler) { 5387 + dtd->paramEntityRead = XML_FALSE; 5388 + entity->open = XML_TRUE; 5389 + entityTrackingOnOpen(parser, entity, __LINE__); 5390 + if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg, 0, entity->base, 5391 + entity->systemId, entity->publicId)) { 5392 + entityTrackingOnClose(parser, entity, __LINE__); 5393 + entity->open = XML_FALSE; 5394 + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 5395 + } 5396 + entityTrackingOnClose(parser, entity, __LINE__); 5397 + entity->open = XML_FALSE; 5398 + handleDefault = XML_FALSE; 5399 + if (!dtd->paramEntityRead) { 5400 + dtd->keepProcessing = dtd->standalone; 5401 + break; 5402 + } 5403 + } else { 5404 + dtd->keepProcessing = dtd->standalone; 5405 + break; 5406 + } 5407 + } 5408 + #endif /* XML_DTD */ 5409 + if (!dtd->standalone && parser->m_notStandaloneHandler && !parser->m_notStandaloneHandler(parser->m_handlerArg)) 5410 + return XML_ERROR_NOT_STANDALONE; 5411 + break; 5412 + 5413 + /* Element declaration stuff */ 5414 + 5415 + case XML_ROLE_ELEMENT_NAME: 5416 + if (parser->m_elementDeclHandler) { 5417 + parser->m_declElementType = getElementType(parser, enc, s, next); 5418 + if (!parser->m_declElementType) return XML_ERROR_NO_MEMORY; 5419 + dtd->scaffLevel = 0; 5420 + dtd->scaffCount = 0; 5421 + dtd->in_eldecl = XML_TRUE; 5422 + handleDefault = XML_FALSE; 5423 + } 5424 + break; 5425 + 5426 + case XML_ROLE_CONTENT_ANY: 5427 + case XML_ROLE_CONTENT_EMPTY: 5428 + if (dtd->in_eldecl) { 5429 + if (parser->m_elementDeclHandler) { 5430 + // NOTE: We are avoiding MALLOC(..) here to so that 5431 + // applications that are not using XML_FreeContentModel but 5432 + // plain free(..) or .free_fcn() to free the content model's 5433 + // memory are safe. 
5434 + XML_Content* content = parser->m_mem.malloc_fcn(sizeof(XML_Content)); 5435 + if (!content) return XML_ERROR_NO_MEMORY; 5436 + content->quant = XML_CQUANT_NONE; 5437 + content->name = NULL; 5438 + content->numchildren = 0; 5439 + content->children = NULL; 5440 + content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY : XML_CTYPE_EMPTY); 5441 + *eventEndPP = s; 5442 + parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, content); 5443 + handleDefault = XML_FALSE; 5444 + } 5445 + dtd->in_eldecl = XML_FALSE; 5446 + } 5447 + break; 5448 + 5449 + case XML_ROLE_CONTENT_PCDATA: 5450 + if (dtd->in_eldecl) { 5451 + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_MIXED; 5452 + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; 5453 + } 5454 + break; 5455 + 5456 + case XML_ROLE_CONTENT_ELEMENT: 5457 + quant = XML_CQUANT_NONE; 5458 + goto elementContent; 5459 + case XML_ROLE_CONTENT_ELEMENT_OPT: 5460 + quant = XML_CQUANT_OPT; 5461 + goto elementContent; 5462 + case XML_ROLE_CONTENT_ELEMENT_REP: 5463 + quant = XML_CQUANT_REP; 5464 + goto elementContent; 5465 + case XML_ROLE_CONTENT_ELEMENT_PLUS: 5466 + quant = XML_CQUANT_PLUS; 5467 + elementContent: 5468 + if (dtd->in_eldecl) { 5469 + ELEMENT_TYPE* el; 5470 + const XML_Char* name; 5471 + size_t nameLen; 5472 + const char* nxt = (quant == XML_CQUANT_NONE ? 
next : next - enc->minBytesPerChar); 5473 + int myindex = nextScaffoldPart(parser); 5474 + if (myindex < 0) return XML_ERROR_NO_MEMORY; 5475 + dtd->scaffold[myindex].type = XML_CTYPE_NAME; 5476 + dtd->scaffold[myindex].quant = quant; 5477 + el = getElementType(parser, enc, s, nxt); 5478 + if (!el) return XML_ERROR_NO_MEMORY; 5479 + name = el->name; 5480 + dtd->scaffold[myindex].name = name; 5481 + nameLen = 0; 5482 + while (name[nameLen++]); 5483 + 5484 + /* Detect and prevent integer overflow */ 5485 + if (nameLen > UINT_MAX - dtd->contentStringLen) { 5486 + return XML_ERROR_NO_MEMORY; 5487 + } 5488 + 5489 + dtd->contentStringLen += (unsigned)nameLen; 5490 + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; 5491 + } 5492 + break; 5493 + 5494 + case XML_ROLE_GROUP_CLOSE: 5495 + quant = XML_CQUANT_NONE; 5496 + goto closeGroup; 5497 + case XML_ROLE_GROUP_CLOSE_OPT: 5498 + quant = XML_CQUANT_OPT; 5499 + goto closeGroup; 5500 + case XML_ROLE_GROUP_CLOSE_REP: 5501 + quant = XML_CQUANT_REP; 5502 + goto closeGroup; 5503 + case XML_ROLE_GROUP_CLOSE_PLUS: 5504 + quant = XML_CQUANT_PLUS; 5505 + closeGroup: 5506 + if (dtd->in_eldecl) { 5507 + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; 5508 + dtd->scaffLevel--; 5509 + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; 5510 + if (dtd->scaffLevel == 0) { 5511 + if (!handleDefault) { 5512 + XML_Content* model = build_model(parser); 5513 + if (!model) return XML_ERROR_NO_MEMORY; 5514 + *eventEndPP = s; 5515 + parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, model); 5516 + } 5517 + dtd->in_eldecl = XML_FALSE; 5518 + dtd->contentStringLen = 0; 5519 + } 5520 + } 5521 + break; 5522 + /* End element declaration stuff */ 5523 + 5524 + case XML_ROLE_PI: 5525 + if (!reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; 5526 + handleDefault = XML_FALSE; 5527 + break; 5528 + case XML_ROLE_COMMENT: 5529 + if (!reportComment(parser, enc, s, 
next)) return XML_ERROR_NO_MEMORY; 5530 + handleDefault = XML_FALSE; 5531 + break; 5532 + case XML_ROLE_NONE: 5533 + switch (tok) { 5534 + case XML_TOK_BOM: 5535 + handleDefault = XML_FALSE; 5536 + break; 5537 + } 5538 + break; 5539 + case XML_ROLE_DOCTYPE_NONE: 5540 + if (parser->m_startDoctypeDeclHandler) handleDefault = XML_FALSE; 5541 + break; 5542 + case XML_ROLE_ENTITY_NONE: 5543 + if (dtd->keepProcessing && parser->m_entityDeclHandler) handleDefault = XML_FALSE; 5544 + break; 5545 + case XML_ROLE_NOTATION_NONE: 5546 + if (parser->m_notationDeclHandler) handleDefault = XML_FALSE; 5547 + break; 5548 + case XML_ROLE_ATTLIST_NONE: 5549 + if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; 5550 + break; 5551 + case XML_ROLE_ELEMENT_NONE: 5552 + if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; 5553 + break; 5554 + } /* end of big switch */ 5555 + 5556 + if (handleDefault && parser->m_defaultHandler) reportDefault(parser, enc, s, next); 5557 + 5558 + switch (parser->m_parsingStatus.parsing) { 5559 + case XML_SUSPENDED: 5560 + *nextPtr = next; 5561 + return XML_ERROR_NONE; 5562 + case XML_FINISHED: 5563 + return XML_ERROR_ABORTED; 5564 + case XML_PARSING: 5565 + if (parser->m_reenter) { 5566 + *nextPtr = next; 5567 + return XML_ERROR_NONE; 5568 + } 5569 + /* Fall through */ 5570 + default: 5571 + s = next; 5572 + tok = XmlPrologTok(enc, s, end, &next); 5573 + } 5574 + } 5575 + /* not reached */ 5576 + } 5577 + 5578 + static enum XML_Error PTRCALL epilogProcessor(XML_Parser parser, const char* s, const char* end, const char** nextPtr) { 5579 + parser->m_processor = epilogProcessor; 5580 + parser->m_eventPtr = s; 5581 + for (;;) { 5582 + const char* next = NULL; 5583 + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5584 + #if XML_GE == 1 5585 + if (!accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { 5586 + accountingOnAbort(parser); 5587 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 
5588 + } 5589 + #endif 5590 + parser->m_eventEndPtr = next; 5591 + switch (tok) { 5592 + /* report partial linebreak - it might be the last token */ 5593 + case -XML_TOK_PROLOG_S: 5594 + if (parser->m_defaultHandler) { 5595 + reportDefault(parser, parser->m_encoding, s, next); 5596 + if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; 5597 + } 5598 + *nextPtr = next; 5599 + return XML_ERROR_NONE; 5600 + case XML_TOK_NONE: 5601 + *nextPtr = s; 5602 + return XML_ERROR_NONE; 5603 + case XML_TOK_PROLOG_S: 5604 + if (parser->m_defaultHandler) reportDefault(parser, parser->m_encoding, s, next); 5605 + break; 5606 + case XML_TOK_PI: 5607 + if (!reportProcessingInstruction(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; 5608 + break; 5609 + case XML_TOK_COMMENT: 5610 + if (!reportComment(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; 5611 + break; 5612 + case XML_TOK_INVALID: 5613 + parser->m_eventPtr = next; 5614 + return XML_ERROR_INVALID_TOKEN; 5615 + case XML_TOK_PARTIAL: 5616 + if (!parser->m_parsingStatus.finalBuffer) { 5617 + *nextPtr = s; 5618 + return XML_ERROR_NONE; 5619 + } 5620 + return XML_ERROR_UNCLOSED_TOKEN; 5621 + case XML_TOK_PARTIAL_CHAR: 5622 + if (!parser->m_parsingStatus.finalBuffer) { 5623 + *nextPtr = s; 5624 + return XML_ERROR_NONE; 5625 + } 5626 + return XML_ERROR_PARTIAL_CHAR; 5627 + default: 5628 + return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; 5629 + } 5630 + switch (parser->m_parsingStatus.parsing) { 5631 + case XML_SUSPENDED: 5632 + parser->m_eventPtr = next; 5633 + *nextPtr = next; 5634 + return XML_ERROR_NONE; 5635 + case XML_FINISHED: 5636 + parser->m_eventPtr = next; 5637 + return XML_ERROR_ABORTED; 5638 + case XML_PARSING: 5639 + if (parser->m_reenter) { 5640 + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE 5641 + } 5642 + /* Fall through */ 5643 + default:; 5644 + parser->m_eventPtr = s = next; 5645 + } 5646 + } 5647 + } 5648 + 5649 + static enum XML_Error 
processEntity(XML_Parser parser, ENTITY* entity, XML_Bool betweenDecl, enum EntityType type) { 5650 + OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList; 5651 + switch (type) { 5652 + case ENTITY_INTERNAL: 5653 + parser->m_processor = internalEntityProcessor; 5654 + openEntityList = &parser->m_openInternalEntities; 5655 + freeEntityList = &parser->m_freeInternalEntities; 5656 + break; 5657 + case ENTITY_ATTRIBUTE: 5658 + openEntityList = &parser->m_openAttributeEntities; 5659 + freeEntityList = &parser->m_freeAttributeEntities; 5660 + break; 5661 + case ENTITY_VALUE: 5662 + openEntityList = &parser->m_openValueEntities; 5663 + freeEntityList = &parser->m_freeValueEntities; 5664 + break; 5665 + /* default case serves merely as a safety net in case of a 5666 + * wrong entityType. Therefore we exclude the following lines 5667 + * from the test coverage. 5668 + * 5669 + * LCOV_EXCL_START 5670 + */ 5671 + default: 5672 + // Should not reach here 5673 + assert(0); 5674 + /* LCOV_EXCL_STOP */ 5675 + } 5676 + 5677 + if (*freeEntityList) { 5678 + openEntity = *freeEntityList; 5679 + *freeEntityList = openEntity->next; 5680 + } else { 5681 + openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); 5682 + if (!openEntity) return XML_ERROR_NO_MEMORY; 5683 + } 5684 + entity->open = XML_TRUE; 5685 + entity->hasMore = XML_TRUE; 5686 + #if XML_GE == 1 5687 + entityTrackingOnOpen(parser, entity, __LINE__); 5688 + #endif 5689 + entity->processed = 0; 5690 + openEntity->next = *openEntityList; 5691 + *openEntityList = openEntity; 5692 + openEntity->entity = entity; 5693 + openEntity->type = type; 5694 + openEntity->startTagLevel = parser->m_tagLevel; 5695 + openEntity->betweenDecl = betweenDecl; 5696 + openEntity->internalEventPtr = NULL; 5697 + openEntity->internalEventEndPtr = NULL; 5698 + 5699 + // Only internal entities make use of the reenter flag 5700 + // therefore no need to set it for other entity types 5701 + if (type == ENTITY_INTERNAL) { 5702 + 
triggerReenter(parser); 5703 + } 5704 + return XML_ERROR_NONE; 5705 + } 5706 + 5707 + static enum XML_Error PTRCALL internalEntityProcessor(XML_Parser parser, const char* s, const char* end, 5708 + const char** nextPtr) { 5709 + UNUSED_P(s); 5710 + UNUSED_P(end); 5711 + UNUSED_P(nextPtr); 5712 + ENTITY* entity; 5713 + const char *textStart, *textEnd; 5714 + const char* next; 5715 + enum XML_Error result; 5716 + OPEN_INTERNAL_ENTITY* openEntity = parser->m_openInternalEntities; 5717 + if (!openEntity) return XML_ERROR_UNEXPECTED_STATE; 5718 + 5719 + entity = openEntity->entity; 5720 + 5721 + // This will return early 5722 + if (entity->hasMore) { 5723 + textStart = ((const char*)entity->textPtr) + entity->processed; 5724 + textEnd = (const char*)(entity->textPtr + entity->textLen); 5725 + /* Set a safe default value in case 'next' does not get set */ 5726 + next = textStart; 5727 + 5728 + if (entity->is_param) { 5729 + int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 5730 + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok, next, &next, XML_FALSE, XML_FALSE, 5731 + XML_ACCOUNT_ENTITY_EXPANSION); 5732 + } else { 5733 + result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding, textStart, textEnd, &next, 5734 + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); 5735 + } 5736 + 5737 + if (result != XML_ERROR_NONE) return result; 5738 + // Check if entity is complete, if not, mark down how much of it is 5739 + // processed 5740 + if (textEnd != next && (parser->m_parsingStatus.parsing == XML_SUSPENDED || 5741 + (parser->m_parsingStatus.parsing == XML_PARSING && parser->m_reenter))) { 5742 + entity->processed = (int)(next - (const char*)entity->textPtr); 5743 + return result; 5744 + } 5745 + 5746 + // Entity is complete. 
We cannot close it here since we need to first 5747 + // process its possible inner entities (which are added to the 5748 + // m_openInternalEntities during doProlog or doContent calls above) 5749 + entity->hasMore = XML_FALSE; 5750 + if (!entity->is_param && (openEntity->startTagLevel != parser->m_tagLevel)) { 5751 + return XML_ERROR_ASYNC_ENTITY; 5752 + } 5753 + triggerReenter(parser); 5754 + return result; 5755 + } // End of entity processing, "if" block will return here 5756 + 5757 + // Remove fully processed openEntity from open entity list. 5758 + #if XML_GE == 1 5759 + entityTrackingOnClose(parser, entity, __LINE__); 5760 + #endif 5761 + // openEntity is m_openInternalEntities' head, as we set it at the start of 5762 + // this function and we skipped doProlog and doContent calls with hasMore set 5763 + // to false. This means we can directly remove the head of 5764 + // m_openInternalEntities 5765 + assert(parser->m_openInternalEntities == openEntity); 5766 + entity->open = XML_FALSE; 5767 + parser->m_openInternalEntities = parser->m_openInternalEntities->next; 5768 + 5769 + /* put openEntity back in list of free instances */ 5770 + openEntity->next = parser->m_freeInternalEntities; 5771 + parser->m_freeInternalEntities = openEntity; 5772 + 5773 + if (parser->m_openInternalEntities == NULL) { 5774 + parser->m_processor = entity->is_param ? 
prologProcessor : contentProcessor; 5775 + } 5776 + triggerReenter(parser); 5777 + return XML_ERROR_NONE; 5778 + } 5779 + 5780 + static enum XML_Error PTRCALL errorProcessor(XML_Parser parser, const char* s, const char* end, const char** nextPtr) { 5781 + UNUSED_P(s); 5782 + UNUSED_P(end); 5783 + UNUSED_P(nextPtr); 5784 + return parser->m_errorCode; 5785 + } 5786 + 5787 + static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING* enc, XML_Bool isCdata, const char* ptr, 5788 + const char* end, STRING_POOL* pool, enum XML_Account account) { 5789 + const char* next = ptr; 5790 + enum XML_Error result = XML_ERROR_NONE; 5791 + 5792 + while (1) { 5793 + if (!parser->m_openAttributeEntities) { 5794 + result = appendAttributeValue(parser, enc, isCdata, next, end, pool, account, &next); 5795 + } else { 5796 + OPEN_INTERNAL_ENTITY* const openEntity = parser->m_openAttributeEntities; 5797 + if (!openEntity) return XML_ERROR_UNEXPECTED_STATE; 5798 + 5799 + ENTITY* const entity = openEntity->entity; 5800 + const char* const textStart = ((const char*)entity->textPtr) + entity->processed; 5801 + const char* const textEnd = (const char*)(entity->textPtr + entity->textLen); 5802 + /* Set a safe default value in case 'next' does not get set */ 5803 + const char* nextInEntity = textStart; 5804 + if (entity->hasMore) { 5805 + result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata, textStart, textEnd, pool, 5806 + XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity); 5807 + if (result != XML_ERROR_NONE) break; 5808 + // Check if entity is complete, if not, mark down how much of it is 5809 + // processed. A XML_SUSPENDED check here is not required as 5810 + // appendAttributeValue will never suspend the parser. 5811 + if (textEnd != nextInEntity) { 5812 + entity->processed = (int)(nextInEntity - (const char*)entity->textPtr); 5813 + continue; 5814 + } 5815 + 5816 + // Entity is complete. 
We cannot close it here since we need to first 5817 + // process its possible inner entities (which are added to the 5818 + // m_openAttributeEntities during appendAttributeValue) 5819 + entity->hasMore = XML_FALSE; 5820 + continue; 5821 + } // End of entity processing, "if" block skips the rest 5822 + 5823 + // Remove fully processed openEntity from open entity list. 5824 + #if XML_GE == 1 5825 + entityTrackingOnClose(parser, entity, __LINE__); 5826 + #endif 5827 + // openEntity is m_openAttributeEntities' head, since we set it at the 5828 + // start of this function and because we skipped appendAttributeValue call 5829 + // with hasMore set to false. This means we can directly remove the head 5830 + // of m_openAttributeEntities 5831 + assert(parser->m_openAttributeEntities == openEntity); 5832 + entity->open = XML_FALSE; 5833 + parser->m_openAttributeEntities = parser->m_openAttributeEntities->next; 5834 + 5835 + /* put openEntity back in list of free instances */ 5836 + openEntity->next = parser->m_freeAttributeEntities; 5837 + parser->m_freeAttributeEntities = openEntity; 5838 + } 5839 + 5840 + // Break if an error occurred or there is nothing left to process 5841 + if (result || (parser->m_openAttributeEntities == NULL && end == next)) { 5842 + break; 5843 + } 5844 + } 5845 + 5846 + if (result) return result; 5847 + if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) poolChop(pool); 5848 + if (!poolAppendChar(pool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; 5849 + return XML_ERROR_NONE; 5850 + } 5851 + 5852 + static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING* enc, XML_Bool isCdata, const char* ptr, 5853 + const char* end, STRING_POOL* pool, enum XML_Account account, 5854 + const char** nextPtr) { 5855 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 5856 + #ifndef XML_DTD 5857 + UNUSED_P(account); 5858 + #endif 5859 + 5860 + for (;;) { 5861 + const char* next = ptr; /* XmlAttributeValueTok doesn't 
always set the last arg */ 5862 + int tok = XmlAttributeValueTok(enc, ptr, end, &next); 5863 + #if XML_GE == 1 5864 + if (!accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { 5865 + accountingOnAbort(parser); 5866 + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5867 + } 5868 + #endif 5869 + switch (tok) { 5870 + case XML_TOK_NONE: 5871 + if (nextPtr) { 5872 + *nextPtr = next; 5873 + } 5874 + return XML_ERROR_NONE; 5875 + case XML_TOK_INVALID: 5876 + if (enc == parser->m_encoding) parser->m_eventPtr = next; 5877 + return XML_ERROR_INVALID_TOKEN; 5878 + case XML_TOK_PARTIAL: 5879 + if (enc == parser->m_encoding) parser->m_eventPtr = ptr; 5880 + return XML_ERROR_INVALID_TOKEN; 5881 + case XML_TOK_CHAR_REF: { 5882 + XML_Char buf[XML_ENCODE_MAX]; 5883 + int i; 5884 + int n = XmlCharRefNumber(enc, ptr); 5885 + if (n < 0) { 5886 + if (enc == parser->m_encoding) parser->m_eventPtr = ptr; 5887 + return XML_ERROR_BAD_CHAR_REF; 5888 + } 5889 + if (!isCdata && n == 0x20 /* space */ 5890 + && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 5891 + break; 5892 + n = XmlEncode(n, (ICHAR*)buf); 5893 + /* The XmlEncode() functions can never return 0 here. That 5894 + * error return happens if the code point passed in is either 5895 + * negative or greater than or equal to 0x110000. The 5896 + * XmlCharRefNumber() functions will all return a number 5897 + * strictly less than 0x110000 or a negative value if an error 5898 + * occurred. The negative value is intercepted above, so 5899 + * XmlEncode() is never passed a value it might return an 5900 + * error for. 
5901 + */ 5902 + for (i = 0; i < n; i++) { 5903 + if (!poolAppendChar(pool, buf[i])) return XML_ERROR_NO_MEMORY; 5904 + } 5905 + } break; 5906 + case XML_TOK_DATA_CHARS: 5907 + if (!poolAppend(pool, enc, ptr, next)) return XML_ERROR_NO_MEMORY; 5908 + break; 5909 + case XML_TOK_TRAILING_CR: 5910 + next = ptr + enc->minBytesPerChar; 5911 + /* fall through */ 5912 + case XML_TOK_ATTRIBUTE_VALUE_S: 5913 + case XML_TOK_DATA_NEWLINE: 5914 + if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; 5915 + if (!poolAppendChar(pool, 0x20)) return XML_ERROR_NO_MEMORY; 5916 + break; 5917 + case XML_TOK_ENTITY_REF: { 5918 + const XML_Char* name; 5919 + ENTITY* entity; 5920 + bool checkEntityDecl; 5921 + XML_Char ch = (XML_Char)XmlPredefinedEntityName(enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); 5922 + if (ch) { 5923 + #if XML_GE == 1 5924 + /* NOTE: We are replacing 4-6 characters original input for 1 character 5925 + * so there is no amplification and hence recording without 5926 + * protection. */ 5927 + accountingDiffTolerated(parser, tok, (char*)&ch, ((char*)&ch) + sizeof(XML_Char), __LINE__, 5928 + XML_ACCOUNT_ENTITY_EXPANSION); 5929 + #endif /* XML_GE == 1 */ 5930 + if (!poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; 5931 + break; 5932 + } 5933 + name = poolStoreString(&parser->m_temp2Pool, enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); 5934 + if (!name) return XML_ERROR_NO_MEMORY; 5935 + entity = (ENTITY*)lookup(parser, &dtd->generalEntities, name, 0); 5936 + poolDiscard(&parser->m_temp2Pool); 5937 + /* First, determine if a check for an existing declaration is needed; 5938 + if yes, check that the entity exists, and that it is internal. 5939 + */ 5940 + if (pool == &dtd->pool) /* are we called from prolog? */ 5941 + checkEntityDecl = 5942 + #ifdef XML_DTD 5943 + parser->m_prologState.documentEntity && 5944 + #endif /* XML_DTD */ 5945 + (dtd->standalone ? 
!parser->m_openInternalEntities : !dtd->hasParamEntityRefs); 5946 + else /* if (pool == &parser->m_tempPool): we are called from content */ 5947 + checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone; 5948 + if (checkEntityDecl) { 5949 + if (!entity) 5950 + return XML_ERROR_UNDEFINED_ENTITY; 5951 + else if (!entity->is_internal) 5952 + return XML_ERROR_ENTITY_DECLARED_IN_PE; 5953 + } else if (!entity) { 5954 + /* Cannot report skipped entity here - see comments on 5955 + parser->m_skippedEntityHandler. 5956 + if (parser->m_skippedEntityHandler) 5957 + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 5958 + */ 5959 + /* Cannot call the default handler because this would be 5960 + out of sync with the call to the startElementHandler. 5961 + if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) 5962 + reportDefault(parser, enc, ptr, next); 5963 + */ 5964 + break; 5965 + } 5966 + if (entity->open) { 5967 + if (enc == parser->m_encoding) { 5968 + /* It does not appear that this line can be executed. 5969 + * 5970 + * The "if (entity->open)" check catches recursive entity 5971 + * definitions. In order to be called with an open 5972 + * entity, it must have gone through this code before and 5973 + * been through the recursive call to 5974 + * appendAttributeValue() some lines below. That call 5975 + * sets the local encoding ("enc") to the parser's 5976 + * internal encoding (internal_utf8 or internal_utf16), 5977 + * which can never be the same as the principle encoding. 5978 + * It doesn't appear there is another code path that gets 5979 + * here with entity->open being TRUE. 5980 + * 5981 + * Since it is not certain that this logic is watertight, 5982 + * we keep the line and merely exclude it from coverage 5983 + * tests. 
5984 + */ 5985 + parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ 5986 + } 5987 + return XML_ERROR_RECURSIVE_ENTITY_REF; 5988 + } 5989 + if (entity->notation) { 5990 + if (enc == parser->m_encoding) parser->m_eventPtr = ptr; 5991 + return XML_ERROR_BINARY_ENTITY_REF; 5992 + } 5993 + if (!entity->textPtr) { 5994 + if (enc == parser->m_encoding) parser->m_eventPtr = ptr; 5995 + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; 5996 + } else { 5997 + enum XML_Error result; 5998 + result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE); 5999 + if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) { 6000 + *nextPtr = next; 6001 + } 6002 + return result; 6003 + } 6004 + } break; 6005 + default: 6006 + /* The only token returned by XmlAttributeValueTok() that does 6007 + * not have an explicit case here is XML_TOK_PARTIAL_CHAR. 6008 + * Getting that would require an entity name to contain an 6009 + * incomplete XML character (e.g. \xE2\x82); however previous 6010 + * tokenisers will have already recognised and rejected such 6011 + * names before XmlAttributeValueTok() gets a look-in. This 6012 + * default case should be retained as a safety net, but the code 6013 + * excluded from coverage tests. 
6014 + * 6015 + * LCOV_EXCL_START 6016 + */ 6017 + if (enc == parser->m_encoding) parser->m_eventPtr = ptr; 6018 + return XML_ERROR_UNEXPECTED_STATE; 6019 + /* LCOV_EXCL_STOP */ 6020 + } 6021 + ptr = next; 6022 + } 6023 + /* not reached */ 6024 + } 6025 + 6026 + #if XML_GE == 1 6027 + static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING* enc, const char* entityTextPtr, 6028 + const char* entityTextEnd, enum XML_Account account, const char** nextPtr) { 6029 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 6030 + STRING_POOL* pool = &(dtd->entityValuePool); 6031 + enum XML_Error result = XML_ERROR_NONE; 6032 + #ifdef XML_DTD 6033 + int oldInEntityValue = parser->m_prologState.inEntityValue; 6034 + parser->m_prologState.inEntityValue = 1; 6035 + #else 6036 + UNUSED_P(account); 6037 + #endif /* XML_DTD */ 6038 + /* never return Null for the value argument in EntityDeclHandler, 6039 + since this would indicate an external entity; therefore we 6040 + have to make sure that entityValuePool.start is not null */ 6041 + if (!pool->blocks) { 6042 + if (!poolGrow(pool)) return XML_ERROR_NO_MEMORY; 6043 + } 6044 + 6045 + const char* next; 6046 + for (;;) { 6047 + next = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ 6048 + int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); 6049 + 6050 + if (!accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, account)) { 6051 + accountingOnAbort(parser); 6052 + result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6053 + goto endEntityValue; 6054 + } 6055 + 6056 + switch (tok) { 6057 + case XML_TOK_PARAM_ENTITY_REF: 6058 + #ifdef XML_DTD 6059 + if (parser->m_isParamEntity || enc != parser->m_encoding) { 6060 + const XML_Char* name; 6061 + ENTITY* entity; 6062 + name = poolStoreString(&parser->m_tempPool, enc, entityTextPtr + enc->minBytesPerChar, 6063 + next - enc->minBytesPerChar); 6064 + if (!name) { 6065 + result = XML_ERROR_NO_MEMORY; 6066 + goto 
endEntityValue; 6067 + } 6068 + entity = (ENTITY*)lookup(parser, &dtd->paramEntities, name, 0); 6069 + poolDiscard(&parser->m_tempPool); 6070 + if (!entity) { 6071 + /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ 6072 + /* cannot report skipped entity here - see comments on 6073 + parser->m_skippedEntityHandler 6074 + if (parser->m_skippedEntityHandler) 6075 + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6076 + */ 6077 + dtd->keepProcessing = dtd->standalone; 6078 + goto endEntityValue; 6079 + } 6080 + if (entity->open || (entity == parser->m_declEntity)) { 6081 + if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; 6082 + result = XML_ERROR_RECURSIVE_ENTITY_REF; 6083 + goto endEntityValue; 6084 + } 6085 + if (entity->systemId) { 6086 + if (parser->m_externalEntityRefHandler) { 6087 + dtd->paramEntityRead = XML_FALSE; 6088 + entity->open = XML_TRUE; 6089 + entityTrackingOnOpen(parser, entity, __LINE__); 6090 + if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg, 0, entity->base, 6091 + entity->systemId, entity->publicId)) { 6092 + entityTrackingOnClose(parser, entity, __LINE__); 6093 + entity->open = XML_FALSE; 6094 + result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; 6095 + goto endEntityValue; 6096 + } 6097 + entityTrackingOnClose(parser, entity, __LINE__); 6098 + entity->open = XML_FALSE; 6099 + if (!dtd->paramEntityRead) dtd->keepProcessing = dtd->standalone; 6100 + } else 6101 + dtd->keepProcessing = dtd->standalone; 6102 + } else { 6103 + result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE); 6104 + goto endEntityValue; 6105 + } 6106 + break; 6107 + } 6108 + #endif /* XML_DTD */ 6109 + /* In the internal subset, PE references are not legal 6110 + within markup declarations, e.g entity values in this case. 
*/ 6111 + parser->m_eventPtr = entityTextPtr; 6112 + result = XML_ERROR_PARAM_ENTITY_REF; 6113 + goto endEntityValue; 6114 + case XML_TOK_NONE: 6115 + result = XML_ERROR_NONE; 6116 + goto endEntityValue; 6117 + case XML_TOK_ENTITY_REF: 6118 + case XML_TOK_DATA_CHARS: 6119 + if (!poolAppend(pool, enc, entityTextPtr, next)) { 6120 + result = XML_ERROR_NO_MEMORY; 6121 + goto endEntityValue; 6122 + } 6123 + break; 6124 + case XML_TOK_TRAILING_CR: 6125 + next = entityTextPtr + enc->minBytesPerChar; 6126 + /* fall through */ 6127 + case XML_TOK_DATA_NEWLINE: 6128 + if (pool->end == pool->ptr && !poolGrow(pool)) { 6129 + result = XML_ERROR_NO_MEMORY; 6130 + goto endEntityValue; 6131 + } 6132 + *(pool->ptr)++ = 0xA; 6133 + break; 6134 + case XML_TOK_CHAR_REF: { 6135 + XML_Char buf[XML_ENCODE_MAX]; 6136 + int i; 6137 + int n = XmlCharRefNumber(enc, entityTextPtr); 6138 + if (n < 0) { 6139 + if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; 6140 + result = XML_ERROR_BAD_CHAR_REF; 6141 + goto endEntityValue; 6142 + } 6143 + n = XmlEncode(n, (ICHAR*)buf); 6144 + /* The XmlEncode() functions can never return 0 here. That 6145 + * error return happens if the code point passed in is either 6146 + * negative or greater than or equal to 0x110000. The 6147 + * XmlCharRefNumber() functions will all return a number 6148 + * strictly less than 0x110000 or a negative value if an error 6149 + * occurred. The negative value is intercepted above, so 6150 + * XmlEncode() is never passed a value it might return an 6151 + * error for. 
6152 + */ 6153 + for (i = 0; i < n; i++) { 6154 + if (pool->end == pool->ptr && !poolGrow(pool)) { 6155 + result = XML_ERROR_NO_MEMORY; 6156 + goto endEntityValue; 6157 + } 6158 + *(pool->ptr)++ = buf[i]; 6159 + } 6160 + } break; 6161 + case XML_TOK_PARTIAL: 6162 + if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; 6163 + result = XML_ERROR_INVALID_TOKEN; 6164 + goto endEntityValue; 6165 + case XML_TOK_INVALID: 6166 + if (enc == parser->m_encoding) parser->m_eventPtr = next; 6167 + result = XML_ERROR_INVALID_TOKEN; 6168 + goto endEntityValue; 6169 + default: 6170 + /* This default case should be unnecessary -- all the tokens 6171 + * that XmlEntityValueTok() can return have their own explicit 6172 + * cases -- but should be retained for safety. We do however 6173 + * exclude it from the coverage statistics. 6174 + * 6175 + * LCOV_EXCL_START 6176 + */ 6177 + if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; 6178 + result = XML_ERROR_UNEXPECTED_STATE; 6179 + goto endEntityValue; 6180 + /* LCOV_EXCL_STOP */ 6181 + } 6182 + entityTextPtr = next; 6183 + } 6184 + endEntityValue: 6185 + #ifdef XML_DTD 6186 + parser->m_prologState.inEntityValue = oldInEntityValue; 6187 + #endif /* XML_DTD */ 6188 + // If 'nextPtr' is given, it should be updated during the processing 6189 + if (nextPtr != NULL) { 6190 + *nextPtr = next; 6191 + } 6192 + return result; 6193 + } 6194 + 6195 + static enum XML_Error callStoreEntityValue(XML_Parser parser, const ENCODING* enc, const char* entityTextPtr, 6196 + const char* entityTextEnd, enum XML_Account account) { 6197 + const char* next = entityTextPtr; 6198 + enum XML_Error result = XML_ERROR_NONE; 6199 + while (1) { 6200 + if (!parser->m_openValueEntities) { 6201 + result = storeEntityValue(parser, enc, next, entityTextEnd, account, &next); 6202 + } else { 6203 + OPEN_INTERNAL_ENTITY* const openEntity = parser->m_openValueEntities; 6204 + if (!openEntity) return XML_ERROR_UNEXPECTED_STATE; 6205 + 6206 + ENTITY* 
const entity = openEntity->entity; 6207 + const char* const textStart = ((const char*)entity->textPtr) + entity->processed; 6208 + const char* const textEnd = (const char*)(entity->textPtr + entity->textLen); 6209 + /* Set a safe default value in case 'next' does not get set */ 6210 + const char* nextInEntity = textStart; 6211 + if (entity->hasMore) { 6212 + result = storeEntityValue(parser, parser->m_internalEncoding, textStart, textEnd, XML_ACCOUNT_ENTITY_EXPANSION, 6213 + &nextInEntity); 6214 + if (result != XML_ERROR_NONE) break; 6215 + // Check if entity is complete, if not, mark down how much of it is 6216 + // processed. A XML_SUSPENDED check here is not required as 6217 + // appendAttributeValue will never suspend the parser. 6218 + if (textEnd != nextInEntity) { 6219 + entity->processed = (int)(nextInEntity - (const char*)entity->textPtr); 6220 + continue; 6221 + } 6222 + 6223 + // Entity is complete. We cannot close it here since we need to first 6224 + // process its possible inner entities (which are added to the 6225 + // m_openValueEntities during storeEntityValue) 6226 + entity->hasMore = XML_FALSE; 6227 + continue; 6228 + } // End of entity processing, "if" block skips the rest 6229 + 6230 + // Remove fully processed openEntity from open entity list. 6231 + #if XML_GE == 1 6232 + entityTrackingOnClose(parser, entity, __LINE__); 6233 + #endif 6234 + // openEntity is m_openValueEntities' head, since we set it at the 6235 + // start of this function and because we skipped storeEntityValue call 6236 + // with hasMore set to false. 
This means we can directly remove the head 6237 + // of m_openValueEntities 6238 + assert(parser->m_openValueEntities == openEntity); 6239 + entity->open = XML_FALSE; 6240 + parser->m_openValueEntities = parser->m_openValueEntities->next; 6241 + 6242 + /* put openEntity back in list of free instances */ 6243 + openEntity->next = parser->m_freeValueEntities; 6244 + parser->m_freeValueEntities = openEntity; 6245 + } 6246 + 6247 + // Break if an error occurred or there is nothing left to process 6248 + if (result || (parser->m_openValueEntities == NULL && entityTextEnd == next)) { 6249 + break; 6250 + } 6251 + } 6252 + 6253 + return result; 6254 + } 6255 + 6256 + #else /* XML_GE == 0 */ 6257 + 6258 + static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY* entity) { 6259 + // This will store "&amp;entity123;" in entity->textPtr 6260 + // to end up as "&entity123;" in the handler. 6261 + const char* const entity_start = "&amp;"; 6262 + const char* const entity_end = ";"; 6263 + 6264 + STRING_POOL* const pool = &(parser->m_dtd->entityValuePool); 6265 + if (!poolAppendString(pool, entity_start) || !poolAppendString(pool, entity->name) || 6266 + !poolAppendString(pool, entity_end)) { 6267 + poolDiscard(pool); 6268 + return XML_ERROR_NO_MEMORY; 6269 + } 6270 + 6271 + entity->textPtr = poolStart(pool); 6272 + entity->textLen = (int)(poolLength(pool)); 6273 + poolFinish(pool); 6274 + 6275 + return XML_ERROR_NONE; 6276 + } 6277 + 6278 + #endif /* XML_GE == 0 */ 6279 + 6280 + static void FASTCALL normalizeLines(XML_Char* s) { 6281 + XML_Char* p; 6282 + for (;; s++) { 6283 + if (*s == XML_T('\0')) return; 6284 + if (*s == 0xD) break; 6285 + } 6286 + p = s; 6287 + do { 6288 + if (*s == 0xD) { 6289 + *p++ = 0xA; 6290 + if (*++s == 0xA) s++; 6291 + } else 6292 + *p++ = *s++; 6293 + } while (*s); 6294 + *p = XML_T('\0'); 6295 + } 6296 + 6297 + static int reportProcessingInstruction(XML_Parser parser, const ENCODING* enc, const char* start, const char* end) { 6298 + 
const XML_Char* target; 6299 + XML_Char* data; 6300 + const char* tem; 6301 + if (!parser->m_processingInstructionHandler) { 6302 + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); 6303 + return 1; 6304 + } 6305 + start += enc->minBytesPerChar * 2; 6306 + tem = start + XmlNameLength(enc, start); 6307 + target = poolStoreString(&parser->m_tempPool, enc, start, tem); 6308 + if (!target) return 0; 6309 + poolFinish(&parser->m_tempPool); 6310 + data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), end - enc->minBytesPerChar * 2); 6311 + if (!data) return 0; 6312 + normalizeLines(data); 6313 + parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); 6314 + poolClear(&parser->m_tempPool); 6315 + return 1; 6316 + } 6317 + 6318 + static int reportComment(XML_Parser parser, const ENCODING* enc, const char* start, const char* end) { 6319 + XML_Char* data; 6320 + if (!parser->m_commentHandler) { 6321 + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); 6322 + return 1; 6323 + } 6324 + data = poolStoreString(&parser->m_tempPool, enc, start + enc->minBytesPerChar * 4, end - enc->minBytesPerChar * 3); 6325 + if (!data) return 0; 6326 + normalizeLines(data); 6327 + parser->m_commentHandler(parser->m_handlerArg, data); 6328 + poolClear(&parser->m_tempPool); 6329 + return 1; 6330 + } 6331 + 6332 + static void reportDefault(XML_Parser parser, const ENCODING* enc, const char* s, const char* end) { 6333 + if (MUST_CONVERT(enc, s)) { 6334 + enum XML_Convert_Result convert_res; 6335 + const char** eventPP; 6336 + const char** eventEndPP; 6337 + if (enc == parser->m_encoding) { 6338 + eventPP = &parser->m_eventPtr; 6339 + eventEndPP = &parser->m_eventEndPtr; 6340 + } else { 6341 + /* To get here, two things must be true; the parser must be 6342 + * using a character encoding that is not the same as the 6343 + * encoding passed in, and the encoding passed in must need 6344 + * conversion to the internal format 
(UTF-8 unless XML_UNICODE 6345 + * is defined). The only occasions on which the encoding passed 6346 + * in is not the same as the parser's encoding are when it is 6347 + * the internal encoding (e.g. a previously defined parameter 6348 + * entity, already converted to internal format). This by 6349 + * definition doesn't need conversion, so the whole branch never 6350 + * gets executed. 6351 + * 6352 + * For safety's sake we don't delete these lines and merely 6353 + * exclude them from coverage statistics. 6354 + * 6355 + * LCOV_EXCL_START 6356 + */ 6357 + eventPP = &(parser->m_openInternalEntities->internalEventPtr); 6358 + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 6359 + /* LCOV_EXCL_STOP */ 6360 + } 6361 + do { 6362 + ICHAR* dataPtr = (ICHAR*)parser->m_dataBuf; 6363 + convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR*)parser->m_dataBufEnd); 6364 + *eventEndPP = s; 6365 + parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR*)parser->m_dataBuf)); 6366 + *eventPP = s; 6367 + } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); 6368 + } else 6369 + parser->m_defaultHandler(parser->m_handlerArg, (const XML_Char*)s, 6370 + (int)((const XML_Char*)end - (const XML_Char*)s)); 6371 + } 6372 + 6373 + static int defineAttribute(ELEMENT_TYPE* type, ATTRIBUTE_ID* attId, XML_Bool isCdata, XML_Bool isId, 6374 + const XML_Char* value, XML_Parser parser) { 6375 + DEFAULT_ATTRIBUTE* att; 6376 + if (value || isId) { 6377 + /* The handling of default attributes gets messed up if we have 6378 + a default which duplicates a non-default. 
*/ 6379 + int i; 6380 + for (i = 0; i < type->nDefaultAtts; i++) 6381 + if (attId == type->defaultAtts[i].id) return 1; 6382 + if (isId && !type->idAtt && !attId->xmlns) type->idAtt = attId; 6383 + } 6384 + if (type->nDefaultAtts == type->allocDefaultAtts) { 6385 + if (type->allocDefaultAtts == 0) { 6386 + type->allocDefaultAtts = 8; 6387 + type->defaultAtts = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 6388 + if (!type->defaultAtts) { 6389 + type->allocDefaultAtts = 0; 6390 + return 0; 6391 + } 6392 + } else { 6393 + DEFAULT_ATTRIBUTE* temp; 6394 + 6395 + /* Detect and prevent integer overflow */ 6396 + if (type->allocDefaultAtts > INT_MAX / 2) { 6397 + return 0; 6398 + } 6399 + 6400 + int count = type->allocDefaultAtts * 2; 6401 + 6402 + /* Detect and prevent integer overflow. 6403 + * The preprocessor guard addresses the "always false" warning 6404 + * from -Wtype-limits on platforms where 6405 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 6406 + #if UINT_MAX >= SIZE_MAX 6407 + if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { 6408 + return 0; 6409 + } 6410 + #endif 6411 + 6412 + temp = REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE))); 6413 + if (temp == NULL) return 0; 6414 + type->allocDefaultAtts = count; 6415 + type->defaultAtts = temp; 6416 + } 6417 + } 6418 + att = type->defaultAtts + type->nDefaultAtts; 6419 + att->id = attId; 6420 + att->value = value; 6421 + att->isCdata = isCdata; 6422 + if (!isCdata) attId->maybeTokenized = XML_TRUE; 6423 + type->nDefaultAtts += 1; 6424 + return 1; 6425 + } 6426 + 6427 + static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE* elementType) { 6428 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 6429 + const XML_Char* name; 6430 + for (name = elementType->name; *name; name++) { 6431 + if (*name == XML_T(ASCII_COLON)) { 6432 + PREFIX* prefix; 6433 + const XML_Char* s; 6434 + for (s = elementType->name; s != name; s++) 
{ 6435 + if (!poolAppendChar(&dtd->pool, *s)) return 0; 6436 + } 6437 + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; 6438 + prefix = (PREFIX*)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); 6439 + if (!prefix) return 0; 6440 + if (prefix->name == poolStart(&dtd->pool)) 6441 + poolFinish(&dtd->pool); 6442 + else 6443 + poolDiscard(&dtd->pool); 6444 + elementType->prefix = prefix; 6445 + break; 6446 + } 6447 + } 6448 + return 1; 6449 + } 6450 + 6451 + static ATTRIBUTE_ID* getAttributeId(XML_Parser parser, const ENCODING* enc, const char* start, const char* end) { 6452 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 6453 + ATTRIBUTE_ID* id; 6454 + const XML_Char* name; 6455 + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; 6456 + name = poolStoreString(&dtd->pool, enc, start, end); 6457 + if (!name) return NULL; 6458 + /* skip quotation mark - its storage will be reused (like in name[-1]) */ 6459 + ++name; 6460 + id = (ATTRIBUTE_ID*)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); 6461 + if (!id) return NULL; 6462 + if (id->name != name) 6463 + poolDiscard(&dtd->pool); 6464 + else { 6465 + poolFinish(&dtd->pool); 6466 + if (!parser->m_ns) 6467 + ; 6468 + else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) && name[2] == XML_T(ASCII_l) && 6469 + name[3] == XML_T(ASCII_n) && name[4] == XML_T(ASCII_s) && 6470 + (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { 6471 + if (name[5] == XML_T('\0')) 6472 + id->prefix = &dtd->defaultPrefix; 6473 + else 6474 + id->prefix = (PREFIX*)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX)); 6475 + id->xmlns = XML_TRUE; 6476 + } else { 6477 + int i; 6478 + for (i = 0; name[i]; i++) { 6479 + /* attributes without prefix are *not* in the default namespace */ 6480 + if (name[i] == XML_T(ASCII_COLON)) { 6481 + int j; 6482 + for (j = 0; j < i; j++) { 6483 + if (!poolAppendChar(&dtd->pool, name[j])) return NULL; 6484 + } 6485 + if 
(!poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; 6486 + id->prefix = (PREFIX*)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); 6487 + if (!id->prefix) return NULL; 6488 + if (id->prefix->name == poolStart(&dtd->pool)) 6489 + poolFinish(&dtd->pool); 6490 + else 6491 + poolDiscard(&dtd->pool); 6492 + break; 6493 + } 6494 + } 6495 + } 6496 + } 6497 + return id; 6498 + } 6499 + 6500 + #define CONTEXT_SEP XML_T(ASCII_FF) 6501 + 6502 + static const XML_Char* getContext(XML_Parser parser) { 6503 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 6504 + HASH_TABLE_ITER iter; 6505 + XML_Bool needSep = XML_FALSE; 6506 + 6507 + if (dtd->defaultPrefix.binding) { 6508 + int i; 6509 + int len; 6510 + if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; 6511 + len = dtd->defaultPrefix.binding->uriLen; 6512 + if (parser->m_namespaceSeparator) len--; 6513 + for (i = 0; i < len; i++) { 6514 + if (!poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) { 6515 + /* Because of memory caching, I don't believe this line can be 6516 + * executed. 6517 + * 6518 + * This is part of a loop copying the default prefix binding 6519 + * URI into the parser's temporary string pool. Previously, 6520 + * that URI was copied into the same string pool, with a 6521 + * terminating NUL character, as part of setContext(). When 6522 + * the pool was cleared, that leaves a block definitely big 6523 + * enough to hold the URI on the free block list of the pool. 6524 + * The URI copy in getContext() therefore cannot run out of 6525 + * memory. 6526 + * 6527 + * If the pool is used between the setContext() and 6528 + * getContext() calls, the worst it can do is leave a bigger 6529 + * block on the front of the free list. Given that this is 6530 + * all somewhat inobvious and program logic can be changed, we 6531 + * don't delete the line but we do exclude it from the test 6532 + * coverage statistics. 
6533 + */ 6534 + return NULL; /* LCOV_EXCL_LINE */ 6535 + } 6536 + } 6537 + needSep = XML_TRUE; 6538 + } 6539 + 6540 + hashTableIterInit(&iter, &(dtd->prefixes)); 6541 + for (;;) { 6542 + int i; 6543 + int len; 6544 + const XML_Char* s; 6545 + PREFIX* prefix = (PREFIX*)hashTableIterNext(&iter); 6546 + if (!prefix) break; 6547 + if (!prefix->binding) { 6548 + /* This test appears to be (justifiable) paranoia. There does 6549 + * not seem to be a way of injecting a prefix without a binding 6550 + * that doesn't get errored long before this function is called. 6551 + * The test should remain for safety's sake, so we instead 6552 + * exclude the following line from the coverage statistics. 6553 + */ 6554 + continue; /* LCOV_EXCL_LINE */ 6555 + } 6556 + if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; 6557 + for (s = prefix->name; *s; s++) 6558 + if (!poolAppendChar(&parser->m_tempPool, *s)) return NULL; 6559 + if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; 6560 + len = prefix->binding->uriLen; 6561 + if (parser->m_namespaceSeparator) len--; 6562 + for (i = 0; i < len; i++) 6563 + if (!poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) return NULL; 6564 + needSep = XML_TRUE; 6565 + } 6566 + 6567 + hashTableIterInit(&iter, &(dtd->generalEntities)); 6568 + for (;;) { 6569 + const XML_Char* s; 6570 + ENTITY* e = (ENTITY*)hashTableIterNext(&iter); 6571 + if (!e) break; 6572 + if (!e->open) continue; 6573 + if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; 6574 + for (s = e->name; *s; s++) 6575 + if (!poolAppendChar(&parser->m_tempPool, *s)) return 0; 6576 + needSep = XML_TRUE; 6577 + } 6578 + 6579 + if (!poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return NULL; 6580 + return parser->m_tempPool.start; 6581 + } 6582 + 6583 + static XML_Bool setContext(XML_Parser parser, const XML_Char* context) { 6584 + if (context == NULL) { 6585 + return XML_FALSE; 6586 + } 6587 + 6588 + 
DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 6589 + const XML_Char* s = context; 6590 + 6591 + while (*context != XML_T('\0')) { 6592 + if (*s == CONTEXT_SEP || *s == XML_T('\0')) { 6593 + ENTITY* e; 6594 + if (!poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; 6595 + e = (ENTITY*)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0); 6596 + if (e) e->open = XML_TRUE; 6597 + if (*s != XML_T('\0')) s++; 6598 + context = s; 6599 + poolDiscard(&parser->m_tempPool); 6600 + } else if (*s == XML_T(ASCII_EQUALS)) { 6601 + PREFIX* prefix; 6602 + if (poolLength(&parser->m_tempPool) == 0) 6603 + prefix = &dtd->defaultPrefix; 6604 + else { 6605 + if (!poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; 6606 + prefix = (PREFIX*)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool), sizeof(PREFIX)); 6607 + if (!prefix) return XML_FALSE; 6608 + if (prefix->name == poolStart(&parser->m_tempPool)) { 6609 + prefix->name = poolCopyString(&dtd->pool, prefix->name); 6610 + if (!prefix->name) return XML_FALSE; 6611 + } 6612 + poolDiscard(&parser->m_tempPool); 6613 + } 6614 + for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) 6615 + if (!poolAppendChar(&parser->m_tempPool, *context)) return XML_FALSE; 6616 + if (!poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; 6617 + if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), &parser->m_inheritedBindings) != 6618 + XML_ERROR_NONE) 6619 + return XML_FALSE; 6620 + poolDiscard(&parser->m_tempPool); 6621 + if (*context != XML_T('\0')) ++context; 6622 + s = context; 6623 + } else { 6624 + if (!poolAppendChar(&parser->m_tempPool, *s)) return XML_FALSE; 6625 + s++; 6626 + } 6627 + } 6628 + return XML_TRUE; 6629 + } 6630 + 6631 + static void FASTCALL normalizePublicId(XML_Char* publicId) { 6632 + XML_Char* p = publicId; 6633 + XML_Char* s; 6634 + for (s = publicId; *s; s++) { 6635 + switch (*s) { 
6636 + case 0x20: 6637 + case 0xD: 6638 + case 0xA: 6639 + if (p != publicId && p[-1] != 0x20) *p++ = 0x20; 6640 + break; 6641 + default: 6642 + *p++ = *s; 6643 + } 6644 + } 6645 + if (p != publicId && p[-1] == 0x20) --p; 6646 + *p = XML_T('\0'); 6647 + } 6648 + 6649 + static DTD* dtdCreate(XML_Parser parser) { 6650 + DTD* p = MALLOC(parser, sizeof(DTD)); 6651 + if (p == NULL) return p; 6652 + poolInit(&(p->pool), parser); 6653 + poolInit(&(p->entityValuePool), parser); 6654 + hashTableInit(&(p->generalEntities), parser); 6655 + hashTableInit(&(p->elementTypes), parser); 6656 + hashTableInit(&(p->attributeIds), parser); 6657 + hashTableInit(&(p->prefixes), parser); 6658 + #ifdef XML_DTD 6659 + p->paramEntityRead = XML_FALSE; 6660 + hashTableInit(&(p->paramEntities), parser); 6661 + #endif /* XML_DTD */ 6662 + p->defaultPrefix.name = NULL; 6663 + p->defaultPrefix.binding = NULL; 6664 + 6665 + p->in_eldecl = XML_FALSE; 6666 + p->scaffIndex = NULL; 6667 + p->scaffold = NULL; 6668 + p->scaffLevel = 0; 6669 + p->scaffSize = 0; 6670 + p->scaffCount = 0; 6671 + p->contentStringLen = 0; 6672 + 6673 + p->keepProcessing = XML_TRUE; 6674 + p->hasParamEntityRefs = XML_FALSE; 6675 + p->standalone = XML_FALSE; 6676 + return p; 6677 + } 6678 + 6679 + static void dtdReset(DTD* p, XML_Parser parser) { 6680 + HASH_TABLE_ITER iter; 6681 + hashTableIterInit(&iter, &(p->elementTypes)); 6682 + for (;;) { 6683 + ELEMENT_TYPE* e = (ELEMENT_TYPE*)hashTableIterNext(&iter); 6684 + if (!e) break; 6685 + if (e->allocDefaultAtts != 0) FREE(parser, e->defaultAtts); 6686 + } 6687 + hashTableClear(&(p->generalEntities)); 6688 + #ifdef XML_DTD 6689 + p->paramEntityRead = XML_FALSE; 6690 + hashTableClear(&(p->paramEntities)); 6691 + #endif /* XML_DTD */ 6692 + hashTableClear(&(p->elementTypes)); 6693 + hashTableClear(&(p->attributeIds)); 6694 + hashTableClear(&(p->prefixes)); 6695 + poolClear(&(p->pool)); 6696 + poolClear(&(p->entityValuePool)); 6697 + p->defaultPrefix.name = NULL; 6698 + 
p->defaultPrefix.binding = NULL; 6699 + 6700 + p->in_eldecl = XML_FALSE; 6701 + 6702 + FREE(parser, p->scaffIndex); 6703 + p->scaffIndex = NULL; 6704 + FREE(parser, p->scaffold); 6705 + p->scaffold = NULL; 6706 + 6707 + p->scaffLevel = 0; 6708 + p->scaffSize = 0; 6709 + p->scaffCount = 0; 6710 + p->contentStringLen = 0; 6711 + 6712 + p->keepProcessing = XML_TRUE; 6713 + p->hasParamEntityRefs = XML_FALSE; 6714 + p->standalone = XML_FALSE; 6715 + } 6716 + 6717 + static void dtdDestroy(DTD* p, XML_Bool isDocEntity, XML_Parser parser) { 6718 + HASH_TABLE_ITER iter; 6719 + hashTableIterInit(&iter, &(p->elementTypes)); 6720 + for (;;) { 6721 + ELEMENT_TYPE* e = (ELEMENT_TYPE*)hashTableIterNext(&iter); 6722 + if (!e) break; 6723 + if (e->allocDefaultAtts != 0) FREE(parser, e->defaultAtts); 6724 + } 6725 + hashTableDestroy(&(p->generalEntities)); 6726 + #ifdef XML_DTD 6727 + hashTableDestroy(&(p->paramEntities)); 6728 + #endif /* XML_DTD */ 6729 + hashTableDestroy(&(p->elementTypes)); 6730 + hashTableDestroy(&(p->attributeIds)); 6731 + hashTableDestroy(&(p->prefixes)); 6732 + poolDestroy(&(p->pool)); 6733 + poolDestroy(&(p->entityValuePool)); 6734 + if (isDocEntity) { 6735 + FREE(parser, p->scaffIndex); 6736 + FREE(parser, p->scaffold); 6737 + } 6738 + FREE(parser, p); 6739 + } 6740 + 6741 + /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. 6742 + The new DTD has already been initialized. 6743 + */ 6744 + static int dtdCopy(XML_Parser oldParser, DTD* newDtd, const DTD* oldDtd, XML_Parser parser) { 6745 + HASH_TABLE_ITER iter; 6746 + 6747 + /* Copy the prefix table. 
*/ 6748 + 6749 + hashTableIterInit(&iter, &(oldDtd->prefixes)); 6750 + for (;;) { 6751 + const XML_Char* name; 6752 + const PREFIX* oldP = (PREFIX*)hashTableIterNext(&iter); 6753 + if (!oldP) break; 6754 + name = poolCopyString(&(newDtd->pool), oldP->name); 6755 + if (!name) return 0; 6756 + if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; 6757 + } 6758 + 6759 + hashTableIterInit(&iter, &(oldDtd->attributeIds)); 6760 + 6761 + /* Copy the attribute id table. */ 6762 + 6763 + for (;;) { 6764 + ATTRIBUTE_ID* newA; 6765 + const XML_Char* name; 6766 + const ATTRIBUTE_ID* oldA = (ATTRIBUTE_ID*)hashTableIterNext(&iter); 6767 + 6768 + if (!oldA) break; 6769 + /* Remember to allocate the scratch byte before the name. */ 6770 + if (!poolAppendChar(&(newDtd->pool), XML_T('\0'))) return 0; 6771 + name = poolCopyString(&(newDtd->pool), oldA->name); 6772 + if (!name) return 0; 6773 + ++name; 6774 + newA = (ATTRIBUTE_ID*)lookup(oldParser, &(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); 6775 + if (!newA) return 0; 6776 + newA->maybeTokenized = oldA->maybeTokenized; 6777 + if (oldA->prefix) { 6778 + newA->xmlns = oldA->xmlns; 6779 + if (oldA->prefix == &oldDtd->defaultPrefix) 6780 + newA->prefix = &newDtd->defaultPrefix; 6781 + else 6782 + newA->prefix = (PREFIX*)lookup(oldParser, &(newDtd->prefixes), oldA->prefix->name, 0); 6783 + } 6784 + } 6785 + 6786 + /* Copy the element type table. */ 6787 + 6788 + hashTableIterInit(&iter, &(oldDtd->elementTypes)); 6789 + 6790 + for (;;) { 6791 + int i; 6792 + ELEMENT_TYPE* newE; 6793 + const XML_Char* name; 6794 + const ELEMENT_TYPE* oldE = (ELEMENT_TYPE*)hashTableIterNext(&iter); 6795 + if (!oldE) break; 6796 + name = poolCopyString(&(newDtd->pool), oldE->name); 6797 + if (!name) return 0; 6798 + newE = (ELEMENT_TYPE*)lookup(oldParser, &(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); 6799 + if (!newE) return 0; 6800 + if (oldE->nDefaultAtts) { 6801 + /* Detect and prevent integer overflow. 
6802 + * The preprocessor guard addresses the "always false" warning 6803 + * from -Wtype-limits on platforms where 6804 + * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ 6805 + #if UINT_MAX >= SIZE_MAX 6806 + if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { 6807 + return 0; 6808 + } 6809 + #endif 6810 + newE->defaultAtts = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 6811 + if (!newE->defaultAtts) { 6812 + return 0; 6813 + } 6814 + } 6815 + if (oldE->idAtt) newE->idAtt = (ATTRIBUTE_ID*)lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0); 6816 + newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; 6817 + if (oldE->prefix) newE->prefix = (PREFIX*)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); 6818 + for (i = 0; i < newE->nDefaultAtts; i++) { 6819 + newE->defaultAtts[i].id = 6820 + (ATTRIBUTE_ID*)lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); 6821 + newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; 6822 + if (oldE->defaultAtts[i].value) { 6823 + newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); 6824 + if (!newE->defaultAtts[i].value) return 0; 6825 + } else 6826 + newE->defaultAtts[i].value = NULL; 6827 + } 6828 + } 6829 + 6830 + /* Copy the entity tables. 
*/ 6831 + if (!copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), &(oldDtd->generalEntities))) return 0; 6832 + 6833 + #ifdef XML_DTD 6834 + if (!copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), &(oldDtd->paramEntities))) return 0; 6835 + newDtd->paramEntityRead = oldDtd->paramEntityRead; 6836 + #endif /* XML_DTD */ 6837 + 6838 + newDtd->keepProcessing = oldDtd->keepProcessing; 6839 + newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; 6840 + newDtd->standalone = oldDtd->standalone; 6841 + 6842 + /* Don't want deep copying for scaffolding */ 6843 + newDtd->in_eldecl = oldDtd->in_eldecl; 6844 + newDtd->scaffold = oldDtd->scaffold; 6845 + newDtd->contentStringLen = oldDtd->contentStringLen; 6846 + newDtd->scaffSize = oldDtd->scaffSize; 6847 + newDtd->scaffLevel = oldDtd->scaffLevel; 6848 + newDtd->scaffIndex = oldDtd->scaffIndex; 6849 + 6850 + return 1; 6851 + } /* End dtdCopy */ 6852 + 6853 + static int copyEntityTable(XML_Parser oldParser, HASH_TABLE* newTable, STRING_POOL* newPool, 6854 + const HASH_TABLE* oldTable) { 6855 + HASH_TABLE_ITER iter; 6856 + const XML_Char* cachedOldBase = NULL; 6857 + const XML_Char* cachedNewBase = NULL; 6858 + 6859 + hashTableIterInit(&iter, oldTable); 6860 + 6861 + for (;;) { 6862 + ENTITY* newE; 6863 + const XML_Char* name; 6864 + const ENTITY* oldE = (ENTITY*)hashTableIterNext(&iter); 6865 + if (!oldE) break; 6866 + name = poolCopyString(newPool, oldE->name); 6867 + if (!name) return 0; 6868 + newE = (ENTITY*)lookup(oldParser, newTable, name, sizeof(ENTITY)); 6869 + if (!newE) return 0; 6870 + if (oldE->systemId) { 6871 + const XML_Char* tem = poolCopyString(newPool, oldE->systemId); 6872 + if (!tem) return 0; 6873 + newE->systemId = tem; 6874 + if (oldE->base) { 6875 + if (oldE->base == cachedOldBase) 6876 + newE->base = cachedNewBase; 6877 + else { 6878 + cachedOldBase = oldE->base; 6879 + tem = poolCopyString(newPool, cachedOldBase); 6880 + if (!tem) return 0; 6881 + cachedNewBase 
= newE->base = tem; 6882 + } 6883 + } 6884 + if (oldE->publicId) { 6885 + tem = poolCopyString(newPool, oldE->publicId); 6886 + if (!tem) return 0; 6887 + newE->publicId = tem; 6888 + } 6889 + } else { 6890 + const XML_Char* tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); 6891 + if (!tem) return 0; 6892 + newE->textPtr = tem; 6893 + newE->textLen = oldE->textLen; 6894 + } 6895 + if (oldE->notation) { 6896 + const XML_Char* tem = poolCopyString(newPool, oldE->notation); 6897 + if (!tem) return 0; 6898 + newE->notation = tem; 6899 + } 6900 + newE->is_param = oldE->is_param; 6901 + newE->is_internal = oldE->is_internal; 6902 + } 6903 + return 1; 6904 + } 6905 + 6906 + #define INIT_POWER 6 6907 + 6908 + static XML_Bool FASTCALL keyeq(KEY s1, KEY s2) { 6909 + for (; *s1 == *s2; s1++, s2++) 6910 + if (*s1 == 0) return XML_TRUE; 6911 + return XML_FALSE; 6912 + } 6913 + 6914 + static size_t keylen(KEY s) { 6915 + size_t len = 0; 6916 + for (; *s; s++, len++); 6917 + return len; 6918 + } 6919 + 6920 + static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey* key) { 6921 + key->k[0] = 0; 6922 + key->k[1] = get_hash_secret_salt(parser); 6923 + } 6924 + 6925 + static unsigned long FASTCALL hash(XML_Parser parser, KEY s) { 6926 + struct siphash state; 6927 + struct sipkey key; 6928 + (void)sip24_valid; 6929 + copy_salt_to_sipkey(parser, &key); 6930 + sip24_init(&state, &key); 6931 + sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); 6932 + return (unsigned long)sip24_final(&state); 6933 + } 6934 + 6935 + static NAMED* lookup(XML_Parser parser, HASH_TABLE* table, KEY name, size_t createSize) { 6936 + size_t i; 6937 + if (table->size == 0) { 6938 + size_t tsize; 6939 + if (!createSize) return NULL; 6940 + table->power = INIT_POWER; 6941 + /* table->size is a power of 2 */ 6942 + table->size = (size_t)1 << INIT_POWER; 6943 + tsize = table->size * sizeof(NAMED*); 6944 + table->v = MALLOC(table->parser, tsize); 6945 + if (!table->v) { 6946 + table->size = 
0; 6947 + return NULL; 6948 + } 6949 + memset(table->v, 0, tsize); 6950 + i = hash(parser, name) & ((unsigned long)table->size - 1); 6951 + } else { 6952 + unsigned long h = hash(parser, name); 6953 + unsigned long mask = (unsigned long)table->size - 1; 6954 + unsigned char step = 0; 6955 + i = h & mask; 6956 + while (table->v[i]) { 6957 + if (keyeq(name, table->v[i]->name)) return table->v[i]; 6958 + if (!step) step = PROBE_STEP(h, mask, table->power); 6959 + i < step ? (i += table->size - step) : (i -= step); 6960 + } 6961 + if (!createSize) return NULL; 6962 + 6963 + /* check for overflow (table is half full) */ 6964 + if (table->used >> (table->power - 1)) { 6965 + unsigned char newPower = table->power + 1; 6966 + 6967 + /* Detect and prevent invalid shift */ 6968 + if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { 6969 + return NULL; 6970 + } 6971 + 6972 + size_t newSize = (size_t)1 << newPower; 6973 + unsigned long newMask = (unsigned long)newSize - 1; 6974 + 6975 + /* Detect and prevent integer overflow */ 6976 + if (newSize > SIZE_MAX / sizeof(NAMED*)) { 6977 + return NULL; 6978 + } 6979 + 6980 + size_t tsize = newSize * sizeof(NAMED*); 6981 + NAMED** newV = MALLOC(table->parser, tsize); 6982 + if (!newV) return NULL; 6983 + memset(newV, 0, tsize); 6984 + for (i = 0; i < table->size; i++) 6985 + if (table->v[i]) { 6986 + unsigned long newHash = hash(parser, table->v[i]->name); 6987 + size_t j = newHash & newMask; 6988 + step = 0; 6989 + while (newV[j]) { 6990 + if (!step) step = PROBE_STEP(newHash, newMask, newPower); 6991 + j < step ? (j += newSize - step) : (j -= step); 6992 + } 6993 + newV[j] = table->v[i]; 6994 + } 6995 + FREE(table->parser, table->v); 6996 + table->v = newV; 6997 + table->power = newPower; 6998 + table->size = newSize; 6999 + i = h & newMask; 7000 + step = 0; 7001 + while (table->v[i]) { 7002 + if (!step) step = PROBE_STEP(h, newMask, newPower); 7003 + i < step ? 
(i += newSize - step) : (i -= step); 7004 + } 7005 + } 7006 + } 7007 + table->v[i] = MALLOC(table->parser, createSize); 7008 + if (!table->v[i]) return NULL; 7009 + memset(table->v[i], 0, createSize); 7010 + table->v[i]->name = name; 7011 + (table->used)++; 7012 + return table->v[i]; 7013 + } 7014 + 7015 + static void FASTCALL hashTableClear(HASH_TABLE* table) { 7016 + size_t i; 7017 + for (i = 0; i < table->size; i++) { 7018 + FREE(table->parser, table->v[i]); 7019 + table->v[i] = NULL; 7020 + } 7021 + table->used = 0; 7022 + } 7023 + 7024 + static void FASTCALL hashTableDestroy(HASH_TABLE* table) { 7025 + size_t i; 7026 + for (i = 0; i < table->size; i++) FREE(table->parser, table->v[i]); 7027 + FREE(table->parser, table->v); 7028 + } 7029 + 7030 + static void FASTCALL hashTableInit(HASH_TABLE* p, XML_Parser parser) { 7031 + p->power = 0; 7032 + p->size = 0; 7033 + p->used = 0; 7034 + p->v = NULL; 7035 + p->parser = parser; 7036 + } 7037 + 7038 + static void FASTCALL hashTableIterInit(HASH_TABLE_ITER* iter, const HASH_TABLE* table) { 7039 + iter->p = table->v; 7040 + iter->end = iter->p ? 
iter->p + table->size : NULL; 7041 + } 7042 + 7043 + static NAMED* FASTCALL hashTableIterNext(HASH_TABLE_ITER* iter) { 7044 + while (iter->p != iter->end) { 7045 + NAMED* tem = *(iter->p)++; 7046 + if (tem) return tem; 7047 + } 7048 + return NULL; 7049 + } 7050 + 7051 + static void FASTCALL poolInit(STRING_POOL* pool, XML_Parser parser) { 7052 + pool->blocks = NULL; 7053 + pool->freeBlocks = NULL; 7054 + pool->start = NULL; 7055 + pool->ptr = NULL; 7056 + pool->end = NULL; 7057 + pool->parser = parser; 7058 + } 7059 + 7060 + static void FASTCALL poolClear(STRING_POOL* pool) { 7061 + if (!pool->freeBlocks) 7062 + pool->freeBlocks = pool->blocks; 7063 + else { 7064 + BLOCK* p = pool->blocks; 7065 + while (p) { 7066 + BLOCK* tem = p->next; 7067 + p->next = pool->freeBlocks; 7068 + pool->freeBlocks = p; 7069 + p = tem; 7070 + } 7071 + } 7072 + pool->blocks = NULL; 7073 + pool->start = NULL; 7074 + pool->ptr = NULL; 7075 + pool->end = NULL; 7076 + } 7077 + 7078 + static void FASTCALL poolDestroy(STRING_POOL* pool) { 7079 + BLOCK* p = pool->blocks; 7080 + while (p) { 7081 + BLOCK* tem = p->next; 7082 + FREE(pool->parser, p); 7083 + p = tem; 7084 + } 7085 + p = pool->freeBlocks; 7086 + while (p) { 7087 + BLOCK* tem = p->next; 7088 + FREE(pool->parser, p); 7089 + p = tem; 7090 + } 7091 + } 7092 + 7093 + static XML_Char* poolAppend(STRING_POOL* pool, const ENCODING* enc, const char* ptr, const char* end) { 7094 + if (!pool->ptr && !poolGrow(pool)) return NULL; 7095 + for (;;) { 7096 + const enum XML_Convert_Result convert_res = 7097 + XmlConvert(enc, &ptr, end, (ICHAR**)&(pool->ptr), (const ICHAR*)pool->end); 7098 + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; 7099 + if (!poolGrow(pool)) return NULL; 7100 + } 7101 + return pool->start; 7102 + } 7103 + 7104 + static const XML_Char* FASTCALL poolCopyString(STRING_POOL* pool, const XML_Char* s) { 7105 + do { 7106 + if (!poolAppendChar(pool, *s)) return NULL; 7107 + } while 
(*s++); 7108 + s = pool->start; 7109 + poolFinish(pool); 7110 + return s; 7111 + } 7112 + 7113 + static const XML_Char* poolCopyStringN(STRING_POOL* pool, const XML_Char* s, int n) { 7114 + if (!pool->ptr && !poolGrow(pool)) { 7115 + /* The following line is unreachable given the current usage of 7116 + * poolCopyStringN(). Currently it is called from exactly one 7117 + * place to copy the text of a simple general entity. By that 7118 + * point, the name of the entity is already stored in the pool, so 7119 + * pool->ptr cannot be NULL. 7120 + * 7121 + * If poolCopyStringN() is used elsewhere as it well might be, 7122 + * this line may well become executable again. Regardless, this 7123 + * sort of check shouldn't be removed lightly, so we just exclude 7124 + * it from the coverage statistics. 7125 + */ 7126 + return NULL; /* LCOV_EXCL_LINE */ 7127 + } 7128 + for (; n > 0; --n, s++) { 7129 + if (!poolAppendChar(pool, *s)) return NULL; 7130 + } 7131 + s = pool->start; 7132 + poolFinish(pool); 7133 + return s; 7134 + } 7135 + 7136 + static const XML_Char* FASTCALL poolAppendString(STRING_POOL* pool, const XML_Char* s) { 7137 + while (*s) { 7138 + if (!poolAppendChar(pool, *s)) return NULL; 7139 + s++; 7140 + } 7141 + return pool->start; 7142 + } 7143 + 7144 + static XML_Char* poolStoreString(STRING_POOL* pool, const ENCODING* enc, const char* ptr, const char* end) { 7145 + if (!poolAppend(pool, enc, ptr, end)) return NULL; 7146 + if (pool->ptr == pool->end && !poolGrow(pool)) return NULL; 7147 + *(pool->ptr)++ = 0; 7148 + return pool->start; 7149 + } 7150 + 7151 + static size_t poolBytesToAllocateFor(int blockSize) { 7152 + /* Unprotected math would be: 7153 + ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); 7154 + ** 7155 + ** Detect overflow, avoiding _signed_ overflow undefined behavior 7156 + ** For a + b * c we check b * c in isolation first, so that addition of a 7157 + ** on top has no chance of making us accept a small non-negative number 7158 + */ 
7159 + const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ 7160 + 7161 + if (blockSize <= 0) return 0; 7162 + 7163 + if (blockSize > (int)(INT_MAX / stretch)) return 0; 7164 + 7165 + { 7166 + const int stretchedBlockSize = blockSize * (int)stretch; 7167 + const int bytesToAllocate = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); 7168 + if (bytesToAllocate < 0) return 0; 7169 + 7170 + return (size_t)bytesToAllocate; 7171 + } 7172 + } 7173 + 7174 + static XML_Bool FASTCALL poolGrow(STRING_POOL* pool) { 7175 + if (pool->freeBlocks) { 7176 + if (pool->start == 0) { 7177 + pool->blocks = pool->freeBlocks; 7178 + pool->freeBlocks = pool->freeBlocks->next; 7179 + pool->blocks->next = NULL; 7180 + pool->start = pool->blocks->s; 7181 + pool->end = pool->start + pool->blocks->size; 7182 + pool->ptr = pool->start; 7183 + return XML_TRUE; 7184 + } 7185 + if (pool->end - pool->start < pool->freeBlocks->size) { 7186 + BLOCK* tem = pool->freeBlocks->next; 7187 + pool->freeBlocks->next = pool->blocks; 7188 + pool->blocks = pool->freeBlocks; 7189 + pool->freeBlocks = tem; 7190 + memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char)); 7191 + pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 7192 + pool->start = pool->blocks->s; 7193 + pool->end = pool->start + pool->blocks->size; 7194 + return XML_TRUE; 7195 + } 7196 + } 7197 + if (pool->blocks && pool->start == pool->blocks->s) { 7198 + BLOCK* temp; 7199 + int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); 7200 + size_t bytesToAllocate; 7201 + 7202 + /* NOTE: Needs to be calculated prior to calling `realloc` 7203 + to avoid dangling pointers: */ 7204 + const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; 7205 + 7206 + if (blockSize < 0) { 7207 + /* This condition traps a situation where either more than 7208 + * INT_MAX/2 bytes have already been allocated. 
This isn't 7209 + * readily testable, since it is unlikely that an average 7210 + * machine will have that much memory, so we exclude it from the 7211 + * coverage statistics. 7212 + */ 7213 + return XML_FALSE; /* LCOV_EXCL_LINE */ 7214 + } 7215 + 7216 + bytesToAllocate = poolBytesToAllocateFor(blockSize); 7217 + if (bytesToAllocate == 0) return XML_FALSE; 7218 + 7219 + temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate); 7220 + if (temp == NULL) return XML_FALSE; 7221 + pool->blocks = temp; 7222 + pool->blocks->size = blockSize; 7223 + pool->ptr = pool->blocks->s + offsetInsideBlock; 7224 + pool->start = pool->blocks->s; 7225 + pool->end = pool->start + blockSize; 7226 + } else { 7227 + BLOCK* tem; 7228 + int blockSize = (int)(pool->end - pool->start); 7229 + size_t bytesToAllocate; 7230 + 7231 + if (blockSize < 0) { 7232 + /* This condition traps a situation where either more than 7233 + * INT_MAX bytes have already been allocated (which is prevented 7234 + * by various pieces of program logic, not least this one, never 7235 + * mind the unlikelihood of actually having that much memory) or 7236 + * the pool control fields have been corrupted (which could 7237 + * conceivably happen in an extremely buggy user handler 7238 + * function). Either way it isn't readily testable, so we 7239 + * exclude it from the coverage statistics. 
7240 + */ 7241 + return XML_FALSE; /* LCOV_EXCL_LINE */ 7242 + } 7243 + 7244 + if (blockSize < INIT_BLOCK_SIZE) 7245 + blockSize = INIT_BLOCK_SIZE; 7246 + else { 7247 + /* Detect overflow, avoiding _signed_ overflow undefined behavior */ 7248 + if ((int)((unsigned)blockSize * 2U) < 0) { 7249 + return XML_FALSE; 7250 + } 7251 + blockSize *= 2; 7252 + } 7253 + 7254 + bytesToAllocate = poolBytesToAllocateFor(blockSize); 7255 + if (bytesToAllocate == 0) return XML_FALSE; 7256 + 7257 + tem = MALLOC(pool->parser, bytesToAllocate); 7258 + if (!tem) return XML_FALSE; 7259 + tem->size = blockSize; 7260 + tem->next = pool->blocks; 7261 + pool->blocks = tem; 7262 + if (pool->ptr != pool->start) memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); 7263 + pool->ptr = tem->s + (pool->ptr - pool->start); 7264 + pool->start = tem->s; 7265 + pool->end = tem->s + blockSize; 7266 + } 7267 + return XML_TRUE; 7268 + } 7269 + 7270 + static int FASTCALL nextScaffoldPart(XML_Parser parser) { 7271 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 7272 + CONTENT_SCAFFOLD* me; 7273 + int next; 7274 + 7275 + if (!dtd->scaffIndex) { 7276 + /* Detect and prevent integer overflow. 7277 + * The preprocessor guard addresses the "always false" warning 7278 + * from -Wtype-limits on platforms where 7279 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 7280 + #if UINT_MAX >= SIZE_MAX 7281 + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { 7282 + return -1; 7283 + } 7284 + #endif 7285 + dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int)); 7286 + if (!dtd->scaffIndex) return -1; 7287 + dtd->scaffIndex[0] = 0; 7288 + } 7289 + 7290 + // Will casting to int be safe further down? 
7291 + if (dtd->scaffCount > INT_MAX) { 7292 + return -1; 7293 + } 7294 + 7295 + if (dtd->scaffCount >= dtd->scaffSize) { 7296 + CONTENT_SCAFFOLD* temp; 7297 + if (dtd->scaffold) { 7298 + /* Detect and prevent integer overflow */ 7299 + if (dtd->scaffSize > UINT_MAX / 2u) { 7300 + return -1; 7301 + } 7302 + /* Detect and prevent integer overflow. 7303 + * The preprocessor guard addresses the "always false" warning 7304 + * from -Wtype-limits on platforms where 7305 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 7306 + #if UINT_MAX >= SIZE_MAX 7307 + if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) { 7308 + return -1; 7309 + } 7310 + #endif 7311 + 7312 + temp = REALLOC(parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); 7313 + if (temp == NULL) return -1; 7314 + dtd->scaffSize *= 2; 7315 + } else { 7316 + temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD)); 7317 + if (temp == NULL) return -1; 7318 + dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; 7319 + } 7320 + dtd->scaffold = temp; 7321 + } 7322 + next = (int)dtd->scaffCount++; 7323 + me = &dtd->scaffold[next]; 7324 + if (dtd->scaffLevel) { 7325 + CONTENT_SCAFFOLD* parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; 7326 + if (parent->lastchild) { 7327 + dtd->scaffold[parent->lastchild].nextsib = next; 7328 + } 7329 + if (!parent->childcnt) parent->firstchild = next; 7330 + parent->lastchild = next; 7331 + parent->childcnt++; 7332 + } 7333 + me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; 7334 + return next; 7335 + } 7336 + 7337 + static XML_Content* build_model(XML_Parser parser) { 7338 + /* Function build_model transforms the existing parser->m_dtd->scaffold 7339 + * array of CONTENT_SCAFFOLD tree nodes into a new array of 7340 + * XML_Content tree nodes followed by a gapless list of zero-terminated 7341 + * strings. 
*/ 7342 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 7343 + XML_Content* ret; 7344 + XML_Char* str; /* the current string writing location */ 7345 + 7346 + /* Detect and prevent integer overflow. 7347 + * The preprocessor guard addresses the "always false" warning 7348 + * from -Wtype-limits on platforms where 7349 + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 7350 + #if UINT_MAX >= SIZE_MAX 7351 + if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) { 7352 + return NULL; 7353 + } 7354 + if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) { 7355 + return NULL; 7356 + } 7357 + #endif 7358 + if (dtd->scaffCount * sizeof(XML_Content) > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) { 7359 + return NULL; 7360 + } 7361 + 7362 + const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) + (dtd->contentStringLen * sizeof(XML_Char))); 7363 + 7364 + // NOTE: We are avoiding MALLOC(..) here to so that 7365 + // applications that are not using XML_FreeContentModel but plain 7366 + // free(..) or .free_fcn() to free the content model's memory are safe. 7367 + ret = parser->m_mem.malloc_fcn(allocsize); 7368 + if (!ret) return NULL; 7369 + 7370 + /* What follows is an iterative implementation (of what was previously done 7371 + * recursively in a dedicated function called "build_node". The old recursive 7372 + * build_node could be forced into stack exhaustion from input as small as a 7373 + * few megabyte, and so that was a security issue. Hence, a function call 7374 + * stack is avoided now by resolving recursion.) 
7375 + * 7376 + * The iterative approach works as follows: 7377 + * 7378 + * - We have two writing pointers, both walking up the result array; one does 7379 + * the work, the other creates "jobs" for its colleague to do, and leads 7380 + * the way: 7381 + * 7382 + * - The faster one, pointer jobDest, always leads and writes "what job 7383 + * to do" by the other, once they reach that place in the 7384 + * array: leader "jobDest" stores the source node array index (relative 7385 + * to array dtd->scaffold) in field "numchildren". 7386 + * 7387 + * - The slower one, pointer dest, looks at the value stored in the 7388 + * "numchildren" field (which actually holds a source node array index 7389 + * at that time) and puts the real data from dtd->scaffold in. 7390 + * 7391 + * - Before the loop starts, jobDest writes source array index 0 7392 + * (where the root node is located) so that dest will have something to do 7393 + * when it starts operation. 7394 + * 7395 + * - Whenever nodes with children are encountered, jobDest appends 7396 + * them as new jobs, in order. As a result, tree node siblings are 7397 + * adjacent in the resulting array, for example: 7398 + * 7399 + * [0] root, has two children 7400 + * [1] first child of 0, has three children 7401 + * [3] first child of 1, does not have children 7402 + * [4] second child of 1, does not have children 7403 + * [5] third child of 1, does not have children 7404 + * [2] second child of 0, does not have children 7405 + * 7406 + * Or (the same data) presented in flat array view: 7407 + * 7408 + * [0] root, has two children 7409 + * 7410 + * [1] first child of 0, has three children 7411 + * [2] second child of 0, does not have children 7412 + * 7413 + * [3] first child of 1, does not have children 7414 + * [4] second child of 1, does not have children 7415 + * [5] third child of 1, does not have children 7416 + * 7417 + * - The algorithm repeats until all target array indices have been processed. 
7418 + */ 7419 + XML_Content* dest = ret; /* tree node writing location, moves upwards */ 7420 + XML_Content* const destLimit = &ret[dtd->scaffCount]; 7421 + XML_Content* jobDest = ret; /* next free writing location in target array */ 7422 + str = (XML_Char*)&ret[dtd->scaffCount]; 7423 + 7424 + /* Add the starting job, the root node (index 0) of the source tree */ 7425 + (jobDest++)->numchildren = 0; 7426 + 7427 + for (; dest < destLimit; dest++) { 7428 + /* Retrieve source tree array index from job storage */ 7429 + const int src_node = (int)dest->numchildren; 7430 + 7431 + /* Convert item */ 7432 + dest->type = dtd->scaffold[src_node].type; 7433 + dest->quant = dtd->scaffold[src_node].quant; 7434 + if (dest->type == XML_CTYPE_NAME) { 7435 + const XML_Char* src; 7436 + dest->name = str; 7437 + src = dtd->scaffold[src_node].name; 7438 + for (;;) { 7439 + *str++ = *src; 7440 + if (!*src) break; 7441 + src++; 7442 + } 7443 + dest->numchildren = 0; 7444 + dest->children = NULL; 7445 + } else { 7446 + unsigned int i; 7447 + int cn; 7448 + dest->name = NULL; 7449 + dest->numchildren = dtd->scaffold[src_node].childcnt; 7450 + dest->children = jobDest; 7451 + 7452 + /* Append scaffold indices of children to array */ 7453 + for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) 7454 + (jobDest++)->numchildren = (unsigned int)cn; 7455 + } 7456 + } 7457 + 7458 + return ret; 7459 + } 7460 + 7461 + static ELEMENT_TYPE* getElementType(XML_Parser parser, const ENCODING* enc, const char* ptr, const char* end) { 7462 + DTD* const dtd = parser->m_dtd; /* save one level of indirection */ 7463 + const XML_Char* name = poolStoreString(&dtd->pool, enc, ptr, end); 7464 + ELEMENT_TYPE* ret; 7465 + 7466 + if (!name) return NULL; 7467 + ret = (ELEMENT_TYPE*)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); 7468 + if (!ret) return NULL; 7469 + if (ret->name != name) 7470 + poolDiscard(&dtd->pool); 7471 + else { 7472 + 
poolFinish(&dtd->pool); 7473 + if (!setElementTypePrefix(parser, ret)) return NULL; 7474 + } 7475 + return ret; 7476 + } 7477 + 7478 + static XML_Char* copyString(const XML_Char* s, XML_Parser parser) { 7479 + size_t charsRequired = 0; 7480 + XML_Char* result; 7481 + 7482 + /* First determine how long the string is */ 7483 + while (s[charsRequired] != 0) { 7484 + charsRequired++; 7485 + } 7486 + /* Include the terminator */ 7487 + charsRequired++; 7488 + 7489 + /* Now allocate space for the copy */ 7490 + result = MALLOC(parser, charsRequired * sizeof(XML_Char)); 7491 + if (result == NULL) return NULL; 7492 + /* Copy the original into place */ 7493 + memcpy(result, s, charsRequired * sizeof(XML_Char)); 7494 + return result; 7495 + } 7496 + 7497 + #if XML_GE == 1 7498 + 7499 + static float accountingGetCurrentAmplification(XML_Parser rootParser) { 7500 + // 1.........1.........12 => 22 7501 + const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1; 7502 + const XmlBigCount countBytesOutput = 7503 + rootParser->m_accounting.countBytesDirect + rootParser->m_accounting.countBytesIndirect; 7504 + const float amplificationFactor = 7505 + rootParser->m_accounting.countBytesDirect 7506 + ? 
((float)countBytesOutput / (float)(rootParser->m_accounting.countBytesDirect)) 7507 + : ((float)(lenOfShortestInclude + rootParser->m_accounting.countBytesIndirect) / (float)lenOfShortestInclude); 7508 + assert(!rootParser->m_parentParser); 7509 + return amplificationFactor; 7510 + } 7511 + 7512 + static void accountingReportStats(XML_Parser originParser, const char* epilog) { 7513 + const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7514 + assert(!rootParser->m_parentParser); 7515 + 7516 + if (rootParser->m_accounting.debugLevel == 0u) { 7517 + return; 7518 + } 7519 + 7520 + const float amplificationFactor = accountingGetCurrentAmplification(rootParser); 7521 + fprintf( 7522 + stderr, 7523 + "expat: Accounting(%p): Direct " EXPAT_FMT_ULL("10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", 7524 + (void*)rootParser, rootParser->m_accounting.countBytesDirect, rootParser->m_accounting.countBytesIndirect, 7525 + (double)amplificationFactor, epilog); 7526 + } 7527 + 7528 + static void accountingOnAbort(XML_Parser originParser) { accountingReportStats(originParser, " ABORTING\n"); } 7529 + 7530 + static void accountingReportDiff(XML_Parser rootParser, unsigned int levelsAwayFromRootParser, const char* before, 7531 + const char* after, ptrdiff_t bytesMore, int source_line, enum XML_Account account) { 7532 + assert(!rootParser->m_parentParser); 7533 + 7534 + fprintf(stderr, " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"", bytesMore, 7535 + (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", levelsAwayFromRootParser, source_line, 10, ""); 7536 + 7537 + const char ellipis[] = "[..]"; 7538 + const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; 7539 + const unsigned int contextLength = 10; 7540 + 7541 + /* Note: Performance is of no concern here */ 7542 + const char* walker = before; 7543 + if ((rootParser->m_accounting.debugLevel >= 3u) || 7544 + (after - before) <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { 7545 + for (; walker < after; walker++) { 7546 + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7547 + } 7548 + } else { 7549 + for (; walker < before + contextLength; walker++) { 7550 + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7551 + } 7552 + fprintf(stderr, ellipis); 7553 + walker = after - contextLength; 7554 + for (; walker < after; walker++) { 7555 + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7556 + } 7557 + } 7558 + fprintf(stderr, "\"\n"); 7559 + } 7560 + 7561 + static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, const char* before, const char* after, 7562 + int source_line, enum XML_Account account) { 7563 + /* Note: We need to check the token type *first* to be sure that 7564 + * we can even access variable <after>, safely. 7565 + * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. 
*/ 7566 + switch (tok) { 7567 + case XML_TOK_INVALID: 7568 + case XML_TOK_PARTIAL: 7569 + case XML_TOK_PARTIAL_CHAR: 7570 + case XML_TOK_NONE: 7571 + return XML_TRUE; 7572 + } 7573 + 7574 + if (account == XML_ACCOUNT_NONE) return XML_TRUE; /* because these bytes have been accounted for, already */ 7575 + 7576 + unsigned int levelsAwayFromRootParser; 7577 + const XML_Parser rootParser = getRootParserOf(originParser, &levelsAwayFromRootParser); 7578 + assert(!rootParser->m_parentParser); 7579 + 7580 + const int isDirect = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); 7581 + const ptrdiff_t bytesMore = after - before; 7582 + 7583 + XmlBigCount* const additionTarget = 7584 + isDirect ? &rootParser->m_accounting.countBytesDirect : &rootParser->m_accounting.countBytesIndirect; 7585 + 7586 + /* Detect and avoid integer overflow */ 7587 + if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) return XML_FALSE; 7588 + *additionTarget += bytesMore; 7589 + 7590 + const XmlBigCount countBytesOutput = 7591 + rootParser->m_accounting.countBytesDirect + rootParser->m_accounting.countBytesIndirect; 7592 + const float amplificationFactor = accountingGetCurrentAmplification(rootParser); 7593 + const XML_Bool tolerated = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) || 7594 + (amplificationFactor <= rootParser->m_accounting.maximumAmplificationFactor); 7595 + 7596 + if (rootParser->m_accounting.debugLevel >= 2u) { 7597 + accountingReportStats(rootParser, ""); 7598 + accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, bytesMore, source_line, account); 7599 + } 7600 + 7601 + return tolerated; 7602 + } 7603 + 7604 + unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser) { 7605 + if (!parser) return 0; 7606 + return parser->m_accounting.countBytesDirect; 7607 + } 7608 + 7609 + unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser) { 7610 + if (!parser) return 0; 7611 + 
return parser->m_accounting.countBytesIndirect; 7612 + } 7613 + 7614 + static void entityTrackingReportStats(XML_Parser rootParser, ENTITY* entity, const char* action, int sourceLine) { 7615 + assert(!rootParser->m_parentParser); 7616 + if (rootParser->m_entity_stats.debugLevel == 0u) return; 7617 + 7618 + #if defined(XML_UNICODE) 7619 + const char* const entityName = "[..]"; 7620 + #else 7621 + const char* const entityName = entity->name; 7622 + #endif 7623 + 7624 + fprintf(stderr, "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n", 7625 + (void*)rootParser, rootParser->m_entity_stats.countEverOpened, rootParser->m_entity_stats.currentDepth, 7626 + rootParser->m_entity_stats.maximumDepthSeen, ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "", 7627 + entity->is_param ? "%" : "&", entityName, action, entity->textLen, sourceLine); 7628 + } 7629 + 7630 + static void entityTrackingOnOpen(XML_Parser originParser, ENTITY* entity, int sourceLine) { 7631 + const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7632 + assert(!rootParser->m_parentParser); 7633 + 7634 + rootParser->m_entity_stats.countEverOpened++; 7635 + rootParser->m_entity_stats.currentDepth++; 7636 + if (rootParser->m_entity_stats.currentDepth > rootParser->m_entity_stats.maximumDepthSeen) { 7637 + rootParser->m_entity_stats.maximumDepthSeen++; 7638 + } 7639 + 7640 + entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); 7641 + } 7642 + 7643 + static void entityTrackingOnClose(XML_Parser originParser, ENTITY* entity, int sourceLine) { 7644 + const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7645 + assert(!rootParser->m_parentParser); 7646 + 7647 + entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); 7648 + rootParser->m_entity_stats.currentDepth--; 7649 + } 7650 + 7651 + #endif /* XML_GE == 1 */ 7652 + 7653 + static XML_Parser getRootParserOf(XML_Parser parser, unsigned int* outLevelDiff) { 7654 + 
XML_Parser rootParser = parser; 7655 + unsigned int stepsTakenUpwards = 0; 7656 + while (rootParser->m_parentParser) { 7657 + rootParser = rootParser->m_parentParser; 7658 + stepsTakenUpwards++; 7659 + } 7660 + assert(!rootParser->m_parentParser); 7661 + if (outLevelDiff != NULL) { 7662 + *outLevelDiff = stepsTakenUpwards; 7663 + } 7664 + return rootParser; 7665 + } 7666 + 7667 + #if XML_GE == 1 7668 + 7669 + const char* unsignedCharToPrintable(unsigned char c) { 7670 + switch (c) { 7671 + case 0: 7672 + return "\\0"; 7673 + case 1: 7674 + return "\\x1"; 7675 + case 2: 7676 + return "\\x2"; 7677 + case 3: 7678 + return "\\x3"; 7679 + case 4: 7680 + return "\\x4"; 7681 + case 5: 7682 + return "\\x5"; 7683 + case 6: 7684 + return "\\x6"; 7685 + case 7: 7686 + return "\\x7"; 7687 + case 8: 7688 + return "\\x8"; 7689 + case 9: 7690 + return "\\t"; 7691 + case 10: 7692 + return "\\n"; 7693 + case 11: 7694 + return "\\xB"; 7695 + case 12: 7696 + return "\\xC"; 7697 + case 13: 7698 + return "\\r"; 7699 + case 14: 7700 + return "\\xE"; 7701 + case 15: 7702 + return "\\xF"; 7703 + case 16: 7704 + return "\\x10"; 7705 + case 17: 7706 + return "\\x11"; 7707 + case 18: 7708 + return "\\x12"; 7709 + case 19: 7710 + return "\\x13"; 7711 + case 20: 7712 + return "\\x14"; 7713 + case 21: 7714 + return "\\x15"; 7715 + case 22: 7716 + return "\\x16"; 7717 + case 23: 7718 + return "\\x17"; 7719 + case 24: 7720 + return "\\x18"; 7721 + case 25: 7722 + return "\\x19"; 7723 + case 26: 7724 + return "\\x1A"; 7725 + case 27: 7726 + return "\\x1B"; 7727 + case 28: 7728 + return "\\x1C"; 7729 + case 29: 7730 + return "\\x1D"; 7731 + case 30: 7732 + return "\\x1E"; 7733 + case 31: 7734 + return "\\x1F"; 7735 + case 32: 7736 + return " "; 7737 + case 33: 7738 + return "!"; 7739 + case 34: 7740 + return "\\\""; 7741 + case 35: 7742 + return "#"; 7743 + case 36: 7744 + return "$"; 7745 + case 37: 7746 + return "%"; 7747 + case 38: 7748 + return "&"; 7749 + case 39: 7750 + return "'"; 7751 + 
case 40: 7752 + return "("; 7753 + case 41: 7754 + return ")"; 7755 + case 42: 7756 + return "*"; 7757 + case 43: 7758 + return "+"; 7759 + case 44: 7760 + return ","; 7761 + case 45: 7762 + return "-"; 7763 + case 46: 7764 + return "."; 7765 + case 47: 7766 + return "/"; 7767 + case 48: 7768 + return "0"; 7769 + case 49: 7770 + return "1"; 7771 + case 50: 7772 + return "2"; 7773 + case 51: 7774 + return "3"; 7775 + case 52: 7776 + return "4"; 7777 + case 53: 7778 + return "5"; 7779 + case 54: 7780 + return "6"; 7781 + case 55: 7782 + return "7"; 7783 + case 56: 7784 + return "8"; 7785 + case 57: 7786 + return "9"; 7787 + case 58: 7788 + return ":"; 7789 + case 59: 7790 + return ";"; 7791 + case 60: 7792 + return "<"; 7793 + case 61: 7794 + return "="; 7795 + case 62: 7796 + return ">"; 7797 + case 63: 7798 + return "?"; 7799 + case 64: 7800 + return "@"; 7801 + case 65: 7802 + return "A"; 7803 + case 66: 7804 + return "B"; 7805 + case 67: 7806 + return "C"; 7807 + case 68: 7808 + return "D"; 7809 + case 69: 7810 + return "E"; 7811 + case 70: 7812 + return "F"; 7813 + case 71: 7814 + return "G"; 7815 + case 72: 7816 + return "H"; 7817 + case 73: 7818 + return "I"; 7819 + case 74: 7820 + return "J"; 7821 + case 75: 7822 + return "K"; 7823 + case 76: 7824 + return "L"; 7825 + case 77: 7826 + return "M"; 7827 + case 78: 7828 + return "N"; 7829 + case 79: 7830 + return "O"; 7831 + case 80: 7832 + return "P"; 7833 + case 81: 7834 + return "Q"; 7835 + case 82: 7836 + return "R"; 7837 + case 83: 7838 + return "S"; 7839 + case 84: 7840 + return "T"; 7841 + case 85: 7842 + return "U"; 7843 + case 86: 7844 + return "V"; 7845 + case 87: 7846 + return "W"; 7847 + case 88: 7848 + return "X"; 7849 + case 89: 7850 + return "Y"; 7851 + case 90: 7852 + return "Z"; 7853 + case 91: 7854 + return "["; 7855 + case 92: 7856 + return "\\\\"; 7857 + case 93: 7858 + return "]"; 7859 + case 94: 7860 + return "^"; 7861 + case 95: 7862 + return "_"; 7863 + case 96: 7864 + return "`"; 7865 + 
case 97: 7866 + return "a"; 7867 + case 98: 7868 + return "b"; 7869 + case 99: 7870 + return "c"; 7871 + case 100: 7872 + return "d"; 7873 + case 101: 7874 + return "e"; 7875 + case 102: 7876 + return "f"; 7877 + case 103: 7878 + return "g"; 7879 + case 104: 7880 + return "h"; 7881 + case 105: 7882 + return "i"; 7883 + case 106: 7884 + return "j"; 7885 + case 107: 7886 + return "k"; 7887 + case 108: 7888 + return "l"; 7889 + case 109: 7890 + return "m"; 7891 + case 110: 7892 + return "n"; 7893 + case 111: 7894 + return "o"; 7895 + case 112: 7896 + return "p"; 7897 + case 113: 7898 + return "q"; 7899 + case 114: 7900 + return "r"; 7901 + case 115: 7902 + return "s"; 7903 + case 116: 7904 + return "t"; 7905 + case 117: 7906 + return "u"; 7907 + case 118: 7908 + return "v"; 7909 + case 119: 7910 + return "w"; 7911 + case 120: 7912 + return "x"; 7913 + case 121: 7914 + return "y"; 7915 + case 122: 7916 + return "z"; 7917 + case 123: 7918 + return "{"; 7919 + case 124: 7920 + return "|"; 7921 + case 125: 7922 + return "}"; 7923 + case 126: 7924 + return "~"; 7925 + case 127: 7926 + return "\\x7F"; 7927 + case 128: 7928 + return "\\x80"; 7929 + case 129: 7930 + return "\\x81"; 7931 + case 130: 7932 + return "\\x82"; 7933 + case 131: 7934 + return "\\x83"; 7935 + case 132: 7936 + return "\\x84"; 7937 + case 133: 7938 + return "\\x85"; 7939 + case 134: 7940 + return "\\x86"; 7941 + case 135: 7942 + return "\\x87"; 7943 + case 136: 7944 + return "\\x88"; 7945 + case 137: 7946 + return "\\x89"; 7947 + case 138: 7948 + return "\\x8A"; 7949 + case 139: 7950 + return "\\x8B"; 7951 + case 140: 7952 + return "\\x8C"; 7953 + case 141: 7954 + return "\\x8D"; 7955 + case 142: 7956 + return "\\x8E"; 7957 + case 143: 7958 + return "\\x8F"; 7959 + case 144: 7960 + return "\\x90"; 7961 + case 145: 7962 + return "\\x91"; 7963 + case 146: 7964 + return "\\x92"; 7965 + case 147: 7966 + return "\\x93"; 7967 + case 148: 7968 + return "\\x94"; 7969 + case 149: 7970 + return "\\x95"; 7971 + 
case 150: 7972 + return "\\x96"; 7973 + case 151: 7974 + return "\\x97"; 7975 + case 152: 7976 + return "\\x98"; 7977 + case 153: 7978 + return "\\x99"; 7979 + case 154: 7980 + return "\\x9A"; 7981 + case 155: 7982 + return "\\x9B"; 7983 + case 156: 7984 + return "\\x9C"; 7985 + case 157: 7986 + return "\\x9D"; 7987 + case 158: 7988 + return "\\x9E"; 7989 + case 159: 7990 + return "\\x9F"; 7991 + case 160: 7992 + return "\\xA0"; 7993 + case 161: 7994 + return "\\xA1"; 7995 + case 162: 7996 + return "\\xA2"; 7997 + case 163: 7998 + return "\\xA3"; 7999 + case 164: 8000 + return "\\xA4"; 8001 + case 165: 8002 + return "\\xA5"; 8003 + case 166: 8004 + return "\\xA6"; 8005 + case 167: 8006 + return "\\xA7"; 8007 + case 168: 8008 + return "\\xA8"; 8009 + case 169: 8010 + return "\\xA9"; 8011 + case 170: 8012 + return "\\xAA"; 8013 + case 171: 8014 + return "\\xAB"; 8015 + case 172: 8016 + return "\\xAC"; 8017 + case 173: 8018 + return "\\xAD"; 8019 + case 174: 8020 + return "\\xAE"; 8021 + case 175: 8022 + return "\\xAF"; 8023 + case 176: 8024 + return "\\xB0"; 8025 + case 177: 8026 + return "\\xB1"; 8027 + case 178: 8028 + return "\\xB2"; 8029 + case 179: 8030 + return "\\xB3"; 8031 + case 180: 8032 + return "\\xB4"; 8033 + case 181: 8034 + return "\\xB5"; 8035 + case 182: 8036 + return "\\xB6"; 8037 + case 183: 8038 + return "\\xB7"; 8039 + case 184: 8040 + return "\\xB8"; 8041 + case 185: 8042 + return "\\xB9"; 8043 + case 186: 8044 + return "\\xBA"; 8045 + case 187: 8046 + return "\\xBB"; 8047 + case 188: 8048 + return "\\xBC"; 8049 + case 189: 8050 + return "\\xBD"; 8051 + case 190: 8052 + return "\\xBE"; 8053 + case 191: 8054 + return "\\xBF"; 8055 + case 192: 8056 + return "\\xC0"; 8057 + case 193: 8058 + return "\\xC1"; 8059 + case 194: 8060 + return "\\xC2"; 8061 + case 195: 8062 + return "\\xC3"; 8063 + case 196: 8064 + return "\\xC4"; 8065 + case 197: 8066 + return "\\xC5"; 8067 + case 198: 8068 + return "\\xC6"; 8069 + case 199: 8070 + return "\\xC7"; 8071 + 
case 200: 8072 + return "\\xC8"; 8073 + case 201: 8074 + return "\\xC9"; 8075 + case 202: 8076 + return "\\xCA"; 8077 + case 203: 8078 + return "\\xCB"; 8079 + case 204: 8080 + return "\\xCC"; 8081 + case 205: 8082 + return "\\xCD"; 8083 + case 206: 8084 + return "\\xCE"; 8085 + case 207: 8086 + return "\\xCF"; 8087 + case 208: 8088 + return "\\xD0"; 8089 + case 209: 8090 + return "\\xD1"; 8091 + case 210: 8092 + return "\\xD2"; 8093 + case 211: 8094 + return "\\xD3"; 8095 + case 212: 8096 + return "\\xD4"; 8097 + case 213: 8098 + return "\\xD5"; 8099 + case 214: 8100 + return "\\xD6"; 8101 + case 215: 8102 + return "\\xD7"; 8103 + case 216: 8104 + return "\\xD8"; 8105 + case 217: 8106 + return "\\xD9"; 8107 + case 218: 8108 + return "\\xDA"; 8109 + case 219: 8110 + return "\\xDB"; 8111 + case 220: 8112 + return "\\xDC"; 8113 + case 221: 8114 + return "\\xDD"; 8115 + case 222: 8116 + return "\\xDE"; 8117 + case 223: 8118 + return "\\xDF"; 8119 + case 224: 8120 + return "\\xE0"; 8121 + case 225: 8122 + return "\\xE1"; 8123 + case 226: 8124 + return "\\xE2"; 8125 + case 227: 8126 + return "\\xE3"; 8127 + case 228: 8128 + return "\\xE4"; 8129 + case 229: 8130 + return "\\xE5"; 8131 + case 230: 8132 + return "\\xE6"; 8133 + case 231: 8134 + return "\\xE7"; 8135 + case 232: 8136 + return "\\xE8"; 8137 + case 233: 8138 + return "\\xE9"; 8139 + case 234: 8140 + return "\\xEA"; 8141 + case 235: 8142 + return "\\xEB"; 8143 + case 236: 8144 + return "\\xEC"; 8145 + case 237: 8146 + return "\\xED"; 8147 + case 238: 8148 + return "\\xEE"; 8149 + case 239: 8150 + return "\\xEF"; 8151 + case 240: 8152 + return "\\xF0"; 8153 + case 241: 8154 + return "\\xF1"; 8155 + case 242: 8156 + return "\\xF2"; 8157 + case 243: 8158 + return "\\xF3"; 8159 + case 244: 8160 + return "\\xF4"; 8161 + case 245: 8162 + return "\\xF5"; 8163 + case 246: 8164 + return "\\xF6"; 8165 + case 247: 8166 + return "\\xF7"; 8167 + case 248: 8168 + return "\\xF8"; 8169 + case 249: 8170 + return "\\xF9"; 8171 + 
case 250: 8172 + return "\\xFA"; 8173 + case 251: 8174 + return "\\xFB"; 8175 + case 252: 8176 + return "\\xFC"; 8177 + case 253: 8178 + return "\\xFD"; 8179 + case 254: 8180 + return "\\xFE"; 8181 + case 255: 8182 + return "\\xFF"; 8183 + // LCOV_EXCL_START 8184 + default: 8185 + assert(0); /* never gets here */ 8186 + return "dead code"; 8187 + } 8188 + assert(0); /* never gets here */ 8189 + // LCOV_EXCL_STOP 8190 + } 8191 + 8192 + #endif /* XML_GE == 1 */ 8193 + 8194 + static unsigned long getDebugLevel(const char* variableName, unsigned long defaultDebugLevel) { 8195 + const char* const valueOrNull = getenv(variableName); 8196 + if (valueOrNull == NULL) { 8197 + return defaultDebugLevel; 8198 + } 8199 + const char* const value = valueOrNull; 8200 + 8201 + errno = 0; 8202 + char* afterValue = NULL; 8203 + unsigned long debugLevel = strtoul(value, &afterValue, 10); 8204 + if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { 8205 + errno = 0; 8206 + return defaultDebugLevel; 8207 + } 8208 + 8209 + return debugLevel; 8210 + }
+1108
lib/expat/xmlrole.c
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> 12 + Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> 13 + Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 14 + Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 + Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org> 16 + Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 17 + Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 18 + Copyright (c) 2021 Donghee Na <donghee.na@python.org> 19 + Licensed under the MIT license: 20 + 21 + Permission is hereby granted, free of charge, to any person obtaining 22 + a copy of this software and associated documentation files (the 23 + "Software"), to deal in the Software without restriction, including 24 + without limitation the rights to use, copy, modify, merge, publish, 25 + distribute, sublicense, and/or sell copies of the Software, and to permit 26 + persons to whom the Software is furnished to do so, subject to the 27 + following conditions: 28 + 29 + The above copyright notice and this permission notice shall be included 30 + in all copies or substantial portions of the Software. 31 + 32 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 33 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 34 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 35 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 36 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 37 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 38 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
39 + */ 40 + 41 + #include <stddef.h> 42 + 43 + #include "expat_config.h" 44 + 45 + #ifdef _WIN32 46 + #include "winconfig.h" 47 + #endif 48 + 49 + #include "ascii.h" 50 + #include "expat_external.h" 51 + #include "internal.h" 52 + #include "xmlrole.h" 53 + 54 + /* Doesn't check: 55 + 56 + that ,| are not mixed in a model group 57 + content of literals 58 + 59 + */ 60 + 61 + static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'}; 62 + static const char KW_ATTLIST[] = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'}; 63 + static const char KW_CDATA[] = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; 64 + static const char KW_DOCTYPE[] = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'}; 65 + static const char KW_ELEMENT[] = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'}; 66 + static const char KW_EMPTY[] = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'}; 67 + static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0'}; 68 + static const char KW_ENTITY[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; 69 + static const char KW_FIXED[] = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'}; 70 + static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'}; 71 + static const char KW_IDREF[] = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; 72 + static const char KW_IDREFS[] = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; 73 + #ifdef XML_DTD 74 + static const char KW_IGNORE[] = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'}; 75 + #endif 76 + static const char KW_IMPLIED[] = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'}; 77 + #ifdef XML_DTD 78 + static const char KW_INCLUDE[] = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'}; 79 + #endif 80 + static const char KW_NDATA[] = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; 81 + static const char 
KW_NMTOKEN[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; 82 + static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0'}; 83 + static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0'}; 84 + static const char KW_PCDATA[] = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; 85 + static const char KW_PUBLIC[] = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'}; 86 + static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0'}; 87 + static const char KW_SYSTEM[] = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'}; 88 + 89 + #ifndef MIN_BYTES_PER_CHAR 90 + #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) 91 + #endif 92 + 93 + #ifdef XML_DTD 94 + #define setTopLevel(state) ((state)->handler = ((state)->documentEntity ? internalSubset : externalSubset1)) 95 + #else /* not XML_DTD */ 96 + #define setTopLevel(state) ((state)->handler = internalSubset) 97 + #endif /* not XML_DTD */ 98 + 99 + typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc); 100 + 101 + static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, 102 + internalSubset, entity0, entity1, entity2, entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10, 103 + notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, 104 + attlist6, attlist7, attlist8, attlist9, element0, element1, element2, element3, element4, element5, element6, 105 + element7, 106 + #ifdef XML_DTD 107 + externalSubset0, externalSubset1, condSect0, condSect1, condSect2, 108 + #endif /* XML_DTD */ 109 + declClose, error; 110 + 111 + static int FASTCALL common(PROLOG_STATE* state, int tok); 112 + 113 + static int PTRCALL 
prolog0(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 114 + switch (tok) { 115 + case XML_TOK_PROLOG_S: 116 + state->handler = prolog1; 117 + return XML_ROLE_NONE; 118 + case XML_TOK_XML_DECL: 119 + state->handler = prolog1; 120 + return XML_ROLE_XML_DECL; 121 + case XML_TOK_PI: 122 + state->handler = prolog1; 123 + return XML_ROLE_PI; 124 + case XML_TOK_COMMENT: 125 + state->handler = prolog1; 126 + return XML_ROLE_COMMENT; 127 + case XML_TOK_BOM: 128 + return XML_ROLE_NONE; 129 + case XML_TOK_DECL_OPEN: 130 + if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_DOCTYPE)) break; 131 + state->handler = doctype0; 132 + return XML_ROLE_DOCTYPE_NONE; 133 + case XML_TOK_INSTANCE_START: 134 + state->handler = error; 135 + return XML_ROLE_INSTANCE_START; 136 + } 137 + return common(state, tok); 138 + } 139 + 140 + static int PTRCALL prolog1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 141 + switch (tok) { 142 + case XML_TOK_PROLOG_S: 143 + return XML_ROLE_NONE; 144 + case XML_TOK_PI: 145 + return XML_ROLE_PI; 146 + case XML_TOK_COMMENT: 147 + return XML_ROLE_COMMENT; 148 + case XML_TOK_BOM: 149 + /* This case can never arise. To reach this role function, the 150 + * parse must have passed through prolog0 and therefore have had 151 + * some form of input, even if only a space. At that point, a 152 + * byte order mark is no longer a valid character (though 153 + * technically it should be interpreted as a non-breaking space), 154 + * so will be rejected by the tokenizing stages. 
155 + */ 156 + return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ 157 + case XML_TOK_DECL_OPEN: 158 + if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_DOCTYPE)) break; 159 + state->handler = doctype0; 160 + return XML_ROLE_DOCTYPE_NONE; 161 + case XML_TOK_INSTANCE_START: 162 + state->handler = error; 163 + return XML_ROLE_INSTANCE_START; 164 + } 165 + return common(state, tok); 166 + } 167 + 168 + static int PTRCALL prolog2(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 169 + UNUSED_P(ptr); 170 + UNUSED_P(end); 171 + UNUSED_P(enc); 172 + switch (tok) { 173 + case XML_TOK_PROLOG_S: 174 + return XML_ROLE_NONE; 175 + case XML_TOK_PI: 176 + return XML_ROLE_PI; 177 + case XML_TOK_COMMENT: 178 + return XML_ROLE_COMMENT; 179 + case XML_TOK_INSTANCE_START: 180 + state->handler = error; 181 + return XML_ROLE_INSTANCE_START; 182 + } 183 + return common(state, tok); 184 + } 185 + 186 + static int PTRCALL doctype0(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 187 + UNUSED_P(ptr); 188 + UNUSED_P(end); 189 + UNUSED_P(enc); 190 + switch (tok) { 191 + case XML_TOK_PROLOG_S: 192 + return XML_ROLE_DOCTYPE_NONE; 193 + case XML_TOK_NAME: 194 + case XML_TOK_PREFIXED_NAME: 195 + state->handler = doctype1; 196 + return XML_ROLE_DOCTYPE_NAME; 197 + } 198 + return common(state, tok); 199 + } 200 + 201 + static int PTRCALL doctype1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 202 + switch (tok) { 203 + case XML_TOK_PROLOG_S: 204 + return XML_ROLE_DOCTYPE_NONE; 205 + case XML_TOK_OPEN_BRACKET: 206 + state->handler = internalSubset; 207 + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; 208 + case XML_TOK_DECL_CLOSE: 209 + state->handler = prolog2; 210 + return XML_ROLE_DOCTYPE_CLOSE; 211 + case XML_TOK_NAME: 212 + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 213 + state->handler = doctype3; 214 + return XML_ROLE_DOCTYPE_NONE; 215 + } 216 + if 
(XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 217 + state->handler = doctype2; 218 + return XML_ROLE_DOCTYPE_NONE; 219 + } 220 + break; 221 + } 222 + return common(state, tok); 223 + } 224 + 225 + static int PTRCALL doctype2(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 226 + UNUSED_P(ptr); 227 + UNUSED_P(end); 228 + UNUSED_P(enc); 229 + switch (tok) { 230 + case XML_TOK_PROLOG_S: 231 + return XML_ROLE_DOCTYPE_NONE; 232 + case XML_TOK_LITERAL: 233 + state->handler = doctype3; 234 + return XML_ROLE_DOCTYPE_PUBLIC_ID; 235 + } 236 + return common(state, tok); 237 + } 238 + 239 + static int PTRCALL doctype3(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 240 + UNUSED_P(ptr); 241 + UNUSED_P(end); 242 + UNUSED_P(enc); 243 + switch (tok) { 244 + case XML_TOK_PROLOG_S: 245 + return XML_ROLE_DOCTYPE_NONE; 246 + case XML_TOK_LITERAL: 247 + state->handler = doctype4; 248 + return XML_ROLE_DOCTYPE_SYSTEM_ID; 249 + } 250 + return common(state, tok); 251 + } 252 + 253 + static int PTRCALL doctype4(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 254 + UNUSED_P(ptr); 255 + UNUSED_P(end); 256 + UNUSED_P(enc); 257 + switch (tok) { 258 + case XML_TOK_PROLOG_S: 259 + return XML_ROLE_DOCTYPE_NONE; 260 + case XML_TOK_OPEN_BRACKET: 261 + state->handler = internalSubset; 262 + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; 263 + case XML_TOK_DECL_CLOSE: 264 + state->handler = prolog2; 265 + return XML_ROLE_DOCTYPE_CLOSE; 266 + } 267 + return common(state, tok); 268 + } 269 + 270 + static int PTRCALL doctype5(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 271 + UNUSED_P(ptr); 272 + UNUSED_P(end); 273 + UNUSED_P(enc); 274 + switch (tok) { 275 + case XML_TOK_PROLOG_S: 276 + return XML_ROLE_DOCTYPE_NONE; 277 + case XML_TOK_DECL_CLOSE: 278 + state->handler = prolog2; 279 + return XML_ROLE_DOCTYPE_CLOSE; 280 + } 281 + return common(state, 
tok); 282 + } 283 + 284 + static int PTRCALL internalSubset(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 285 + switch (tok) { 286 + case XML_TOK_PROLOG_S: 287 + return XML_ROLE_NONE; 288 + case XML_TOK_DECL_OPEN: 289 + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ENTITY)) { 290 + state->handler = entity0; 291 + return XML_ROLE_ENTITY_NONE; 292 + } 293 + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ATTLIST)) { 294 + state->handler = attlist0; 295 + return XML_ROLE_ATTLIST_NONE; 296 + } 297 + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ELEMENT)) { 298 + state->handler = element0; 299 + return XML_ROLE_ELEMENT_NONE; 300 + } 301 + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_NOTATION)) { 302 + state->handler = notation0; 303 + return XML_ROLE_NOTATION_NONE; 304 + } 305 + break; 306 + case XML_TOK_PI: 307 + return XML_ROLE_PI; 308 + case XML_TOK_COMMENT: 309 + return XML_ROLE_COMMENT; 310 + case XML_TOK_PARAM_ENTITY_REF: 311 + return XML_ROLE_PARAM_ENTITY_REF; 312 + case XML_TOK_CLOSE_BRACKET: 313 + state->handler = doctype5; 314 + return XML_ROLE_DOCTYPE_NONE; 315 + case XML_TOK_NONE: 316 + return XML_ROLE_NONE; 317 + } 318 + return common(state, tok); 319 + } 320 + 321 + #ifdef XML_DTD 322 + 323 + static int PTRCALL externalSubset0(PROLOG_STATE* state, int tok, const char* ptr, const char* end, 324 + const ENCODING* enc) { 325 + state->handler = externalSubset1; 326 + if (tok == XML_TOK_XML_DECL) return XML_ROLE_TEXT_DECL; 327 + return externalSubset1(state, tok, ptr, end, enc); 328 + } 329 + 330 + static int PTRCALL externalSubset1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, 331 + const ENCODING* enc) { 332 + switch (tok) { 333 + case XML_TOK_COND_SECT_OPEN: 334 + state->handler = condSect0; 335 + return XML_ROLE_NONE; 336 + case XML_TOK_COND_SECT_CLOSE: 337 + if (state->includeLevel == 0) break; 
338 + state->includeLevel -= 1; 339 + return XML_ROLE_NONE; 340 + case XML_TOK_PROLOG_S: 341 + return XML_ROLE_NONE; 342 + case XML_TOK_CLOSE_BRACKET: 343 + break; 344 + case XML_TOK_NONE: 345 + if (state->includeLevel) break; 346 + return XML_ROLE_NONE; 347 + default: 348 + return internalSubset(state, tok, ptr, end, enc); 349 + } 350 + return common(state, tok); 351 + } 352 + 353 + #endif /* XML_DTD */ 354 + 355 + static int PTRCALL entity0(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 356 + UNUSED_P(ptr); 357 + UNUSED_P(end); 358 + UNUSED_P(enc); 359 + switch (tok) { 360 + case XML_TOK_PROLOG_S: 361 + return XML_ROLE_ENTITY_NONE; 362 + case XML_TOK_PERCENT: 363 + state->handler = entity1; 364 + return XML_ROLE_ENTITY_NONE; 365 + case XML_TOK_NAME: 366 + state->handler = entity2; 367 + return XML_ROLE_GENERAL_ENTITY_NAME; 368 + } 369 + return common(state, tok); 370 + } 371 + 372 + static int PTRCALL entity1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 373 + UNUSED_P(ptr); 374 + UNUSED_P(end); 375 + UNUSED_P(enc); 376 + switch (tok) { 377 + case XML_TOK_PROLOG_S: 378 + return XML_ROLE_ENTITY_NONE; 379 + case XML_TOK_NAME: 380 + state->handler = entity7; 381 + return XML_ROLE_PARAM_ENTITY_NAME; 382 + } 383 + return common(state, tok); 384 + } 385 + 386 + static int PTRCALL entity2(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 387 + switch (tok) { 388 + case XML_TOK_PROLOG_S: 389 + return XML_ROLE_ENTITY_NONE; 390 + case XML_TOK_NAME: 391 + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 392 + state->handler = entity4; 393 + return XML_ROLE_ENTITY_NONE; 394 + } 395 + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 396 + state->handler = entity3; 397 + return XML_ROLE_ENTITY_NONE; 398 + } 399 + break; 400 + case XML_TOK_LITERAL: 401 + state->handler = declClose; 402 + state->role_none = XML_ROLE_ENTITY_NONE; 403 + return 
XML_ROLE_ENTITY_VALUE; 404 + } 405 + return common(state, tok); 406 + } 407 + 408 + static int PTRCALL entity3(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 409 + UNUSED_P(ptr); 410 + UNUSED_P(end); 411 + UNUSED_P(enc); 412 + switch (tok) { 413 + case XML_TOK_PROLOG_S: 414 + return XML_ROLE_ENTITY_NONE; 415 + case XML_TOK_LITERAL: 416 + state->handler = entity4; 417 + return XML_ROLE_ENTITY_PUBLIC_ID; 418 + } 419 + return common(state, tok); 420 + } 421 + 422 + static int PTRCALL entity4(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 423 + UNUSED_P(ptr); 424 + UNUSED_P(end); 425 + UNUSED_P(enc); 426 + switch (tok) { 427 + case XML_TOK_PROLOG_S: 428 + return XML_ROLE_ENTITY_NONE; 429 + case XML_TOK_LITERAL: 430 + state->handler = entity5; 431 + return XML_ROLE_ENTITY_SYSTEM_ID; 432 + } 433 + return common(state, tok); 434 + } 435 + 436 + static int PTRCALL entity5(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 437 + switch (tok) { 438 + case XML_TOK_PROLOG_S: 439 + return XML_ROLE_ENTITY_NONE; 440 + case XML_TOK_DECL_CLOSE: 441 + setTopLevel(state); 442 + return XML_ROLE_ENTITY_COMPLETE; 443 + case XML_TOK_NAME: 444 + if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { 445 + state->handler = entity6; 446 + return XML_ROLE_ENTITY_NONE; 447 + } 448 + break; 449 + } 450 + return common(state, tok); 451 + } 452 + 453 + static int PTRCALL entity6(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 454 + UNUSED_P(ptr); 455 + UNUSED_P(end); 456 + UNUSED_P(enc); 457 + switch (tok) { 458 + case XML_TOK_PROLOG_S: 459 + return XML_ROLE_ENTITY_NONE; 460 + case XML_TOK_NAME: 461 + state->handler = declClose; 462 + state->role_none = XML_ROLE_ENTITY_NONE; 463 + return XML_ROLE_ENTITY_NOTATION_NAME; 464 + } 465 + return common(state, tok); 466 + } 467 + 468 + static int PTRCALL entity7(PROLOG_STATE* state, int tok, const char* 
ptr, const char* end, const ENCODING* enc) { 469 + switch (tok) { 470 + case XML_TOK_PROLOG_S: 471 + return XML_ROLE_ENTITY_NONE; 472 + case XML_TOK_NAME: 473 + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 474 + state->handler = entity9; 475 + return XML_ROLE_ENTITY_NONE; 476 + } 477 + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 478 + state->handler = entity8; 479 + return XML_ROLE_ENTITY_NONE; 480 + } 481 + break; 482 + case XML_TOK_LITERAL: 483 + state->handler = declClose; 484 + state->role_none = XML_ROLE_ENTITY_NONE; 485 + return XML_ROLE_ENTITY_VALUE; 486 + } 487 + return common(state, tok); 488 + } 489 + 490 + static int PTRCALL entity8(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 491 + UNUSED_P(ptr); 492 + UNUSED_P(end); 493 + UNUSED_P(enc); 494 + switch (tok) { 495 + case XML_TOK_PROLOG_S: 496 + return XML_ROLE_ENTITY_NONE; 497 + case XML_TOK_LITERAL: 498 + state->handler = entity9; 499 + return XML_ROLE_ENTITY_PUBLIC_ID; 500 + } 501 + return common(state, tok); 502 + } 503 + 504 + static int PTRCALL entity9(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 505 + UNUSED_P(ptr); 506 + UNUSED_P(end); 507 + UNUSED_P(enc); 508 + switch (tok) { 509 + case XML_TOK_PROLOG_S: 510 + return XML_ROLE_ENTITY_NONE; 511 + case XML_TOK_LITERAL: 512 + state->handler = entity10; 513 + return XML_ROLE_ENTITY_SYSTEM_ID; 514 + } 515 + return common(state, tok); 516 + } 517 + 518 + static int PTRCALL entity10(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 519 + UNUSED_P(ptr); 520 + UNUSED_P(end); 521 + UNUSED_P(enc); 522 + switch (tok) { 523 + case XML_TOK_PROLOG_S: 524 + return XML_ROLE_ENTITY_NONE; 525 + case XML_TOK_DECL_CLOSE: 526 + setTopLevel(state); 527 + return XML_ROLE_ENTITY_COMPLETE; 528 + } 529 + return common(state, tok); 530 + } 531 + 532 + static int PTRCALL notation0(PROLOG_STATE* state, int tok, const char* ptr, const char* 
end, const ENCODING* enc) { 533 + UNUSED_P(ptr); 534 + UNUSED_P(end); 535 + UNUSED_P(enc); 536 + switch (tok) { 537 + case XML_TOK_PROLOG_S: 538 + return XML_ROLE_NOTATION_NONE; 539 + case XML_TOK_NAME: 540 + state->handler = notation1; 541 + return XML_ROLE_NOTATION_NAME; 542 + } 543 + return common(state, tok); 544 + } 545 + 546 + static int PTRCALL notation1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 547 + switch (tok) { 548 + case XML_TOK_PROLOG_S: 549 + return XML_ROLE_NOTATION_NONE; 550 + case XML_TOK_NAME: 551 + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 552 + state->handler = notation3; 553 + return XML_ROLE_NOTATION_NONE; 554 + } 555 + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 556 + state->handler = notation2; 557 + return XML_ROLE_NOTATION_NONE; 558 + } 559 + break; 560 + } 561 + return common(state, tok); 562 + } 563 + 564 + static int PTRCALL notation2(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 565 + UNUSED_P(ptr); 566 + UNUSED_P(end); 567 + UNUSED_P(enc); 568 + switch (tok) { 569 + case XML_TOK_PROLOG_S: 570 + return XML_ROLE_NOTATION_NONE; 571 + case XML_TOK_LITERAL: 572 + state->handler = notation4; 573 + return XML_ROLE_NOTATION_PUBLIC_ID; 574 + } 575 + return common(state, tok); 576 + } 577 + 578 + static int PTRCALL notation3(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 579 + UNUSED_P(ptr); 580 + UNUSED_P(end); 581 + UNUSED_P(enc); 582 + switch (tok) { 583 + case XML_TOK_PROLOG_S: 584 + return XML_ROLE_NOTATION_NONE; 585 + case XML_TOK_LITERAL: 586 + state->handler = declClose; 587 + state->role_none = XML_ROLE_NOTATION_NONE; 588 + return XML_ROLE_NOTATION_SYSTEM_ID; 589 + } 590 + return common(state, tok); 591 + } 592 + 593 + static int PTRCALL notation4(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 594 + UNUSED_P(ptr); 595 + UNUSED_P(end); 596 + 
UNUSED_P(enc); 597 + switch (tok) { 598 + case XML_TOK_PROLOG_S: 599 + return XML_ROLE_NOTATION_NONE; 600 + case XML_TOK_LITERAL: 601 + state->handler = declClose; 602 + state->role_none = XML_ROLE_NOTATION_NONE; 603 + return XML_ROLE_NOTATION_SYSTEM_ID; 604 + case XML_TOK_DECL_CLOSE: 605 + setTopLevel(state); 606 + return XML_ROLE_NOTATION_NO_SYSTEM_ID; 607 + } 608 + return common(state, tok); 609 + } 610 + 611 + static int PTRCALL attlist0(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 612 + UNUSED_P(ptr); 613 + UNUSED_P(end); 614 + UNUSED_P(enc); 615 + switch (tok) { 616 + case XML_TOK_PROLOG_S: 617 + return XML_ROLE_ATTLIST_NONE; 618 + case XML_TOK_NAME: 619 + case XML_TOK_PREFIXED_NAME: 620 + state->handler = attlist1; 621 + return XML_ROLE_ATTLIST_ELEMENT_NAME; 622 + } 623 + return common(state, tok); 624 + } 625 + 626 + static int PTRCALL attlist1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 627 + UNUSED_P(ptr); 628 + UNUSED_P(end); 629 + UNUSED_P(enc); 630 + switch (tok) { 631 + case XML_TOK_PROLOG_S: 632 + return XML_ROLE_ATTLIST_NONE; 633 + case XML_TOK_DECL_CLOSE: 634 + setTopLevel(state); 635 + return XML_ROLE_ATTLIST_NONE; 636 + case XML_TOK_NAME: 637 + case XML_TOK_PREFIXED_NAME: 638 + state->handler = attlist2; 639 + return XML_ROLE_ATTRIBUTE_NAME; 640 + } 641 + return common(state, tok); 642 + } 643 + 644 + static int PTRCALL attlist2(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 645 + switch (tok) { 646 + case XML_TOK_PROLOG_S: 647 + return XML_ROLE_ATTLIST_NONE; 648 + case XML_TOK_NAME: { 649 + static const char* const types[] = { 650 + KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS, KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS, 651 + }; 652 + int i; 653 + for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++) 654 + if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { 655 + state->handler = attlist8; 656 + return 
XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; 657 + } 658 + } 659 + if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { 660 + state->handler = attlist5; 661 + return XML_ROLE_ATTLIST_NONE; 662 + } 663 + break; 664 + case XML_TOK_OPEN_PAREN: 665 + state->handler = attlist3; 666 + return XML_ROLE_ATTLIST_NONE; 667 + } 668 + return common(state, tok); 669 + } 670 + 671 + static int PTRCALL attlist3(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 672 + UNUSED_P(ptr); 673 + UNUSED_P(end); 674 + UNUSED_P(enc); 675 + switch (tok) { 676 + case XML_TOK_PROLOG_S: 677 + return XML_ROLE_ATTLIST_NONE; 678 + case XML_TOK_NMTOKEN: 679 + case XML_TOK_NAME: 680 + case XML_TOK_PREFIXED_NAME: 681 + state->handler = attlist4; 682 + return XML_ROLE_ATTRIBUTE_ENUM_VALUE; 683 + } 684 + return common(state, tok); 685 + } 686 + 687 + static int PTRCALL attlist4(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 688 + UNUSED_P(ptr); 689 + UNUSED_P(end); 690 + UNUSED_P(enc); 691 + switch (tok) { 692 + case XML_TOK_PROLOG_S: 693 + return XML_ROLE_ATTLIST_NONE; 694 + case XML_TOK_CLOSE_PAREN: 695 + state->handler = attlist8; 696 + return XML_ROLE_ATTLIST_NONE; 697 + case XML_TOK_OR: 698 + state->handler = attlist3; 699 + return XML_ROLE_ATTLIST_NONE; 700 + } 701 + return common(state, tok); 702 + } 703 + 704 + static int PTRCALL attlist5(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 705 + UNUSED_P(ptr); 706 + UNUSED_P(end); 707 + UNUSED_P(enc); 708 + switch (tok) { 709 + case XML_TOK_PROLOG_S: 710 + return XML_ROLE_ATTLIST_NONE; 711 + case XML_TOK_OPEN_PAREN: 712 + state->handler = attlist6; 713 + return XML_ROLE_ATTLIST_NONE; 714 + } 715 + return common(state, tok); 716 + } 717 + 718 + static int PTRCALL attlist6(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 719 + UNUSED_P(ptr); 720 + UNUSED_P(end); 721 + UNUSED_P(enc); 722 + switch (tok) { 723 + 
case XML_TOK_PROLOG_S: 724 + return XML_ROLE_ATTLIST_NONE; 725 + case XML_TOK_NAME: 726 + state->handler = attlist7; 727 + return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; 728 + } 729 + return common(state, tok); 730 + } 731 + 732 + static int PTRCALL attlist7(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 733 + UNUSED_P(ptr); 734 + UNUSED_P(end); 735 + UNUSED_P(enc); 736 + switch (tok) { 737 + case XML_TOK_PROLOG_S: 738 + return XML_ROLE_ATTLIST_NONE; 739 + case XML_TOK_CLOSE_PAREN: 740 + state->handler = attlist8; 741 + return XML_ROLE_ATTLIST_NONE; 742 + case XML_TOK_OR: 743 + state->handler = attlist6; 744 + return XML_ROLE_ATTLIST_NONE; 745 + } 746 + return common(state, tok); 747 + } 748 + 749 + /* default value */ 750 + static int PTRCALL attlist8(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 751 + switch (tok) { 752 + case XML_TOK_PROLOG_S: 753 + return XML_ROLE_ATTLIST_NONE; 754 + case XML_TOK_POUND_NAME: 755 + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_IMPLIED)) { 756 + state->handler = attlist1; 757 + return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; 758 + } 759 + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_REQUIRED)) { 760 + state->handler = attlist1; 761 + return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; 762 + } 763 + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_FIXED)) { 764 + state->handler = attlist9; 765 + return XML_ROLE_ATTLIST_NONE; 766 + } 767 + break; 768 + case XML_TOK_LITERAL: 769 + state->handler = attlist1; 770 + return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE; 771 + } 772 + return common(state, tok); 773 + } 774 + 775 + static int PTRCALL attlist9(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 776 + UNUSED_P(ptr); 777 + UNUSED_P(end); 778 + UNUSED_P(enc); 779 + switch (tok) { 780 + case XML_TOK_PROLOG_S: 781 + return XML_ROLE_ATTLIST_NONE; 782 + case XML_TOK_LITERAL: 783 + 
state->handler = attlist1; 784 + return XML_ROLE_FIXED_ATTRIBUTE_VALUE; 785 + } 786 + return common(state, tok); 787 + } 788 + 789 + static int PTRCALL element0(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 790 + UNUSED_P(ptr); 791 + UNUSED_P(end); 792 + UNUSED_P(enc); 793 + switch (tok) { 794 + case XML_TOK_PROLOG_S: 795 + return XML_ROLE_ELEMENT_NONE; 796 + case XML_TOK_NAME: 797 + case XML_TOK_PREFIXED_NAME: 798 + state->handler = element1; 799 + return XML_ROLE_ELEMENT_NAME; 800 + } 801 + return common(state, tok); 802 + } 803 + 804 + static int PTRCALL element1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 805 + switch (tok) { 806 + case XML_TOK_PROLOG_S: 807 + return XML_ROLE_ELEMENT_NONE; 808 + case XML_TOK_NAME: 809 + if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { 810 + state->handler = declClose; 811 + state->role_none = XML_ROLE_ELEMENT_NONE; 812 + return XML_ROLE_CONTENT_EMPTY; 813 + } 814 + if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { 815 + state->handler = declClose; 816 + state->role_none = XML_ROLE_ELEMENT_NONE; 817 + return XML_ROLE_CONTENT_ANY; 818 + } 819 + break; 820 + case XML_TOK_OPEN_PAREN: 821 + state->handler = element2; 822 + state->level = 1; 823 + return XML_ROLE_GROUP_OPEN; 824 + } 825 + return common(state, tok); 826 + } 827 + 828 + static int PTRCALL element2(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 829 + switch (tok) { 830 + case XML_TOK_PROLOG_S: 831 + return XML_ROLE_ELEMENT_NONE; 832 + case XML_TOK_POUND_NAME: 833 + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_PCDATA)) { 834 + state->handler = element3; 835 + return XML_ROLE_CONTENT_PCDATA; 836 + } 837 + break; 838 + case XML_TOK_OPEN_PAREN: 839 + state->level = 2; 840 + state->handler = element6; 841 + return XML_ROLE_GROUP_OPEN; 842 + case XML_TOK_NAME: 843 + case XML_TOK_PREFIXED_NAME: 844 + state->handler = element7; 
845 + return XML_ROLE_CONTENT_ELEMENT; 846 + case XML_TOK_NAME_QUESTION: 847 + state->handler = element7; 848 + return XML_ROLE_CONTENT_ELEMENT_OPT; 849 + case XML_TOK_NAME_ASTERISK: 850 + state->handler = element7; 851 + return XML_ROLE_CONTENT_ELEMENT_REP; 852 + case XML_TOK_NAME_PLUS: 853 + state->handler = element7; 854 + return XML_ROLE_CONTENT_ELEMENT_PLUS; 855 + } 856 + return common(state, tok); 857 + } 858 + 859 + static int PTRCALL element3(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 860 + UNUSED_P(ptr); 861 + UNUSED_P(end); 862 + UNUSED_P(enc); 863 + switch (tok) { 864 + case XML_TOK_PROLOG_S: 865 + return XML_ROLE_ELEMENT_NONE; 866 + case XML_TOK_CLOSE_PAREN: 867 + state->handler = declClose; 868 + state->role_none = XML_ROLE_ELEMENT_NONE; 869 + return XML_ROLE_GROUP_CLOSE; 870 + case XML_TOK_CLOSE_PAREN_ASTERISK: 871 + state->handler = declClose; 872 + state->role_none = XML_ROLE_ELEMENT_NONE; 873 + return XML_ROLE_GROUP_CLOSE_REP; 874 + case XML_TOK_OR: 875 + state->handler = element4; 876 + return XML_ROLE_ELEMENT_NONE; 877 + } 878 + return common(state, tok); 879 + } 880 + 881 + static int PTRCALL element4(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 882 + UNUSED_P(ptr); 883 + UNUSED_P(end); 884 + UNUSED_P(enc); 885 + switch (tok) { 886 + case XML_TOK_PROLOG_S: 887 + return XML_ROLE_ELEMENT_NONE; 888 + case XML_TOK_NAME: 889 + case XML_TOK_PREFIXED_NAME: 890 + state->handler = element5; 891 + return XML_ROLE_CONTENT_ELEMENT; 892 + } 893 + return common(state, tok); 894 + } 895 + 896 + static int PTRCALL element5(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 897 + UNUSED_P(ptr); 898 + UNUSED_P(end); 899 + UNUSED_P(enc); 900 + switch (tok) { 901 + case XML_TOK_PROLOG_S: 902 + return XML_ROLE_ELEMENT_NONE; 903 + case XML_TOK_CLOSE_PAREN_ASTERISK: 904 + state->handler = declClose; 905 + state->role_none = XML_ROLE_ELEMENT_NONE; 
906 + return XML_ROLE_GROUP_CLOSE_REP; 907 + case XML_TOK_OR: 908 + state->handler = element4; 909 + return XML_ROLE_ELEMENT_NONE; 910 + } 911 + return common(state, tok); 912 + } 913 + 914 + static int PTRCALL element6(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 915 + UNUSED_P(ptr); 916 + UNUSED_P(end); 917 + UNUSED_P(enc); 918 + switch (tok) { 919 + case XML_TOK_PROLOG_S: 920 + return XML_ROLE_ELEMENT_NONE; 921 + case XML_TOK_OPEN_PAREN: 922 + state->level += 1; 923 + return XML_ROLE_GROUP_OPEN; 924 + case XML_TOK_NAME: 925 + case XML_TOK_PREFIXED_NAME: 926 + state->handler = element7; 927 + return XML_ROLE_CONTENT_ELEMENT; 928 + case XML_TOK_NAME_QUESTION: 929 + state->handler = element7; 930 + return XML_ROLE_CONTENT_ELEMENT_OPT; 931 + case XML_TOK_NAME_ASTERISK: 932 + state->handler = element7; 933 + return XML_ROLE_CONTENT_ELEMENT_REP; 934 + case XML_TOK_NAME_PLUS: 935 + state->handler = element7; 936 + return XML_ROLE_CONTENT_ELEMENT_PLUS; 937 + } 938 + return common(state, tok); 939 + } 940 + 941 + static int PTRCALL element7(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 942 + UNUSED_P(ptr); 943 + UNUSED_P(end); 944 + UNUSED_P(enc); 945 + switch (tok) { 946 + case XML_TOK_PROLOG_S: 947 + return XML_ROLE_ELEMENT_NONE; 948 + case XML_TOK_CLOSE_PAREN: 949 + state->level -= 1; 950 + if (state->level == 0) { 951 + state->handler = declClose; 952 + state->role_none = XML_ROLE_ELEMENT_NONE; 953 + } 954 + return XML_ROLE_GROUP_CLOSE; 955 + case XML_TOK_CLOSE_PAREN_ASTERISK: 956 + state->level -= 1; 957 + if (state->level == 0) { 958 + state->handler = declClose; 959 + state->role_none = XML_ROLE_ELEMENT_NONE; 960 + } 961 + return XML_ROLE_GROUP_CLOSE_REP; 962 + case XML_TOK_CLOSE_PAREN_QUESTION: 963 + state->level -= 1; 964 + if (state->level == 0) { 965 + state->handler = declClose; 966 + state->role_none = XML_ROLE_ELEMENT_NONE; 967 + } 968 + return XML_ROLE_GROUP_CLOSE_OPT; 969 + 
case XML_TOK_CLOSE_PAREN_PLUS: 970 + state->level -= 1; 971 + if (state->level == 0) { 972 + state->handler = declClose; 973 + state->role_none = XML_ROLE_ELEMENT_NONE; 974 + } 975 + return XML_ROLE_GROUP_CLOSE_PLUS; 976 + case XML_TOK_COMMA: 977 + state->handler = element6; 978 + return XML_ROLE_GROUP_SEQUENCE; 979 + case XML_TOK_OR: 980 + state->handler = element6; 981 + return XML_ROLE_GROUP_CHOICE; 982 + } 983 + return common(state, tok); 984 + } 985 + 986 + #ifdef XML_DTD 987 + 988 + static int PTRCALL condSect0(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 989 + switch (tok) { 990 + case XML_TOK_PROLOG_S: 991 + return XML_ROLE_NONE; 992 + case XML_TOK_NAME: 993 + if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) { 994 + state->handler = condSect1; 995 + return XML_ROLE_NONE; 996 + } 997 + if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) { 998 + state->handler = condSect2; 999 + return XML_ROLE_NONE; 1000 + } 1001 + break; 1002 + } 1003 + return common(state, tok); 1004 + } 1005 + 1006 + static int PTRCALL condSect1(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 1007 + UNUSED_P(ptr); 1008 + UNUSED_P(end); 1009 + UNUSED_P(enc); 1010 + switch (tok) { 1011 + case XML_TOK_PROLOG_S: 1012 + return XML_ROLE_NONE; 1013 + case XML_TOK_OPEN_BRACKET: 1014 + state->handler = externalSubset1; 1015 + state->includeLevel += 1; 1016 + return XML_ROLE_NONE; 1017 + } 1018 + return common(state, tok); 1019 + } 1020 + 1021 + static int PTRCALL condSect2(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 1022 + UNUSED_P(ptr); 1023 + UNUSED_P(end); 1024 + UNUSED_P(enc); 1025 + switch (tok) { 1026 + case XML_TOK_PROLOG_S: 1027 + return XML_ROLE_NONE; 1028 + case XML_TOK_OPEN_BRACKET: 1029 + state->handler = externalSubset1; 1030 + return XML_ROLE_IGNORE_SECT; 1031 + } 1032 + return common(state, tok); 1033 + } 1034 + 1035 + #endif /* XML_DTD */ 1036 + 1037 + 
static int PTRCALL declClose(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 1038 + UNUSED_P(ptr); 1039 + UNUSED_P(end); 1040 + UNUSED_P(enc); 1041 + switch (tok) { 1042 + case XML_TOK_PROLOG_S: 1043 + return state->role_none; 1044 + case XML_TOK_DECL_CLOSE: 1045 + setTopLevel(state); 1046 + return state->role_none; 1047 + } 1048 + return common(state, tok); 1049 + } 1050 + 1051 + /* This function will only be invoked if the internal logic of the 1052 + * parser has broken down. It is used in two cases: 1053 + * 1054 + * 1: When the XML prolog has been finished. At this point the 1055 + * processor (the parser level above these role handlers) should 1056 + * switch from prologProcessor to contentProcessor and reinitialise 1057 + * the handler function. 1058 + * 1059 + * 2: When an error has been detected (via common() below). At this 1060 + * point again the processor should be switched to errorProcessor, 1061 + * which will never call a handler. 1062 + * 1063 + * The result of this is that error() can only be called if the 1064 + * processor switch failed to happen, which is an internal error and 1065 + * therefore we shouldn't be able to provoke it simply by using the 1066 + * library. It is a necessary backstop, however, so we merely exclude 1067 + * it from the coverage statistics. 
1068 + * 1069 + * LCOV_EXCL_START 1070 + */ 1071 + static int PTRCALL error(PROLOG_STATE* state, int tok, const char* ptr, const char* end, const ENCODING* enc) { 1072 + UNUSED_P(state); 1073 + UNUSED_P(tok); 1074 + UNUSED_P(ptr); 1075 + UNUSED_P(end); 1076 + UNUSED_P(enc); 1077 + return XML_ROLE_NONE; 1078 + } 1079 + /* LCOV_EXCL_STOP */ 1080 + 1081 + static int FASTCALL common(PROLOG_STATE* state, int tok) { 1082 + #ifdef XML_DTD 1083 + if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) return XML_ROLE_INNER_PARAM_ENTITY_REF; 1084 + #else 1085 + UNUSED_P(tok); 1086 + #endif 1087 + state->handler = error; 1088 + return XML_ROLE_ERROR; 1089 + } 1090 + 1091 + void XmlPrologStateInit(PROLOG_STATE* state) { 1092 + state->handler = prolog0; 1093 + #ifdef XML_DTD 1094 + state->documentEntity = 1; 1095 + state->includeLevel = 0; 1096 + state->inEntityValue = 0; 1097 + #endif /* XML_DTD */ 1098 + } 1099 + 1100 + #ifdef XML_DTD 1101 + 1102 + void XmlPrologStateInitExternalEntity(PROLOG_STATE* state) { 1103 + state->handler = externalSubset0; 1104 + state->documentEntity = 0; 1105 + state->includeLevel = 0; 1106 + } 1107 + 1108 + #endif /* XML_DTD */
+134
lib/expat/xmlrole.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Karl Waclawek <karl@waclawek.net> 12 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 13 + Copyright (c) 2017-2025 Sebastian Pipping <sebastian@pipping.org> 14 + Licensed under the MIT license: 15 + 16 + Permission is hereby granted, free of charge, to any person obtaining 17 + a copy of this software and associated documentation files (the 18 + "Software"), to deal in the Software without restriction, including 19 + without limitation the rights to use, copy, modify, merge, publish, 20 + distribute, sublicense, and/or sell copies of the Software, and to permit 21 + persons to whom the Software is furnished to do so, subject to the 22 + following conditions: 23 + 24 + The above copyright notice and this permission notice shall be included 25 + in all copies or substantial portions of the Software. 26 + 27 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 30 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 31 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 32 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 33 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
34 + */ 35 + 36 + #ifndef XmlRole_INCLUDED 37 + #define XmlRole_INCLUDED 1 38 + 39 + #include "xmltok.h" 40 + 41 + #ifdef __cplusplus 42 + extern "C" { 43 + #endif 44 + /* Role codes yielded by XmlTokenRole(), i.e. by whichever handler is stored in the prolog state. XML_ROLE_ERROR is the only negative value; the three XML_DTD-only roles are present only in DTD-enabled builds, so the numeric value of XML_ROLE_PARAM_ENTITY_REF depends on that setting. */ 45 + enum { 46 + XML_ROLE_ERROR = -1, 47 + XML_ROLE_NONE = 0, 48 + XML_ROLE_XML_DECL, 49 + XML_ROLE_INSTANCE_START, 50 + XML_ROLE_DOCTYPE_NONE, 51 + XML_ROLE_DOCTYPE_NAME, 52 + XML_ROLE_DOCTYPE_SYSTEM_ID, 53 + XML_ROLE_DOCTYPE_PUBLIC_ID, 54 + XML_ROLE_DOCTYPE_INTERNAL_SUBSET, 55 + XML_ROLE_DOCTYPE_CLOSE, 56 + XML_ROLE_GENERAL_ENTITY_NAME, 57 + XML_ROLE_PARAM_ENTITY_NAME, 58 + XML_ROLE_ENTITY_NONE, 59 + XML_ROLE_ENTITY_VALUE, 60 + XML_ROLE_ENTITY_SYSTEM_ID, 61 + XML_ROLE_ENTITY_PUBLIC_ID, 62 + XML_ROLE_ENTITY_COMPLETE, 63 + XML_ROLE_ENTITY_NOTATION_NAME, 64 + XML_ROLE_NOTATION_NONE, 65 + XML_ROLE_NOTATION_NAME, 66 + XML_ROLE_NOTATION_SYSTEM_ID, 67 + XML_ROLE_NOTATION_NO_SYSTEM_ID, 68 + XML_ROLE_NOTATION_PUBLIC_ID, 69 + XML_ROLE_ATTRIBUTE_NAME, 70 + XML_ROLE_ATTRIBUTE_TYPE_CDATA, 71 + XML_ROLE_ATTRIBUTE_TYPE_ID, 72 + XML_ROLE_ATTRIBUTE_TYPE_IDREF, 73 + XML_ROLE_ATTRIBUTE_TYPE_IDREFS, 74 + XML_ROLE_ATTRIBUTE_TYPE_ENTITY, 75 + XML_ROLE_ATTRIBUTE_TYPE_ENTITIES, 76 + XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN, 77 + XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS, 78 + XML_ROLE_ATTRIBUTE_ENUM_VALUE, 79 + XML_ROLE_ATTRIBUTE_NOTATION_VALUE, 80 + XML_ROLE_ATTLIST_NONE, 81 + XML_ROLE_ATTLIST_ELEMENT_NAME, 82 + XML_ROLE_IMPLIED_ATTRIBUTE_VALUE, 83 + XML_ROLE_REQUIRED_ATTRIBUTE_VALUE, 84 + XML_ROLE_DEFAULT_ATTRIBUTE_VALUE, 85 + XML_ROLE_FIXED_ATTRIBUTE_VALUE, 86 + XML_ROLE_ELEMENT_NONE, 87 + XML_ROLE_ELEMENT_NAME, 88 + XML_ROLE_CONTENT_ANY, 89 + XML_ROLE_CONTENT_EMPTY, 90 + XML_ROLE_CONTENT_PCDATA, 91 + XML_ROLE_GROUP_OPEN, 92 + XML_ROLE_GROUP_CLOSE, 93 + XML_ROLE_GROUP_CLOSE_REP, 94 + XML_ROLE_GROUP_CLOSE_OPT, 95 + XML_ROLE_GROUP_CLOSE_PLUS, 96 + XML_ROLE_GROUP_CHOICE, 97 + XML_ROLE_GROUP_SEQUENCE, 98 + XML_ROLE_CONTENT_ELEMENT, 99 + XML_ROLE_CONTENT_ELEMENT_REP, 100 + XML_ROLE_CONTENT_ELEMENT_OPT, 101 + XML_ROLE_CONTENT_ELEMENT_PLUS, 
102 + XML_ROLE_PI, 103 + XML_ROLE_COMMENT, 104 + #ifdef XML_DTD 105 + XML_ROLE_TEXT_DECL, 106 + XML_ROLE_IGNORE_SECT, 107 + XML_ROLE_INNER_PARAM_ENTITY_REF, 108 + #endif /* XML_DTD */ 109 + XML_ROLE_PARAM_ENTITY_REF 110 + }; 111 + /* State carried from one prolog token to the next. `handler` is the function XmlTokenRole() invokes for the next token. The last three fields exist only in XML_DTD builds. NOTE(review): `level` presumably counts currently open content-model groups and `role_none` the role reported while a declaration is being closed -- confirm against xmlrole.c. */ 112 + typedef struct prolog_state { 113 + int(PTRCALL* handler)(struct prolog_state* state, int tok, const char* ptr, const char* end, const ENCODING* enc); 114 + unsigned level; 115 + int role_none; 116 + #ifdef XML_DTD 117 + unsigned includeLevel; 118 + int documentEntity; 119 + int inEntityValue; 120 + #endif /* XML_DTD */ 121 + } PROLOG_STATE; 122 + 123 + void XmlPrologStateInit(PROLOG_STATE* state); 124 + #ifdef XML_DTD 125 + void XmlPrologStateInitExternalEntity(PROLOG_STATE* state); 126 + #endif /* XML_DTD */ 127 + /* Passes one token to the state's current handler and evaluates to the role code that handler returns. `state` is evaluated twice, so it must not have side effects. */ 128 + #define XmlTokenRole(state, tok, ptr, end, enc) (((state)->handler)(state, tok, ptr, end, enc)) 129 + 130 + #ifdef __cplusplus 131 + } 132 + #endif 133 + 134 + #endif /* not XmlRole_INCLUDED */
+1489
lib/expat/xmltok.c
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> 13 + Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 + Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 + Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 16 + Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> 17 + Copyright (c) 2016 Don Lewis <truckman@apache.org> 18 + Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 19 + Copyright (c) 2017 Alexander Bluhm <alexander.bluhm@gmx.net> 20 + Copyright (c) 2017 Benbuck Nason <bnason@netflix.com> 21 + Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 22 + Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 23 + Copyright (c) 2021 Donghee Na <donghee.na@python.org> 24 + Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> 25 + Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 26 + Copyright (c) 2023 Hanno Böck <hanno@gentoo.org> 27 + Licensed under the MIT license: 28 + 29 + Permission is hereby granted, free of charge, to any person obtaining 30 + a copy of this software and associated documentation files (the 31 + "Software"), to deal in the Software without restriction, including 32 + without limitation the rights to use, copy, modify, merge, publish, 33 + distribute, sublicense, and/or sell copies of the Software, and to permit 34 + persons to whom the Software is furnished to do so, subject to the 35 + following conditions: 36 + 37 + The above copyright notice and this permission notice shall be included 38 + in all copies or substantial portions of the Software. 
39 + 40 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 41 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 42 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 43 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 44 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 45 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 46 + USE OR OTHER DEALINGS IN THE SOFTWARE. 47 + */ 48 + 49 + #include <stdbool.h> 50 + #include <stddef.h> 51 + #include <string.h> /* memcpy */ 52 + 53 + #include "expat_config.h" 54 + 55 + #ifdef _WIN32 56 + #include "winconfig.h" 57 + #endif 58 + 59 + #include "expat_external.h" 60 + #include "internal.h" 61 + #include "nametab.h" 62 + #include "xmltok.h" 63 + /* In XML_DTD builds the scanner table gets one extra entry, ignoreSectionTok; otherwise the macro expands to nothing. */ 64 + #ifdef XML_DTD 65 + #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) 66 + #else 67 + #define IGNORE_SECTION_TOK_VTABLE /* as nothing */ 68 + #endif 69 + /* Initializer list for the function-pointer part of an ENCODING; every name is resolved through whatever PREFIX() is defined as at the point of use. */ 70 + #define VTABLE1 \ 71 + {PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \ 72 + {PREFIX(attributeValueTok), PREFIX(entityValueTok)}, PREFIX(nameMatchesAscii), PREFIX(nameLength), \ 73 + PREFIX(skipS), PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), PREFIX(updatePosition), \ 74 + PREFIX(isPublicId) 75 + 76 + #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) 77 + /* Tests one bit of namingBitmap: pages[hi] selects a group of eight 32-bit words, the top three bits of lo pick the word and the low five bits pick the bit. */ 78 + #define UCS2_GET_NAMING(pages, hi, lo) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) 79 + 80 + /* A 2 byte UTF-8 representation splits the characters 11 bits between 81 + the bottom 5 and 6 bits of the bytes. We need 8 bits to index into 82 + pages, 3 bits to add to that index and 5 bits to generate the mask. 
83 + */ 84 + #define UTF8_GET_NAMING2(pages, byte) \ 85 + (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] & \ 86 + (1u << (((byte)[1]) & 0x1F))) 87 + 88 + /* A 3 byte UTF-8 representation splits the characters 16 bits between 89 + the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index 90 + into pages, 3 bits to add to that index and 5 bits to generate the 91 + mask. 92 + */ 93 + #define UTF8_GET_NAMING3(pages, byte) \ 94 + (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] << 3) + ((((byte)[1]) & 3) << 1) + \ 95 + ((((byte)[2]) >> 5) & 1)] & \ 96 + (1u << (((byte)[2]) & 0x1F))) 97 + 98 + /* Detection of invalid UTF-8 sequences is based on Table 3.1B 99 + of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/ 100 + with the additional restriction of not allowing the Unicode 101 + code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). 102 + Implementation details: 103 + (A & 0x80) == 0 means A < 0x80 104 + and 105 + (A & 0xC0) == 0xC0 means A > 0xBF 106 + */ 107 + 108 + #define UTF8_INVALID2(p) ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) 109 + 110 + #define UTF8_INVALID3(p) \ 111 + (((p)[2] & 0x80) == 0 || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD : ((p)[2] & 0xC0) == 0xC0) || \ 112 + ((*p) == 0xE0 ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ 113 + : ((p)[1] & 0x80) == 0 || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) 114 + 115 + #define UTF8_INVALID4(p) \ 116 + (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 || \ 117 + ((*p) == 0xF0 ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ 118 + : ((p)[1] & 0x80) == 0 || ((*p) == 0xF4 ? 
(p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) 119 + /* Predicate that rejects every input. It fills the 4-byte name/name-start slots for UTF-8 (utf8_isName4, utf8_isNmstrt4) and, under XML_MIN_SIZE, the single-byte sb_isNameMin/sb_isNmstrtMin slots. */ 120 + static int PTRFASTCALL isNever(const ENCODING* enc, const char* p) { 121 + UNUSED_P(enc); 122 + UNUSED_P(p); 123 + return 0; 124 + } 125 + /* Non-zero when the 2-byte UTF-8 sequence at p has its bit set in the namePages bitmap. p is reinterpreted as unsigned bytes so the shifts in the macro are well defined. */ 126 + static int PTRFASTCALL utf8_isName2(const ENCODING* enc, const char* p) { 127 + UNUSED_P(enc); 128 + return UTF8_GET_NAMING2(namePages, (const unsigned char*)p); 129 + } 130 + /* Same lookup for a 3-byte UTF-8 sequence. */ 131 + static int PTRFASTCALL utf8_isName3(const ENCODING* enc, const char* p) { 132 + UNUSED_P(enc); 133 + return UTF8_GET_NAMING3(namePages, (const unsigned char*)p); 134 + } 135 + 136 + #define utf8_isName4 isNever 137 + /* Same as utf8_isName2 but against the nmstrtPages bitmap. */ 138 + static int PTRFASTCALL utf8_isNmstrt2(const ENCODING* enc, const char* p) { 139 + UNUSED_P(enc); 140 + return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char*)p); 141 + } 142 + /* Same as utf8_isName3 but against the nmstrtPages bitmap. */ 143 + static int PTRFASTCALL utf8_isNmstrt3(const ENCODING* enc, const char* p) { 144 + UNUSED_P(enc); 145 + return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char*)p); 146 + } 147 + 148 + #define utf8_isNmstrt4 isNever 149 + /* Function wrappers around the UTF8_INVALIDn macros so they can be stored in struct normal_encoding. The caller must make n bytes readable at p. */ 150 + static int PTRFASTCALL utf8_isInvalid2(const ENCODING* enc, const char* p) { 151 + UNUSED_P(enc); 152 + return UTF8_INVALID2((const unsigned char*)p); 153 + } 154 + 155 + static int PTRFASTCALL utf8_isInvalid3(const ENCODING* enc, const char* p) { 156 + UNUSED_P(enc); 157 + return UTF8_INVALID3((const unsigned char*)p); 158 + } 159 + 160 + static int PTRFASTCALL utf8_isInvalid4(const ENCODING* enc, const char* p) { 161 + UNUSED_P(enc); 162 + return UTF8_INVALID4((const unsigned char*)p); 163 + } 164 + /* An ENCODING followed by a 256-entry byte-type table and the multi-byte classification callbacks. `enc` must stay the first member: AS_NORMAL_ENCODING casts an ENCODING pointer straight to this struct. The five extra callbacks exist only in XML_MIN_SIZE builds. */ 165 + struct normal_encoding { 166 + ENCODING enc; 167 + unsigned char type[256]; 168 + #ifdef XML_MIN_SIZE 169 + int(PTRFASTCALL* byteType)(const ENCODING*, const char*); 170 + int(PTRFASTCALL* isNameMin)(const ENCODING*, const char*); 171 + int(PTRFASTCALL* isNmstrtMin)(const ENCODING*, const char*); 172 + int(PTRFASTCALL* byteToAscii)(const ENCODING*, const char*); 173 + int(PTRCALL* charMatches)(const ENCODING*, const char*, int); 174 + #endif /* XML_MIN_SIZE */ 
175 + int(PTRFASTCALL* isName2)(const ENCODING*, const char*); 176 + int(PTRFASTCALL* isName3)(const ENCODING*, const char*); 177 + int(PTRFASTCALL* isName4)(const ENCODING*, const char*); 178 + int(PTRFASTCALL* isNmstrt2)(const ENCODING*, const char*); 179 + int(PTRFASTCALL* isNmstrt3)(const ENCODING*, const char*); 180 + int(PTRFASTCALL* isNmstrt4)(const ENCODING*, const char*); 181 + int(PTRFASTCALL* isInvalid2)(const ENCODING*, const char*); 182 + int(PTRFASTCALL* isInvalid3)(const ENCODING*, const char*); 183 + int(PTRFASTCALL* isInvalid4)(const ENCODING*, const char*); 184 + }; 185 + 186 + #define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding*)(enc)) 187 + /* STANDARD_VTABLE(E) supplies initializers for the XML_MIN_SIZE-only callbacks and is empty otherwise, so one encoding initializer works in both configurations. */ 188 + #ifdef XML_MIN_SIZE 189 + 190 + #define STANDARD_VTABLE(E) E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches, 191 + 192 + #else 193 + 194 + #define STANDARD_VTABLE(E) /* as nothing */ 195 + 196 + #endif 197 + 198 + #define NORMAL_VTABLE(E) \ 199 + E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, E##isNmstrt4, E##isInvalid2, E##isInvalid3, \ 200 + E##isInvalid4 201 + 202 + #define NULL_VTABLE \ 203 + /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL, /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, \ 204 + /* isNmstrt4 */ NULL, /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL 205 + 206 + static int FASTCALL checkCharRefNumber(int result); 207 + 208 + #include "ascii.h" 209 + #include "xmltok_impl.h" 210 + /* The macros from here on configure the first inclusion of xmltok_impl.c for single-byte ("normal") encodings. */ 211 + #ifdef XML_MIN_SIZE 212 + #define sb_isNameMin isNever 213 + #define sb_isNmstrtMin isNever 214 + #endif 215 + 216 + #ifdef XML_MIN_SIZE 217 + #define MINBPC(enc) ((enc)->minBytesPerChar) 218 + #else 219 + /* minimum bytes per character */ 220 + #define MINBPC(enc) 1 221 + #endif 222 + 223 + #define SB_BYTE_TYPE(enc, p) (((const struct normal_encoding*)(enc))->type[(unsigned char)*(p)]) 224 + 225 + #ifdef XML_MIN_SIZE 226 + static int PTRFASTCALL sb_byteType(const ENCODING* enc, const char* p) { return SB_BYTE_TYPE(enc, p); } 
227 + #define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) 228 + #else 229 + #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) 230 + #endif 231 + 232 + #ifdef XML_MIN_SIZE 233 + #define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) 234 + static int PTRFASTCALL sb_byteToAscii(const ENCODING* enc, const char* p) { 235 + UNUSED_P(enc); 236 + return *p; 237 + } 238 + #else 239 + #define BYTE_TO_ASCII(enc, p) (*(p)) 240 + #endif 241 + 242 + #define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p)) 243 + #define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p)) 244 + #ifdef XML_MIN_SIZE 245 + #define IS_INVALID_CHAR(enc, p, n) \ 246 + (AS_NORMAL_ENCODING(enc)->isInvalid##n && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) 247 + #else 248 + #define IS_INVALID_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) 249 + #endif 250 + 251 + #ifdef XML_MIN_SIZE 252 + #define IS_NAME_CHAR_MINBPC(enc, p) (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) 253 + #define IS_NMSTRT_CHAR_MINBPC(enc, p) (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) 254 + #else 255 + #define IS_NAME_CHAR_MINBPC(enc, p) (0) 256 + #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) 257 + #endif 258 + 259 + #ifdef XML_MIN_SIZE 260 + #define CHAR_MATCHES(enc, p, c) (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) 261 + static int PTRCALL sb_charMatches(const ENCODING* enc, const char* p, int c) { 262 + UNUSED_P(enc); 263 + return *p == c; 264 + } 265 + #else 266 + /* c is an ASCII character */ 267 + #define CHAR_MATCHES(enc, p, c) (*(p) == (c)) 268 + #endif 269 + /* Instantiate the scanner templates with the normal_ prefix, then drop the per-encoding macros so a later section can redefine them. */ 270 + #define PREFIX(ident) normal_##ident 271 + #define XML_TOK_IMPL_C 272 + #include "xmltok_impl.c" 273 + #undef XML_TOK_IMPL_C 274 + 275 + #undef MINBPC 276 + #undef BYTE_TYPE 277 + #undef BYTE_TO_ASCII 278 + #undef CHAR_MATCHES 279 + #undef IS_NAME_CHAR 280 + #undef IS_NAME_CHAR_MINBPC 281 + #undef IS_NMSTRT_CHAR 282 + #undef IS_NMSTRT_CHAR_MINBPC 283 + #undef 
IS_INVALID_CHAR 284 + 285 + enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ 286 + UTF8_cval1 = 0x00, 287 + UTF8_cval2 = 0xc0, 288 + UTF8_cval3 = 0xe0, 289 + UTF8_cval4 = 0xf0 290 + }; 291 + /* Pulls *fromLimRef back, if necessary, so that [from, *fromLimRef) does not end part-way through a multi-byte UTF-8 character. The scan walks backwards from the limit counting bytes in `walked`; on meeting a lead byte it either restores the limit to just past that character (enough bytes follow it) or resets the count and keeps scanning, which leaves the truncated character outside the range. An ASCII byte stops the scan. */ 292 + void _INTERNAL_trim_to_complete_utf8_characters(const char* from, const char** fromLimRef) { 293 + const char* fromLim = *fromLimRef; 294 + size_t walked = 0; 295 + for (; fromLim > from; fromLim--, walked++) { 296 + const unsigned char prev = (unsigned char)fromLim[-1]; 297 + if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, led by 0b11110xxx byte */ 298 + if (walked + 1 >= 4) { 299 + fromLim += 4 - 1; 300 + break; 301 + } else { 302 + walked = 0; 303 + } 304 + } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, led by 0b1110xxxx byte */ 305 + if (walked + 1 >= 3) { 306 + fromLim += 3 - 1; 307 + break; 308 + } else { 309 + walked = 0; 310 + } 311 + } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, led by 0b110xxxxx byte */ 312 + if (walked + 1 >= 2) { 313 + fromLim += 2 - 1; 314 + break; 315 + } else { 316 + walked = 0; 317 + } 318 + } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ 319 + break; 320 + } 321 + } 322 + *fromLimRef = fromLim; 323 + } 324 + /* UTF-8 to UTF-8 "conversion": copies bytes from *fromP to *toP and advances both pointers. The source range is first clipped to the free output space and then trimmed to whole characters, so a partial character is never written. When both conditions apply, XML_CONVERT_OUTPUT_EXHAUSTED is reported rather than XML_CONVERT_INPUT_INCOMPLETE. */ 325 + static enum XML_Convert_Result PTRCALL utf8_toUtf8(const ENCODING* enc, const char** fromP, const char* fromLim, 326 + char** toP, const char* toLim) { 327 + bool input_incomplete = false; 328 + bool output_exhausted = false; 329 + 330 + /* Avoid copying partial characters (due to limited space). */ 331 + const ptrdiff_t bytesAvailable = fromLim - *fromP; 332 + const ptrdiff_t bytesStorable = toLim - *toP; 333 + UNUSED_P(enc); 334 + if (bytesAvailable > bytesStorable) { 335 + fromLim = *fromP + bytesStorable; 336 + output_exhausted = true; 337 + } 338 + 339 + /* Avoid copying partial characters (from incomplete input). 
*/ 340 + { 341 + const char* const fromLimBefore = fromLim; 342 + _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim); 343 + if (fromLim < fromLimBefore) { 344 + input_incomplete = true; 345 + } 346 + } 347 + 348 + { 349 + const ptrdiff_t bytesToCopy = fromLim - *fromP; 350 + memcpy(*toP, *fromP, bytesToCopy); 351 + *fromP += bytesToCopy; 352 + *toP += bytesToCopy; 353 + } 354 + 355 + if (output_exhausted) /* needs to go first */ 356 + return XML_CONVERT_OUTPUT_EXHAUSTED; 357 + else if (input_incomplete) 358 + return XML_CONVERT_INPUT_INCOMPLETE; 359 + else 360 + return XML_CONVERT_COMPLETED; 361 + } 362 + /* Decodes UTF-8 into 16-bit units, dispatching on the byte type from the encoding's table. 2- and 3-byte sequences yield one unit; a 4-byte sequence yields a surrogate pair and therefore needs two free output slots. The function stops early, leaving *fromP at the unconsumed character, when a sequence is cut off by fromLim or the output is full. Sequences are not validated here. */ 363 + static enum XML_Convert_Result PTRCALL utf8_toUtf16(const ENCODING* enc, const char** fromP, const char* fromLim, 364 + unsigned short** toP, const unsigned short* toLim) { 365 + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; 366 + unsigned short* to = *toP; 367 + const char* from = *fromP; 368 + while (from < fromLim && to < toLim) { 369 + switch (SB_BYTE_TYPE(enc, from)) { 370 + case BT_LEAD2: 371 + if (fromLim - from < 2) { 372 + res = XML_CONVERT_INPUT_INCOMPLETE; 373 + goto after; 374 + } 375 + *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); 376 + from += 2; 377 + break; 378 + case BT_LEAD3: 379 + if (fromLim - from < 3) { 380 + res = XML_CONVERT_INPUT_INCOMPLETE; 381 + goto after; 382 + } 383 + *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); 384 + from += 3; 385 + break; 386 + case BT_LEAD4: { 387 + unsigned long n; 388 + if (toLim - to < 2) { 389 + res = XML_CONVERT_OUTPUT_EXHAUSTED; 390 + goto after; 391 + } 392 + if (fromLim - from < 4) { 393 + res = XML_CONVERT_INPUT_INCOMPLETE; 394 + goto after; 395 + } 396 + n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); 397 + n -= 0x10000; 398 + to[0] = (unsigned short)((n >> 10) | 0xD800); 399 + to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); 400 + to += 2; 401 + from += 4; 
402 + } break; 403 + default: 404 + *to++ = *from++; 405 + break; 406 + } 407 + } 408 + if (from < fromLim) res = XML_CONVERT_OUTPUT_EXHAUSTED; 409 + after: 410 + *fromP = from; 411 + *toP = to; 412 + return res; 413 + } 414 + /* UTF-8 encoding tables. The non-_ns variants define BT_COLON as BT_NMSTRT while including the ASCII table, so ':' is classified as a name-start character there. The internal_ variants use iasciitab.h instead of asciitab.h. */ 415 + #ifdef XML_NS 416 + static const struct normal_encoding utf8_encoding_ns = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 417 + { 418 + #include "asciitab.h" 419 + #include "utf8tab.h" 420 + }, 421 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 422 + #endif 423 + 424 + static const struct normal_encoding utf8_encoding = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 425 + { 426 + #define BT_COLON BT_NMSTRT 427 + #include "asciitab.h" 428 + #undef BT_COLON 429 + #include "utf8tab.h" 430 + }, 431 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 432 + 433 + #ifdef XML_NS 434 + 435 + static const struct normal_encoding internal_utf8_encoding_ns = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 436 + { 437 + #include "iasciitab.h" 438 + #include "utf8tab.h" 439 + }, 440 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 441 + 442 + #endif 443 + 444 + static const struct normal_encoding internal_utf8_encoding = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 445 + { 446 + #define BT_COLON BT_NMSTRT 447 + #include "iasciitab.h" 448 + #undef BT_COLON 449 + #include "utf8tab.h" 450 + }, 451 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 452 + /* Latin-1 to UTF-8: a byte with the high bit set becomes a two-byte sequence, any other byte is copied unchanged. Returns XML_CONVERT_OUTPUT_EXHAUSTED as soon as the next character does not fit; *fromP then still points at that character. */ 453 + static enum XML_Convert_Result PTRCALL latin1_toUtf8(const ENCODING* enc, const char** fromP, const char* fromLim, 454 + char** toP, const char* toLim) { 455 + UNUSED_P(enc); 456 + for (;;) { 457 + unsigned char c; 458 + if (*fromP == fromLim) return XML_CONVERT_COMPLETED; 459 + c = (unsigned char)**fromP; 460 + if (c & 0x80) { 461 + if (toLim - *toP < 2) return XML_CONVERT_OUTPUT_EXHAUSTED; 462 + *(*toP)++ = (char)((c >> 6) | UTF8_cval2); 463 + *(*toP)++ = (char)((c & 0x3f) | 0x80); 464 + (*fromP)++; 465 + } else { 466 + if (*toP == toLim) return XML_CONVERT_OUTPUT_EXHAUSTED; 467 + *(*toP)++ = *(*fromP)++; 
468 + } 469 + } 470 + } 471 + /* Latin-1 to UTF-16: each input byte, taken as unsigned, becomes one 16-bit unit. The ASCII encoding tables below reuse this function. */ 472 + static enum XML_Convert_Result PTRCALL latin1_toUtf16(const ENCODING* enc, const char** fromP, const char* fromLim, 473 + unsigned short** toP, const unsigned short* toLim) { 474 + UNUSED_P(enc); 475 + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; 476 + 477 + if ((*toP == toLim) && (*fromP < fromLim)) 478 + return XML_CONVERT_OUTPUT_EXHAUSTED; 479 + else 480 + return XML_CONVERT_COMPLETED; 481 + } 482 + 483 + #ifdef XML_NS 484 + 485 + static const struct normal_encoding latin1_encoding_ns = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, 486 + { 487 + #include "asciitab.h" 488 + #include "latin1tab.h" 489 + }, 490 + STANDARD_VTABLE(sb_) NULL_VTABLE}; 491 + 492 + #endif 493 + 494 + static const struct normal_encoding latin1_encoding = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, 495 + { 496 + #define BT_COLON BT_NMSTRT 497 + #include "asciitab.h" 498 + #undef BT_COLON 499 + #include "latin1tab.h" 500 + }, 501 + STANDARD_VTABLE(sb_) NULL_VTABLE}; 502 + /* ASCII to UTF-8 is a plain byte copy, bounded by whichever of the two buffers ends first. */ 503 + static enum XML_Convert_Result PTRCALL ascii_toUtf8(const ENCODING* enc, const char** fromP, const char* fromLim, 504 + char** toP, const char* toLim) { 505 + UNUSED_P(enc); 506 + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; 507 + 508 + if ((*toP == toLim) && (*fromP < fromLim)) 509 + return XML_CONVERT_OUTPUT_EXHAUSTED; 510 + else 511 + return XML_CONVERT_COMPLETED; 512 + } 513 + 514 + #ifdef XML_NS 515 + 516 + static const struct normal_encoding ascii_encoding_ns = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, 517 + { 518 + #include "asciitab.h" 519 + /* BT_NONXML == 0 */ 520 + }, 521 + STANDARD_VTABLE(sb_) NULL_VTABLE}; 522 + 523 + #endif 524 + 525 + static const struct normal_encoding ascii_encoding = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, 526 + { 527 + #define BT_COLON BT_NMSTRT 528 + #include "asciitab.h" 529 + #undef BT_COLON 530 + /* BT_NONXML == 0 */ 531 + }, 532 + STANDARD_VTABLE(sb_) 
NULL_VTABLE}; 533 + /* Classifies a 16-bit code unit from its two bytes: high surrogates map to BT_LEAD4, low surrogates to BT_TRAIL, 0xFFFE and 0xFFFF to BT_NONXML, and everything else to BT_NONASCII. */ 534 + static int PTRFASTCALL unicode_byte_type(char hi, char lo) { 535 + switch ((unsigned char)hi) { 536 + /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */ 537 + case 0xD8: 538 + case 0xD9: 539 + case 0xDA: 540 + case 0xDB: 541 + return BT_LEAD4; 542 + /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */ 543 + case 0xDC: 544 + case 0xDD: 545 + case 0xDE: 546 + case 0xDF: 547 + return BT_TRAIL; 548 + case 0xFF: 549 + switch ((unsigned char)lo) { 550 + case 0xFF: /* noncharacter-FFFF */ 551 + case 0xFE: /* noncharacter-FFFE */ 552 + return BT_NONXML; 553 + } 554 + break; 555 + } 556 + return BT_NONASCII; 557 + } 558 + /* Generates E##toUtf8, a UTF-16 to UTF-8 converter. Byte order is not a parameter: the expansion uses whichever GET_LO/GET_HI definitions are in effect where the macro is invoked. An odd trailing input byte is ignored. Note that `default:` is placed before the surrogate cases inside the switch; that is legal C and does not change which case is selected. */ 559 + #define DEFINE_UTF16_TO_UTF8(E) \ 560 + static enum XML_Convert_Result PTRCALL E##toUtf8(const ENCODING* enc, const char** fromP, const char* fromLim, \ 561 + char** toP, const char* toLim) { \ 562 + const char* from = *fromP; \ 563 + UNUSED_P(enc); \ 564 + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ 565 + for (; from < fromLim; from += 2) { \ 566 + int plane; \ 567 + unsigned char lo2; \ 568 + unsigned char lo = GET_LO(from); \ 569 + unsigned char hi = GET_HI(from); \ 570 + switch (hi) { \ 571 + case 0: \ 572 + if (lo < 0x80) { \ 573 + if (*toP == toLim) { \ 574 + *fromP = from; \ 575 + return XML_CONVERT_OUTPUT_EXHAUSTED; \ 576 + } \ 577 + *(*toP)++ = lo; \ 578 + break; \ 579 + } \ 580 + /* fall through */ \ 581 + case 0x1: \ 582 + case 0x2: \ 583 + case 0x3: \ 584 + case 0x4: \ 585 + case 0x5: \ 586 + case 0x6: \ 587 + case 0x7: \ 588 + if (toLim - *toP < 2) { \ 589 + *fromP = from; \ 590 + return XML_CONVERT_OUTPUT_EXHAUSTED; \ 591 + } \ 592 + *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ 593 + *(*toP)++ = ((lo & 0x3f) | 0x80); \ 594 + break; \ 595 + default: \ 596 + if (toLim - *toP < 3) { \ 597 + *fromP = from; \ 598 + return XML_CONVERT_OUTPUT_EXHAUSTED; \ 599 + } \ 600 + /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ 601 + *(*toP)++ = ((hi >> 4) | 
UTF8_cval3); \ 602 + *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ 603 + *(*toP)++ = ((lo & 0x3f) | 0x80); \ 604 + break; \ 605 + case 0xD8: \ 606 + case 0xD9: \ 607 + case 0xDA: \ 608 + case 0xDB: \ 609 + if (toLim - *toP < 4) { \ 610 + *fromP = from; \ 611 + return XML_CONVERT_OUTPUT_EXHAUSTED; \ 612 + } \ 613 + if (fromLim - from < 4) { \ 614 + *fromP = from; \ 615 + return XML_CONVERT_INPUT_INCOMPLETE; \ 616 + } \ 617 + plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ 618 + *(*toP)++ = (char)((plane >> 2) | UTF8_cval4); \ 619 + *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ 620 + from += 2; \ 621 + lo2 = GET_LO(from); \ 622 + *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2) | (lo2 >> 6) | 0x80); \ 623 + *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ 624 + break; \ 625 + } \ 626 + } \ 627 + *fromP = from; \ 628 + if (from < fromLim) \ 629 + return XML_CONVERT_INPUT_INCOMPLETE; \ 630 + else \ 631 + return XML_CONVERT_COMPLETED; \ 632 + } 633 + /* Generates E##toUtf16, which re-packs UTF-16 bytes into native 16-bit units using the GET_LO/GET_HI in effect at expansion. If the output cannot hold all of the input and the last input unit is a high surrogate, that unit is held back and XML_CONVERT_INPUT_INCOMPLETE is reported unless the output fills up first. */ 634 + #define DEFINE_UTF16_TO_UTF16(E) \ 635 + static enum XML_Convert_Result PTRCALL E##toUtf16(const ENCODING* enc, const char** fromP, const char* fromLim, \ 636 + unsigned short** toP, const unsigned short* toLim) { \ 637 + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ 638 + UNUSED_P(enc); \ 639 + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ 640 + /* Avoid copying first half only of surrogate */ \ 641 + if (fromLim - *fromP > ((toLim - *toP) << 1) && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ 642 + fromLim -= 2; \ 643 + res = XML_CONVERT_INPUT_INCOMPLETE; \ 644 + } \ 645 + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ 646 + if ((*toP == toLim) && (*fromP < fromLim)) \ 647 + return XML_CONVERT_OUTPUT_EXHAUSTED; \ 648 + else \ 649 + return res; \ 650 + } 651 + /* Little-endian byte accessors: the low byte comes first. */ 652 + #define GET_LO(ptr) ((unsigned char)(ptr)[0]) 653 + #define GET_HI(ptr) ((unsigned char)(ptr)[1]) 654 + 655 + 
DEFINE_UTF16_TO_UTF8(little2_) 656 + DEFINE_UTF16_TO_UTF16(little2_) 657 + 658 + #undef GET_LO 659 + #undef GET_HI 660 + 661 + #define GET_LO(ptr) ((unsigned char)(ptr)[1]) 662 + #define GET_HI(ptr) ((unsigned char)(ptr)[0]) 663 + 664 + DEFINE_UTF16_TO_UTF8(big2_) 665 + DEFINE_UTF16_TO_UTF16(big2_) 666 + 667 + #undef GET_LO 668 + #undef GET_HI 669 + 670 + #define LITTLE2_BYTE_TYPE(enc, p) ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0])) 671 + #define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) 672 + #define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c)) 673 + #define LITTLE2_IS_NAME_CHAR_MINBPC(p) UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) 674 + #define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) 675 + 676 + #ifdef XML_MIN_SIZE 677 + 678 + static int PTRFASTCALL little2_byteType(const ENCODING* enc, const char* p) { return LITTLE2_BYTE_TYPE(enc, p); } 679 + 680 + static int PTRFASTCALL little2_byteToAscii(const ENCODING* enc, const char* p) { 681 + UNUSED_P(enc); 682 + return LITTLE2_BYTE_TO_ASCII(p); 683 + } 684 + 685 + static int PTRCALL little2_charMatches(const ENCODING* enc, const char* p, int c) { 686 + UNUSED_P(enc); 687 + return LITTLE2_CHAR_MATCHES(p, c); 688 + } 689 + 690 + static int PTRFASTCALL little2_isNameMin(const ENCODING* enc, const char* p) { 691 + UNUSED_P(enc); 692 + return LITTLE2_IS_NAME_CHAR_MINBPC(p); 693 + } 694 + 695 + static int PTRFASTCALL little2_isNmstrtMin(const ENCODING* enc, const char* p) { 696 + UNUSED_P(enc); 697 + return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p); 698 + } 699 + 700 + #undef VTABLE 701 + #define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 702 + 703 + #else /* not XML_MIN_SIZE */ 704 + 705 + #undef PREFIX 706 + #define PREFIX(ident) little2_##ident 707 + #define MINBPC(enc) 2 708 + /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. 
*/ 709 + #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) 710 + #define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p) 711 + #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c) 712 + #define IS_NAME_CHAR(enc, p, n) 0 713 + #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p) 714 + #define IS_NMSTRT_CHAR(enc, p, n) (0) 715 + #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) 716 + 717 + #define XML_TOK_IMPL_C 718 + #include "xmltok_impl.c" 719 + #undef XML_TOK_IMPL_C 720 + 721 + #undef MINBPC 722 + #undef BYTE_TYPE 723 + #undef BYTE_TO_ASCII 724 + #undef CHAR_MATCHES 725 + #undef IS_NAME_CHAR 726 + #undef IS_NAME_CHAR_MINBPC 727 + #undef IS_NMSTRT_CHAR 728 + #undef IS_NMSTRT_CHAR_MINBPC 729 + #undef IS_INVALID_CHAR 730 + 731 + #endif /* not XML_MIN_SIZE */ 732 + 733 + #ifdef XML_NS 734 + 735 + static const struct normal_encoding little2_encoding_ns = {{VTABLE, 2, 0, 736 + #if BYTEORDER == 1234 737 + 1 738 + #else 739 + 0 740 + #endif 741 + }, 742 + { 743 + #include "asciitab.h" 744 + #include "latin1tab.h" 745 + }, 746 + STANDARD_VTABLE(little2_) NULL_VTABLE}; 747 + 748 + #endif 749 + 750 + static const struct normal_encoding little2_encoding = {{VTABLE, 2, 0, 751 + #if BYTEORDER == 1234 752 + 1 753 + #else 754 + 0 755 + #endif 756 + }, 757 + { 758 + #define BT_COLON BT_NMSTRT 759 + #include "asciitab.h" 760 + #undef BT_COLON 761 + #include "latin1tab.h" 762 + }, 763 + STANDARD_VTABLE(little2_) NULL_VTABLE}; 764 + 765 + #if BYTEORDER != 4321 766 + 767 + #ifdef XML_NS 768 + 769 + static const struct normal_encoding internal_little2_encoding_ns = {{VTABLE, 2, 0, 1}, 770 + { 771 + #include "iasciitab.h" 772 + #include "latin1tab.h" 773 + }, 774 + STANDARD_VTABLE(little2_) NULL_VTABLE}; 775 + 776 + #endif 777 + 778 + static const struct normal_encoding internal_little2_encoding = {{VTABLE, 2, 0, 1}, 779 + { 780 + #define BT_COLON BT_NMSTRT 781 + #include "iasciitab.h" 782 + #undef BT_COLON 783 + #include "latin1tab.h" 784 + }, 
785 + STANDARD_VTABLE(little2_) NULL_VTABLE}; 786 + 787 + #endif 788 + 789 + #define BIG2_BYTE_TYPE(enc, p) ((p)[0] == 0 ? SB_BYTE_TYPE(enc, p + 1) : unicode_byte_type((p)[0], (p)[1])) 790 + #define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) 791 + #define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c)) 792 + #define BIG2_IS_NAME_CHAR_MINBPC(p) UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) 793 + #define BIG2_IS_NMSTRT_CHAR_MINBPC(p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) 794 + 795 + #ifdef XML_MIN_SIZE 796 + 797 + static int PTRFASTCALL big2_byteType(const ENCODING* enc, const char* p) { return BIG2_BYTE_TYPE(enc, p); } 798 + 799 + static int PTRFASTCALL big2_byteToAscii(const ENCODING* enc, const char* p) { 800 + UNUSED_P(enc); 801 + return BIG2_BYTE_TO_ASCII(p); 802 + } 803 + 804 + static int PTRCALL big2_charMatches(const ENCODING* enc, const char* p, int c) { 805 + UNUSED_P(enc); 806 + return BIG2_CHAR_MATCHES(p, c); 807 + } 808 + 809 + static int PTRFASTCALL big2_isNameMin(const ENCODING* enc, const char* p) { 810 + UNUSED_P(enc); 811 + return BIG2_IS_NAME_CHAR_MINBPC(p); 812 + } 813 + 814 + static int PTRFASTCALL big2_isNmstrtMin(const ENCODING* enc, const char* p) { 815 + UNUSED_P(enc); 816 + return BIG2_IS_NMSTRT_CHAR_MINBPC(p); 817 + } 818 + 819 + #undef VTABLE 820 + #define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 821 + 822 + #else /* not XML_MIN_SIZE */ 823 + 824 + #undef PREFIX 825 + #define PREFIX(ident) big2_##ident 826 + #define MINBPC(enc) 2 827 + /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. 
*/ 828 + #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) 829 + #define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p) 830 + #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c) 831 + #define IS_NAME_CHAR(enc, p, n) 0 832 + #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p) 833 + #define IS_NMSTRT_CHAR(enc, p, n) (0) 834 + #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p) 835 + 836 + #define XML_TOK_IMPL_C 837 + #include "xmltok_impl.c" 838 + #undef XML_TOK_IMPL_C 839 + 840 + #undef MINBPC 841 + #undef BYTE_TYPE 842 + #undef BYTE_TO_ASCII 843 + #undef CHAR_MATCHES 844 + #undef IS_NAME_CHAR 845 + #undef IS_NAME_CHAR_MINBPC 846 + #undef IS_NMSTRT_CHAR 847 + #undef IS_NMSTRT_CHAR_MINBPC 848 + #undef IS_INVALID_CHAR 849 + 850 + #endif /* not XML_MIN_SIZE */ 851 + 852 + #ifdef XML_NS 853 + 854 + static const struct normal_encoding big2_encoding_ns = {{VTABLE, 2, 0, 855 + #if BYTEORDER == 4321 856 + 1 857 + #else 858 + 0 859 + #endif 860 + }, 861 + { 862 + #include "asciitab.h" 863 + #include "latin1tab.h" 864 + }, 865 + STANDARD_VTABLE(big2_) NULL_VTABLE}; 866 + 867 + #endif 868 + 869 + static const struct normal_encoding big2_encoding = {{VTABLE, 2, 0, 870 + #if BYTEORDER == 4321 871 + 1 872 + #else 873 + 0 874 + #endif 875 + }, 876 + { 877 + #define BT_COLON BT_NMSTRT 878 + #include "asciitab.h" 879 + #undef BT_COLON 880 + #include "latin1tab.h" 881 + }, 882 + STANDARD_VTABLE(big2_) NULL_VTABLE}; 883 + 884 + #if BYTEORDER != 1234 885 + 886 + #ifdef XML_NS 887 + 888 + static const struct normal_encoding internal_big2_encoding_ns = {{VTABLE, 2, 0, 1}, 889 + { 890 + #include "iasciitab.h" 891 + #include "latin1tab.h" 892 + }, 893 + STANDARD_VTABLE(big2_) NULL_VTABLE}; 894 + 895 + #endif 896 + 897 + static const struct normal_encoding internal_big2_encoding = {{VTABLE, 2, 0, 1}, 898 + { 899 + #define BT_COLON BT_NMSTRT 900 + #include "iasciitab.h" 901 + #undef BT_COLON 902 + #include "latin1tab.h" 903 + }, 904 + STANDARD_VTABLE(big2_) 
NULL_VTABLE}; 905 + 906 + #endif 907 + 908 + #undef PREFIX 909 + 910 + static int FASTCALL streqci(const char* s1, const char* s2) { 911 + for (;;) { 912 + char c1 = *s1++; 913 + char c2 = *s2++; 914 + if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; 915 + if (ASCII_a <= c2 && c2 <= ASCII_z) 916 + /* The following line will never get executed. streqci() is 917 + * only called from two places, both of which guarantee to put 918 + * upper-case strings into s2. 919 + */ 920 + c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ 921 + if (c1 != c2) return 0; 922 + if (!c1) break; 923 + } 924 + return 1; 925 + } 926 + 927 + static void PTRCALL initUpdatePosition(const ENCODING* enc, const char* ptr, const char* end, POSITION* pos) { 928 + UNUSED_P(enc); 929 + normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); 930 + } 931 + 932 + static int toAscii(const ENCODING* enc, const char* ptr, const char* end) { 933 + char buf[1]; 934 + char* p = buf; 935 + XmlUtf8Convert(enc, &ptr, end, &p, p + 1); 936 + if (p == buf) 937 + return -1; 938 + else 939 + return buf[0]; 940 + } 941 + 942 + static int FASTCALL isSpace(int c) { 943 + switch (c) { 944 + case 0x20: 945 + case 0xD: 946 + case 0xA: 947 + case 0x9: 948 + return 1; 949 + } 950 + return 0; 951 + } 952 + 953 + /* Return 1 if there's just optional white space or there's an S 954 + followed by name=val. 
955 + */ 956 + static int parsePseudoAttribute(const ENCODING* enc, const char* ptr, const char* end, const char** namePtr, 957 + const char** nameEndPtr, const char** valPtr, const char** nextTokPtr) { 958 + int c; 959 + char open; 960 + if (ptr == end) { 961 + *namePtr = NULL; 962 + return 1; 963 + } 964 + if (!isSpace(toAscii(enc, ptr, end))) { 965 + *nextTokPtr = ptr; 966 + return 0; 967 + } 968 + do { 969 + ptr += enc->minBytesPerChar; 970 + } while (isSpace(toAscii(enc, ptr, end))); 971 + if (ptr == end) { 972 + *namePtr = NULL; 973 + return 1; 974 + } 975 + *namePtr = ptr; 976 + for (;;) { 977 + c = toAscii(enc, ptr, end); 978 + if (c == -1) { 979 + *nextTokPtr = ptr; 980 + return 0; 981 + } 982 + if (c == ASCII_EQUALS) { 983 + *nameEndPtr = ptr; 984 + break; 985 + } 986 + if (isSpace(c)) { 987 + *nameEndPtr = ptr; 988 + do { 989 + ptr += enc->minBytesPerChar; 990 + } while (isSpace(c = toAscii(enc, ptr, end))); 991 + if (c != ASCII_EQUALS) { 992 + *nextTokPtr = ptr; 993 + return 0; 994 + } 995 + break; 996 + } 997 + ptr += enc->minBytesPerChar; 998 + } 999 + if (ptr == *namePtr) { 1000 + *nextTokPtr = ptr; 1001 + return 0; 1002 + } 1003 + ptr += enc->minBytesPerChar; 1004 + c = toAscii(enc, ptr, end); 1005 + while (isSpace(c)) { 1006 + ptr += enc->minBytesPerChar; 1007 + c = toAscii(enc, ptr, end); 1008 + } 1009 + if (c != ASCII_QUOT && c != ASCII_APOS) { 1010 + *nextTokPtr = ptr; 1011 + return 0; 1012 + } 1013 + open = (char)c; 1014 + ptr += enc->minBytesPerChar; 1015 + *valPtr = ptr; 1016 + for (;; ptr += enc->minBytesPerChar) { 1017 + c = toAscii(enc, ptr, end); 1018 + if (c == open) break; 1019 + if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z) && !(ASCII_0 <= c && c <= ASCII_9) && 1020 + c != ASCII_PERIOD && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { 1021 + *nextTokPtr = ptr; 1022 + return 0; 1023 + } 1024 + } 1025 + *nextTokPtr = ptr + enc->minBytesPerChar; 1026 + return 1; 1027 + } 1028 + 1029 + static const char KW_version[] 
= {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'}; 1030 + 1031 + static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'}; 1032 + 1033 + static const char KW_standalone[] = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, 1034 + ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'}; 1035 + 1036 + static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'}; 1037 + 1038 + static const char KW_no[] = {ASCII_n, ASCII_o, '\0'}; 1039 + 1040 + static int doParseXmlDecl(const ENCODING* (*encodingFinder)(const ENCODING*, const char*, const char*), 1041 + int isGeneralTextEntity, const ENCODING* enc, const char* ptr, const char* end, 1042 + const char** badPtr, const char** versionPtr, const char** versionEndPtr, 1043 + const char** encodingName, const ENCODING** encoding, int* standalone) { 1044 + const char* val = NULL; 1045 + const char* name = NULL; 1046 + const char* nameEnd = NULL; 1047 + ptr += 5 * enc->minBytesPerChar; 1048 + end -= 2 * enc->minBytesPerChar; 1049 + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) { 1050 + *badPtr = ptr; 1051 + return 0; 1052 + } 1053 + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { 1054 + if (!isGeneralTextEntity) { 1055 + *badPtr = name; 1056 + return 0; 1057 + } 1058 + } else { 1059 + if (versionPtr) *versionPtr = val; 1060 + if (versionEndPtr) *versionEndPtr = ptr; 1061 + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 1062 + *badPtr = ptr; 1063 + return 0; 1064 + } 1065 + if (!name) { 1066 + if (isGeneralTextEntity) { 1067 + /* a TextDecl must have an EncodingDecl */ 1068 + *badPtr = ptr; 1069 + return 0; 1070 + } 1071 + return 1; 1072 + } 1073 + } 1074 + if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { 1075 + int c = toAscii(enc, val, end); 1076 + if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { 1077 + *badPtr = val; 1078 + return 0; 1079 + } 1080 + if 
(encodingName) *encodingName = val; 1081 + if (encoding) *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); 1082 + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 1083 + *badPtr = ptr; 1084 + return 0; 1085 + } 1086 + if (!name) return 1; 1087 + } 1088 + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { 1089 + *badPtr = name; 1090 + return 0; 1091 + } 1092 + if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { 1093 + if (standalone) *standalone = 1; 1094 + } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { 1095 + if (standalone) *standalone = 0; 1096 + } else { 1097 + *badPtr = val; 1098 + return 0; 1099 + } 1100 + while (isSpace(toAscii(enc, ptr, end))) ptr += enc->minBytesPerChar; 1101 + if (ptr != end) { 1102 + *badPtr = ptr; 1103 + return 0; 1104 + } 1105 + return 1; 1106 + } 1107 + 1108 + static int FASTCALL checkCharRefNumber(int result) { 1109 + switch (result >> 8) { 1110 + case 0xD8: 1111 + case 0xD9: 1112 + case 0xDA: 1113 + case 0xDB: 1114 + case 0xDC: 1115 + case 0xDD: 1116 + case 0xDE: 1117 + case 0xDF: 1118 + return -1; 1119 + case 0: 1120 + if (latin1_encoding.type[result] == BT_NONXML) return -1; 1121 + break; 1122 + case 0xFF: 1123 + if (result == 0xFFFE || result == 0xFFFF) return -1; 1124 + break; 1125 + } 1126 + return result; 1127 + } 1128 + 1129 + int FASTCALL XmlUtf8Encode(int c, char* buf) { 1130 + enum { 1131 + /* minN is minimum legal resulting value for N byte sequence */ 1132 + min2 = 0x80, 1133 + min3 = 0x800, 1134 + min4 = 0x10000 1135 + }; 1136 + 1137 + if (c < 0) return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ 1138 + if (c < min2) { 1139 + buf[0] = (char)(c | UTF8_cval1); 1140 + return 1; 1141 + } 1142 + if (c < min3) { 1143 + buf[0] = (char)((c >> 6) | UTF8_cval2); 1144 + buf[1] = (char)((c & 0x3f) | 0x80); 1145 + return 2; 1146 + } 1147 + if (c < min4) { 1148 + buf[0] = (char)((c >> 
12) | UTF8_cval3); 1149 + buf[1] = (char)(((c >> 6) & 0x3f) | 0x80); 1150 + buf[2] = (char)((c & 0x3f) | 0x80); 1151 + return 3; 1152 + } 1153 + if (c < 0x110000) { 1154 + buf[0] = (char)((c >> 18) | UTF8_cval4); 1155 + buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); 1156 + buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); 1157 + buf[3] = (char)((c & 0x3f) | 0x80); 1158 + return 4; 1159 + } 1160 + return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ 1161 + } 1162 + 1163 + int FASTCALL XmlUtf16Encode(int charNum, unsigned short* buf) { 1164 + if (charNum < 0) return 0; 1165 + if (charNum < 0x10000) { 1166 + buf[0] = (unsigned short)charNum; 1167 + return 1; 1168 + } 1169 + if (charNum < 0x110000) { 1170 + charNum -= 0x10000; 1171 + buf[0] = (unsigned short)((charNum >> 10) + 0xD800); 1172 + buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00); 1173 + return 2; 1174 + } 1175 + return 0; 1176 + } 1177 + 1178 + struct unknown_encoding { 1179 + struct normal_encoding normal; 1180 + CONVERTER convert; 1181 + void* userData; 1182 + unsigned short utf16[256]; 1183 + char utf8[256][4]; 1184 + }; 1185 + 1186 + #define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding*)(enc)) 1187 + 1188 + int XmlSizeOfUnknownEncoding(void) { return sizeof(struct unknown_encoding); } 1189 + 1190 + static int PTRFASTCALL unknown_isName(const ENCODING* enc, const char* p) { 1191 + const struct unknown_encoding* uenc = AS_UNKNOWN_ENCODING(enc); 1192 + int c = uenc->convert(uenc->userData, p); 1193 + if (c & ~0xFFFF) return 0; 1194 + return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); 1195 + } 1196 + 1197 + static int PTRFASTCALL unknown_isNmstrt(const ENCODING* enc, const char* p) { 1198 + const struct unknown_encoding* uenc = AS_UNKNOWN_ENCODING(enc); 1199 + int c = uenc->convert(uenc->userData, p); 1200 + if (c & ~0xFFFF) return 0; 1201 + return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); 1202 + } 1203 + 1204 + static int PTRFASTCALL unknown_isInvalid(const ENCODING* 
enc, const char* p) { 1205 + const struct unknown_encoding* uenc = AS_UNKNOWN_ENCODING(enc); 1206 + int c = uenc->convert(uenc->userData, p); 1207 + return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; 1208 + } 1209 + 1210 + static enum XML_Convert_Result PTRCALL unknown_toUtf8(const ENCODING* enc, const char** fromP, const char* fromLim, 1211 + char** toP, const char* toLim) { 1212 + const struct unknown_encoding* uenc = AS_UNKNOWN_ENCODING(enc); 1213 + char buf[XML_UTF8_ENCODE_MAX]; 1214 + for (;;) { 1215 + const char* utf8; 1216 + int n; 1217 + if (*fromP == fromLim) return XML_CONVERT_COMPLETED; 1218 + utf8 = uenc->utf8[(unsigned char)**fromP]; 1219 + n = *utf8++; 1220 + if (n == 0) { 1221 + int c = uenc->convert(uenc->userData, *fromP); 1222 + n = XmlUtf8Encode(c, buf); 1223 + if (n > toLim - *toP) return XML_CONVERT_OUTPUT_EXHAUSTED; 1224 + utf8 = buf; 1225 + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); 1226 + } else { 1227 + if (n > toLim - *toP) return XML_CONVERT_OUTPUT_EXHAUSTED; 1228 + (*fromP)++; 1229 + } 1230 + memcpy(*toP, utf8, n); 1231 + *toP += n; 1232 + } 1233 + } 1234 + 1235 + static enum XML_Convert_Result PTRCALL unknown_toUtf16(const ENCODING* enc, const char** fromP, const char* fromLim, 1236 + unsigned short** toP, const unsigned short* toLim) { 1237 + const struct unknown_encoding* uenc = AS_UNKNOWN_ENCODING(enc); 1238 + while (*fromP < fromLim && *toP < toLim) { 1239 + unsigned short c = uenc->utf16[(unsigned char)**fromP]; 1240 + if (c == 0) { 1241 + c = (unsigned short)uenc->convert(uenc->userData, *fromP); 1242 + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); 1243 + } else 1244 + (*fromP)++; 1245 + *(*toP)++ = c; 1246 + } 1247 + 1248 + if ((*toP == toLim) && (*fromP < fromLim)) 1249 + return XML_CONVERT_OUTPUT_EXHAUSTED; 1250 + else 1251 + return XML_CONVERT_COMPLETED; 1252 + } 1253 + 1254 + ENCODING* XmlInitUnknownEncoding(void* mem, const int* table, CONVERTER 
convert, void* userData) { 1255 + int i; 1256 + struct unknown_encoding* e = (struct unknown_encoding*)mem; 1257 + memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding)); 1258 + for (i = 0; i < 128; i++) 1259 + if (latin1_encoding.type[i] != BT_OTHER && latin1_encoding.type[i] != BT_NONXML && table[i] != i) return 0; 1260 + for (i = 0; i < 256; i++) { 1261 + int c = table[i]; 1262 + if (c == -1) { 1263 + e->normal.type[i] = BT_MALFORM; 1264 + /* This shouldn't really get used. */ 1265 + e->utf16[i] = 0xFFFF; 1266 + e->utf8[i][0] = 1; 1267 + e->utf8[i][1] = 0; 1268 + } else if (c < 0) { 1269 + if (c < -4) return 0; 1270 + /* Multi-byte sequences need a converter function */ 1271 + if (!convert) return 0; 1272 + e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); 1273 + e->utf8[i][0] = 0; 1274 + e->utf16[i] = 0; 1275 + } else if (c < 0x80) { 1276 + if (latin1_encoding.type[c] != BT_OTHER && latin1_encoding.type[c] != BT_NONXML && c != i) return 0; 1277 + e->normal.type[i] = latin1_encoding.type[c]; 1278 + e->utf8[i][0] = 1; 1279 + e->utf8[i][1] = (char)c; 1280 + e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); 1281 + } else if (checkCharRefNumber(c) < 0) { 1282 + e->normal.type[i] = BT_NONXML; 1283 + /* This shouldn't really get used. 
*/ 1284 + e->utf16[i] = 0xFFFF; 1285 + e->utf8[i][0] = 1; 1286 + e->utf8[i][1] = 0; 1287 + } else { 1288 + if (c > 0xFFFF) return 0; 1289 + if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) 1290 + e->normal.type[i] = BT_NMSTRT; 1291 + else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) 1292 + e->normal.type[i] = BT_NAME; 1293 + else 1294 + e->normal.type[i] = BT_OTHER; 1295 + e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); 1296 + e->utf16[i] = (unsigned short)c; 1297 + } 1298 + } 1299 + e->userData = userData; 1300 + e->convert = convert; 1301 + if (convert) { 1302 + e->normal.isName2 = unknown_isName; 1303 + e->normal.isName3 = unknown_isName; 1304 + e->normal.isName4 = unknown_isName; 1305 + e->normal.isNmstrt2 = unknown_isNmstrt; 1306 + e->normal.isNmstrt3 = unknown_isNmstrt; 1307 + e->normal.isNmstrt4 = unknown_isNmstrt; 1308 + e->normal.isInvalid2 = unknown_isInvalid; 1309 + e->normal.isInvalid3 = unknown_isInvalid; 1310 + e->normal.isInvalid4 = unknown_isInvalid; 1311 + } 1312 + e->normal.enc.utf8Convert = unknown_toUtf8; 1313 + e->normal.enc.utf16Convert = unknown_toUtf16; 1314 + return &(e->normal.enc); 1315 + } 1316 + 1317 + /* If this enumeration is changed, getEncodingIndex and encodings 1318 + must also be changed. 
*/ 1319 + enum { 1320 + UNKNOWN_ENC = -1, 1321 + ISO_8859_1_ENC = 0, 1322 + US_ASCII_ENC, 1323 + UTF_8_ENC, 1324 + UTF_16_ENC, 1325 + UTF_16BE_ENC, 1326 + UTF_16LE_ENC, 1327 + /* must match encodingNames up to here */ 1328 + NO_ENC 1329 + }; 1330 + 1331 + static const char KW_ISO_8859_1[] = {ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, 1332 + ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0'}; 1333 + static const char KW_US_ASCII[] = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0'}; 1334 + static const char KW_UTF_8[] = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'}; 1335 + static const char KW_UTF_16[] = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'}; 1336 + static const char KW_UTF_16BE[] = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0'}; 1337 + static const char KW_UTF_16LE[] = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0'}; 1338 + 1339 + static int FASTCALL getEncodingIndex(const char* name) { 1340 + static const char* const encodingNames[] = { 1341 + KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE, 1342 + }; 1343 + int i; 1344 + if (name == NULL) return NO_ENC; 1345 + for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++) 1346 + if (streqci(name, encodingNames[i])) return i; 1347 + return UNKNOWN_ENC; 1348 + } 1349 + 1350 + /* For binary compatibility, we store the index of the encoding 1351 + specified at initialization in the isUtf16 member. 1352 + */ 1353 + 1354 + #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) 1355 + #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) 1356 + 1357 + /* This is what detects the encoding. 
encodingTable maps from 1358 + encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of 1359 + the external (protocol) specified encoding; state is 1360 + XML_CONTENT_STATE if we're parsing an external text entity, and 1361 + XML_PROLOG_STATE otherwise. 1362 + */ 1363 + 1364 + static int initScan(const ENCODING* const* encodingTable, const INIT_ENCODING* enc, int state, const char* ptr, 1365 + const char* end, const char** nextTokPtr) { 1366 + const ENCODING** encPtr; 1367 + 1368 + if (ptr >= end) return XML_TOK_NONE; 1369 + encPtr = enc->encPtr; 1370 + if (ptr + 1 == end) { 1371 + /* only a single byte available for auto-detection */ 1372 + #ifndef XML_DTD /* FIXME */ 1373 + /* a well-formed document entity must have more than one byte */ 1374 + if (state != XML_CONTENT_STATE) return XML_TOK_PARTIAL; 1375 + #endif 1376 + /* so we're parsing an external text entity... */ 1377 + /* if UTF-16 was externally specified, then we need at least 2 bytes */ 1378 + switch (INIT_ENC_INDEX(enc)) { 1379 + case UTF_16_ENC: 1380 + case UTF_16LE_ENC: 1381 + case UTF_16BE_ENC: 1382 + return XML_TOK_PARTIAL; 1383 + } 1384 + switch ((unsigned char)*ptr) { 1385 + case 0xFE: 1386 + case 0xFF: 1387 + case 0xEF: /* possibly first byte of UTF-8 BOM */ 1388 + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; 1389 + /* fall through */ 1390 + case 0x00: 1391 + case 0x3C: 1392 + return XML_TOK_PARTIAL; 1393 + } 1394 + } else { 1395 + switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { 1396 + case 0xFEFF: 1397 + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; 1398 + *nextTokPtr = ptr + 2; 1399 + *encPtr = encodingTable[UTF_16BE_ENC]; 1400 + return XML_TOK_BOM; 1401 + /* 00 3C is handled in the default case */ 1402 + case 0x3C00: 1403 + if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC || INIT_ENC_INDEX(enc) == UTF_16_ENC) && state == XML_CONTENT_STATE) 1404 + break; 1405 + *encPtr = encodingTable[UTF_16LE_ENC]; 1406 + 
return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1407 + case 0xFFFE: 1408 + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; 1409 + *nextTokPtr = ptr + 2; 1410 + *encPtr = encodingTable[UTF_16LE_ENC]; 1411 + return XML_TOK_BOM; 1412 + case 0xEFBB: 1413 + /* Maybe a UTF-8 BOM (EF BB BF) */ 1414 + /* If there's an explicitly specified (external) encoding 1415 + of ISO-8859-1 or some flavour of UTF-16 1416 + and this is an external text entity, 1417 + don't look for the BOM, 1418 + because it might be a legal data. 1419 + */ 1420 + if (state == XML_CONTENT_STATE) { 1421 + int e = INIT_ENC_INDEX(enc); 1422 + if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC) break; 1423 + } 1424 + if (ptr + 2 == end) return XML_TOK_PARTIAL; 1425 + if ((unsigned char)ptr[2] == 0xBF) { 1426 + *nextTokPtr = ptr + 3; 1427 + *encPtr = encodingTable[UTF_8_ENC]; 1428 + return XML_TOK_BOM; 1429 + } 1430 + break; 1431 + default: 1432 + if (ptr[0] == '\0') { 1433 + /* 0 isn't a legal data character. Furthermore a document 1434 + entity can only start with ASCII characters. So the only 1435 + way this can fail to be big-endian UTF-16 if it it's an 1436 + external parsed general entity that's labelled as 1437 + UTF-16LE. 1438 + */ 1439 + if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) break; 1440 + *encPtr = encodingTable[UTF_16BE_ENC]; 1441 + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1442 + } else if (ptr[1] == '\0') { 1443 + /* We could recover here in the case: 1444 + - parsing an external entity 1445 + - second byte is 0 1446 + - no externally specified encoding 1447 + - no encoding declaration 1448 + by assuming UTF-16LE. But we don't, because this would mean when 1449 + presented just with a single byte, we couldn't reliably determine 1450 + whether we needed further bytes. 
1451 + */ 1452 + if (state == XML_CONTENT_STATE) break; 1453 + *encPtr = encodingTable[UTF_16LE_ENC]; 1454 + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1455 + } 1456 + break; 1457 + } 1458 + } 1459 + *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; 1460 + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1461 + } 1462 + 1463 + #define NS(x) x 1464 + #define ns(x) x 1465 + #define XML_TOK_NS_C 1466 + #include "xmltok_ns.c" 1467 + #undef XML_TOK_NS_C 1468 + #undef NS 1469 + #undef ns 1470 + 1471 + #ifdef XML_NS 1472 + 1473 + #define NS(x) x##NS 1474 + #define ns(x) x##_ns 1475 + 1476 + #define XML_TOK_NS_C 1477 + #include "xmltok_ns.c" 1478 + #undef XML_TOK_NS_C 1479 + 1480 + #undef NS 1481 + #undef ns 1482 + 1483 + ENCODING* XmlInitUnknownEncodingNS(void* mem, const int* table, CONVERTER convert, void* userData) { 1484 + ENCODING* enc = XmlInitUnknownEncoding(mem, table, convert, userData); 1485 + if (enc) ((struct normal_encoding*)enc)->type[ASCII_COLON] = BT_COLON; 1486 + return enc; 1487 + } 1488 + 1489 + #endif /* XML_NS */
+288
lib/expat/xmltok.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2002-2005 Karl Waclawek <karl@waclawek.net> 13 + Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 14 + Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 15 + Licensed under the MIT license: 16 + 17 + Permission is hereby granted, free of charge, to any person obtaining 18 + a copy of this software and associated documentation files (the 19 + "Software"), to deal in the Software without restriction, including 20 + without limitation the rights to use, copy, modify, merge, publish, 21 + distribute, sublicense, and/or sell copies of the Software, and to permit 22 + persons to whom the Software is furnished to do so, subject to the 23 + following conditions: 24 + 25 + The above copyright notice and this permission notice shall be included 26 + in all copies or substantial portions of the Software. 27 + 28 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 31 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 32 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 33 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 34 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
35 + */ 36 + 37 + #ifndef XmlTok_INCLUDED 38 + #define XmlTok_INCLUDED 1 39 + 40 + #ifdef __cplusplus 41 + extern "C" { 42 + #endif 43 + 44 + /* The following token may be returned by XmlContentTok */ 45 + #define XML_TOK_TRAILING_RSQB \ 46 + -5 /* ] or ]] at the end of the scan; might be \ 47 + start of illegal ]]> sequence */ 48 + /* The following tokens may be returned by both XmlPrologTok and 49 + XmlContentTok. 50 + */ 51 + #define XML_TOK_NONE -4 /* The string to be scanned is empty */ 52 + #define XML_TOK_TRAILING_CR \ 53 + -3 /* A CR at the end of the scan; \ 54 + might be part of CRLF sequence */ 55 + #define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ 56 + #define XML_TOK_PARTIAL -1 /* only part of a token */ 57 + #define XML_TOK_INVALID 0 58 + 59 + /* The following tokens are returned by XmlContentTok; some are also 60 + returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. 61 + */ 62 + #define XML_TOK_START_TAG_WITH_ATTS 1 63 + #define XML_TOK_START_TAG_NO_ATTS 2 64 + #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ 65 + #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 66 + #define XML_TOK_END_TAG 5 67 + #define XML_TOK_DATA_CHARS 6 68 + #define XML_TOK_DATA_NEWLINE 7 69 + #define XML_TOK_CDATA_SECT_OPEN 8 70 + #define XML_TOK_ENTITY_REF 9 71 + #define XML_TOK_CHAR_REF 10 /* numeric character reference */ 72 + 73 + /* The following tokens may be returned by both XmlPrologTok and 74 + XmlContentTok. 
75 + */ 76 + #define XML_TOK_PI 11 /* processing instruction */ 77 + #define XML_TOK_XML_DECL 12 /* XML decl or text decl */ 78 + #define XML_TOK_COMMENT 13 79 + #define XML_TOK_BOM 14 /* Byte order mark */ 80 + 81 + /* The following tokens are returned only by XmlPrologTok */ 82 + #define XML_TOK_PROLOG_S 15 83 + #define XML_TOK_DECL_OPEN 16 /* <!foo */ 84 + #define XML_TOK_DECL_CLOSE 17 /* > */ 85 + #define XML_TOK_NAME 18 86 + #define XML_TOK_NMTOKEN 19 87 + #define XML_TOK_POUND_NAME 20 /* #name */ 88 + #define XML_TOK_OR 21 /* | */ 89 + #define XML_TOK_PERCENT 22 90 + #define XML_TOK_OPEN_PAREN 23 91 + #define XML_TOK_CLOSE_PAREN 24 92 + #define XML_TOK_OPEN_BRACKET 25 93 + #define XML_TOK_CLOSE_BRACKET 26 94 + #define XML_TOK_LITERAL 27 95 + #define XML_TOK_PARAM_ENTITY_REF 28 96 + #define XML_TOK_INSTANCE_START 29 97 + 98 + /* The following occur only in element type declarations */ 99 + #define XML_TOK_NAME_QUESTION 30 /* name? */ 100 + #define XML_TOK_NAME_ASTERISK 31 /* name* */ 101 + #define XML_TOK_NAME_PLUS 32 /* name+ */ 102 + #define XML_TOK_COND_SECT_OPEN 33 /* <![ */ 103 + #define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */ 104 + #define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ 105 + #define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ 106 + #define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ 107 + #define XML_TOK_COMMA 38 108 + 109 + /* The following token is returned only by XmlAttributeValueTok */ 110 + #define XML_TOK_ATTRIBUTE_VALUE_S 39 111 + 112 + /* The following token is returned only by XmlCdataSectionTok */ 113 + #define XML_TOK_CDATA_SECT_CLOSE 40 114 + 115 + /* With namespace processing this is returned by XmlPrologTok for a 116 + name with a colon. 
117 + */ 118 + #define XML_TOK_PREFIXED_NAME 41 119 + 120 + #ifdef XML_DTD 121 + #define XML_TOK_IGNORE_SECT 42 122 + #endif /* XML_DTD */ 123 + /* Number of tokenizer states; this is the length of the scanners[] array in struct encoding. */ 124 + #ifdef XML_DTD 125 + #define XML_N_STATES 4 126 + #else /* not XML_DTD */ 127 + #define XML_N_STATES 3 128 + #endif /* not XML_DTD */ 129 + /* State numbers: indices into scanners[], passed as the state argument of XmlTok. */ 130 + #define XML_PROLOG_STATE 0 131 + #define XML_CONTENT_STATE 1 132 + #define XML_CDATA_SECTION_STATE 2 133 + #ifdef XML_DTD 134 + #define XML_IGNORE_SECTION_STATE 3 135 + #endif /* XML_DTD */ 136 + /* Literal kinds: indices into literalScanners[], passed as the literalType argument of XmlLiteralTok. */ 137 + #define XML_N_LITERAL_TYPES 2 138 + #define XML_ATTRIBUTE_VALUE_LITERAL 0 139 + #define XML_ENTITY_VALUE_LITERAL 1 140 + 141 + /* The size of the buffer passed to XmlUtf8Encode must be at least this. */ 142 + #define XML_UTF8_ENCODE_MAX 4 143 + /* The size of the buffer passed to XmlUtf16Encode must be at least this. */ 144 + #define XML_UTF16_ENCODE_MAX 2 145 + 146 + typedef struct position { 147 + /* first line and first column are 0 not 1 */ 148 + XML_Size lineNumber; 149 + XML_Size columnNumber; 150 + } POSITION; 151 + /* Element type of the atts array handed to the getAtts callback (see XmlGetAttributes). NOTE(review): field meanings are not shown in this header; presumably valuePtr/valueEnd delimit the raw attribute value - confirm against the getAtts implementation. */ 152 + typedef struct { 153 + const char* name; 154 + const char* valuePtr; 155 + const char* valueEnd; 156 + char normalized; 157 + } ATTRIBUTE; 158 + 159 + struct encoding; 160 + typedef struct encoding ENCODING; 161 + /* Signature shared by every tokenizer entry point: (enc, ptr, end, nextTokPtr), returning a token code. */ 162 + typedef int(PTRCALL* SCANNER)(const ENCODING*, const char*, const char*, const char**); 163 + 164 + enum XML_Convert_Result { 165 + XML_CONVERT_COMPLETED = 0, 166 + XML_CONVERT_INPUT_INCOMPLETE = 1, 167 + XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */ 168 + }; 169 + /* Per-encoding dispatch table; the Xml* macros in this header call through these function pointers. */ 170 + struct encoding { 171 + SCANNER scanners[XML_N_STATES]; 172 + SCANNER literalScanners[XML_N_LITERAL_TYPES]; 173 + int(PTRCALL* nameMatchesAscii)(const ENCODING*, const char*, const char*, const char*); 174 + int(PTRFASTCALL* nameLength)(const ENCODING*, const char*); 175 + const char*(PTRFASTCALL* skipS)(const ENCODING*, const char*); 176 + int(PTRCALL* getAtts)(const ENCODING* enc, const char* ptr, int attsMax, ATTRIBUTE*
atts); 177 + int(PTRFASTCALL* charRefNumber)(const ENCODING* enc, const char* ptr); 178 + int(PTRCALL* predefinedEntityName)(const ENCODING*, const char*, const char*); 179 + void(PTRCALL* updatePosition)(const ENCODING*, const char* ptr, const char* end, POSITION*); 180 + int(PTRCALL* isPublicId)(const ENCODING* enc, const char* ptr, const char* end, const char** badPtr); 181 + enum XML_Convert_Result(PTRCALL* utf8Convert)(const ENCODING* enc, const char** fromP, const char* fromLim, 182 + char** toP, const char* toLim); 183 + enum XML_Convert_Result(PTRCALL* utf16Convert)(const ENCODING* enc, const char** fromP, const char* fromLim, 184 + unsigned short** toP, const unsigned short* toLim); 185 + int minBytesPerChar; 186 + char isUtf8; 187 + char isUtf16; 188 + }; 189 + 190 + /* Scan the string starting at ptr until the end of the next complete 191 + token, but do not scan past eptr. Return an integer giving the 192 + type of token. 193 + 194 + Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. 195 + 196 + Return XML_TOK_PARTIAL when the string does not contain a complete 197 + token; nextTokPtr will not be set. 198 + 199 + Return XML_TOK_INVALID when the string does not start a valid 200 + token; nextTokPtr will be set to point to the character which made 201 + the token invalid. 202 + 203 + Otherwise the string starts with a valid token; nextTokPtr will be 204 + set to point to the character following the end of that token. 205 + 206 + Each data character counts as a single token, but adjacent data 207 + characters may be returned together. Similarly for characters in 208 + the prolog outside literals, comments and processing instructions.
209 + */ 210 + 211 + #define XmlTok(enc, state, ptr, end, nextTokPtr) (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) 212 + /* The next wrappers call XmlTok with the state argument fixed to one XML_*_STATE constant. */ 213 + #define XmlPrologTok(enc, ptr, end, nextTokPtr) XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) 214 + 215 + #define XmlContentTok(enc, ptr, end, nextTokPtr) XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) 216 + 217 + #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) 218 + 219 + #ifdef XML_DTD 220 + 221 + #define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) 222 + 223 + #endif /* XML_DTD */ 224 + 225 + /* This is used for performing a 2nd-level tokenization on the content 226 + of a literal that has already been returned by XmlTok. 227 + */ 228 + #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ 229 + (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) 230 + 231 + #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ 232 + XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) 233 + 234 + #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) 235 + /* Each remaining macro forwards its arguments unchanged to the corresponding function pointer stored in *enc. */ 236 + #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) 237 + 238 + #define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) 239 + 240 + #define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) 241 + 242 + #define XmlGetAttributes(enc, ptr, attsMax, atts) (((enc)->getAtts)(enc, ptr, attsMax, atts)) 243 + 244 + #define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) 245 + 246 + #define XmlPredefinedEntityName(enc, ptr, end) (((enc)->predefinedEntityName)(enc, ptr, end)) 247 + 248 + #define XmlUpdatePosition(enc, ptr, end, pos) (((enc)->updatePosition)(enc, ptr, end, pos)) 249 + 250 + #define XmlIsPublicId(enc, ptr, end, badPtr) (((enc)->isPublicId)(enc, ptr, end, badPtr)) 251 +
252 + #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) 253 + 254 + #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) 255 + /* Pairs an ENCODING object with a pointer to an ENCODING pointer; this is the first argument of XmlInitEncoding. NOTE(review): presumably encPtr is the slot that gets repointed once the real encoding is known - confirm in xmltok.c. */ 256 + typedef struct { 257 + ENCODING initEnc; 258 + const ENCODING** encPtr; 259 + } INIT_ENCODING; 260 + 261 + int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING* enc, const char* ptr, const char* end, const char** badPtr, 262 + const char** versionPtr, const char** versionEndPtr, const char** encodingNamePtr, 263 + const ENCODING** namedEncodingPtr, int* standalonePtr); 264 + 265 + int XmlInitEncoding(INIT_ENCODING* p, const ENCODING** encPtr, const char* name); 266 + const ENCODING* XmlGetUtf8InternalEncoding(void); 267 + const ENCODING* XmlGetUtf16InternalEncoding(void); 268 + int FASTCALL XmlUtf8Encode(int charNumber, char* buf); 269 + int FASTCALL XmlUtf16Encode(int charNumber, unsigned short* buf); 270 + int XmlSizeOfUnknownEncoding(void); 271 + /* Callback type accepted by XmlInitUnknownEncoding; it receives the userData pointer and a pointer p into the input. */ 272 + typedef int(XMLCALL* CONVERTER)(void* userData, const char* p); 273 + 274 + ENCODING* XmlInitUnknownEncoding(void* mem, const int* table, CONVERTER convert, void* userData); 275 + /* NS-suffixed variants: declared with the same signatures as their non-NS counterparts above. */ 276 + int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING* enc, const char* ptr, const char* end, 277 + const char** badPtr, const char** versionPtr, const char** versionEndPtr, 278 + const char** encodingNamePtr, const ENCODING** namedEncodingPtr, int* standalonePtr); 279 + 280 + int XmlInitEncodingNS(INIT_ENCODING* p, const ENCODING** encPtr, const char* name); 281 + const ENCODING* XmlGetUtf8InternalEncodingNS(void); 282 + const ENCODING* XmlGetUtf16InternalEncodingNS(void); 283 + ENCODING* XmlInitUnknownEncodingNS(void* mem, const int* table, CONVERTER convert, void* userData); 284 + #ifdef __cplusplus 285 + } 286 + #endif 287 + 288 + #endif /* not XmlTok_INCLUDED */
+1719
lib/expat/xmltok_impl.c
··· 1 + /* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 + Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 13 + Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> 14 + Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 15 + Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 16 + Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 17 + Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 18 + Copyright (c) 2020 Boris Kolpackov <boris@codesynthesis.com> 19 + Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> 20 + Licensed under the MIT license: 21 + 22 + Permission is hereby granted, free of charge, to any person obtaining 23 + a copy of this software and associated documentation files (the 24 + "Software"), to deal in the Software without restriction, including 25 + without limitation the rights to use, copy, modify, merge, publish, 26 + distribute, sublicense, and/or sell copies of the Software, and to permit 27 + persons to whom the Software is furnished to do so, subject to the 28 + following conditions: 29 + 30 + The above copyright notice and this permission notice shall be included 31 + in all copies or substantial portions of the Software. 32 + 33 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 34 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 35 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 36 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 37 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 38 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 39 + USE OR OTHER DEALINGS IN THE SOFTWARE. 40 + */ 41 + 42 + #ifdef XML_TOK_IMPL_C 43 + 44 + #ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined 45 + #define IS_INVALID_CHAR(enc, ptr, n) (0) 46 + #endif 47 + /* Switch case for an n-byte lead byte: XML_TOK_PARTIAL_CHAR if fewer than n bytes remain, XML_TOK_INVALID if IS_INVALID_CHAR says so, otherwise skip the n bytes. Relies on enc and end being in scope at the expansion site. */ 48 + #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ 49 + case BT_LEAD##n: \ 50 + if (end - ptr < n) return XML_TOK_PARTIAL_CHAR; \ 51 + if (IS_INVALID_CHAR(enc, ptr, n)) { \ 52 + *(nextTokPtr) = (ptr); \ 53 + return XML_TOK_INVALID; \ 54 + } \ 55 + ptr += n; \ 56 + break; 57 + /* The three lead-byte cases plus the byte types that are always rejected (BT_NONXML, BT_MALFORM, BT_TRAIL). */ 58 + #define INVALID_CASES(ptr, nextTokPtr) \ 59 + INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ 60 + INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ 61 + INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ 62 + case BT_NONXML: \ 63 + case BT_MALFORM: \ 64 + case BT_TRAIL: \ 65 + *(nextTokPtr) = (ptr); \ 66 + return XML_TOK_INVALID; 67 + /* Like INVALID_LEAD_CASE, but the n-byte character must additionally satisfy IS_NAME_CHAR. */ 68 + #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ 69 + case BT_LEAD##n: \ 70 + if (end - ptr < n) return XML_TOK_PARTIAL_CHAR; \ 71 + if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \ 72 + *nextTokPtr = ptr; \ 73 + return XML_TOK_INVALID; \ 74 + } \ 75 + ptr += n; \ 76 + break; 77 + /* Switch cases that accept one name character of any width and advance ptr past it. */ 78 + #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ 79 + case BT_NONASCII: \ 80 + if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ 81 + *nextTokPtr = ptr; \ 82 + return XML_TOK_INVALID; \ 83 + } \ 84 + /* fall through */ \ 85 + case BT_NMSTRT: \ 86 + case BT_HEX: \ 87 + case BT_DIGIT: \ 88 + case BT_NAME: \ 89 + case BT_MINUS: \ 90 + ptr += MINBPC(enc); \ 91 + break; \ 92 + CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ 93 + CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ 94 + CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) 95 + /* Like CHECK_NAME_CASE, but tests IS_NMSTRT_CHAR (name-start characters). */ 96 + #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ 97 + case BT_LEAD##n: \ 98 + if
((end) - (ptr) < (n)) return XML_TOK_PARTIAL_CHAR; \ 99 + if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \ 100 + *nextTokPtr = ptr; \ 101 + return XML_TOK_INVALID; \ 102 + } \ 103 + ptr += n; \ 104 + break; 105 + /* Switch cases that accept one name-start character of any width and advance ptr past it. */ 106 + #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ 107 + case BT_NONASCII: \ 108 + if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ 109 + *nextTokPtr = ptr; \ 110 + return XML_TOK_INVALID; \ 111 + } \ 112 + /* fall through */ \ 113 + case BT_NMSTRT: \ 114 + case BT_HEX: \ 115 + ptr += MINBPC(enc); \ 116 + break; \ 117 + CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 118 + CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 119 + CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 120 + /* PREFIX wraps the name of every function below so the including file can rename them; the default leaves names unchanged. */ 121 + #ifndef PREFIX 122 + #define PREFIX(ident) ident 123 + #endif 124 + /* True when at least count characters of MINBPC(enc) bytes each remain between ptr and end. */ 125 + #define HAS_CHARS(enc, ptr, end, count) ((end) - (ptr) >= ((count) * MINBPC(enc))) 126 + 127 + #define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) 128 + /* Makes the enclosing function return XML_TOK_PARTIAL unless count characters remain. */ 129 + #define REQUIRE_CHARS(enc, ptr, end, count) \ 130 + { \ 131 + if (!HAS_CHARS(enc, ptr, end, count)) { \ 132 + return XML_TOK_PARTIAL; \ 133 + } \ 134 + } 135 + 136 + #define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1) 137 + 138 + /* ptr points to character following "<!-" */ 139 + /* Returns XML_TOK_COMMENT once "--" is followed by '>', XML_TOK_INVALID if "--" is followed by anything else, and XML_TOK_PARTIAL if the input ends first. */ 140 + static int PTRCALL PREFIX(scanComment)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 141 + if (HAS_CHAR(enc, ptr, end)) { 142 + if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 143 + *nextTokPtr = ptr; 144 + return XML_TOK_INVALID; 145 + } 146 + ptr += MINBPC(enc); 147 + while (HAS_CHAR(enc, ptr, end)) { 148 + switch (BYTE_TYPE(enc, ptr)) { 149 + INVALID_CASES(ptr, nextTokPtr) 150 + case BT_MINUS: 151 + ptr += MINBPC(enc); 152 + REQUIRE_CHAR(enc, ptr, end); 153 + if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 154 + ptr += MINBPC(enc); 155 + REQUIRE_CHAR(enc, ptr, end); 156 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 157 + *nextTokPtr = ptr; 158 + return XML_TOK_INVALID; 159 + } 160 +
*nextTokPtr = ptr + MINBPC(enc); 161 + return XML_TOK_COMMENT; 162 + } 163 + break; 164 + default: 165 + ptr += MINBPC(enc); 166 + break; 167 + } 168 + } 169 + } 170 + return XML_TOK_PARTIAL; 171 + } 172 + 173 + /* ptr points to character following "<!" */ 174 + /* Hands "<!-" to scanComment, reports "<![" as XML_TOK_COND_SECT_OPEN, and otherwise returns XML_TOK_DECL_OPEN with *nextTokPtr left on the whitespace or '%' that ends the keyword. */ 175 + static int PTRCALL PREFIX(scanDecl)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 176 + REQUIRE_CHAR(enc, ptr, end); 177 + switch (BYTE_TYPE(enc, ptr)) { 178 + case BT_MINUS: 179 + return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 180 + case BT_LSQB: 181 + *nextTokPtr = ptr + MINBPC(enc); 182 + return XML_TOK_COND_SECT_OPEN; 183 + case BT_NMSTRT: 184 + case BT_HEX: 185 + ptr += MINBPC(enc); 186 + break; 187 + default: 188 + *nextTokPtr = ptr; 189 + return XML_TOK_INVALID; 190 + } 191 + while (HAS_CHAR(enc, ptr, end)) { 192 + switch (BYTE_TYPE(enc, ptr)) { 193 + case BT_PERCNT: 194 + REQUIRE_CHARS(enc, ptr, end, 2); 195 + /* don't allow <!ENTITY% foo "whatever"> */ 196 + switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { 197 + case BT_S: 198 + case BT_CR: 199 + case BT_LF: 200 + case BT_PERCNT: 201 + *nextTokPtr = ptr; 202 + return XML_TOK_INVALID; 203 + } 204 + /* fall through */ 205 + case BT_S: 206 + case BT_CR: 207 + case BT_LF: 208 + *nextTokPtr = ptr; 209 + return XML_TOK_DECL_OPEN; 210 + case BT_NMSTRT: 211 + case BT_HEX: 212 + ptr += MINBPC(enc); 213 + break; 214 + default: 215 + *nextTokPtr = ptr; 216 + return XML_TOK_INVALID; 217 + } 218 + } 219 + return XML_TOK_PARTIAL; 220 + } 221 + /* Classifies the PI target [ptr, end): stores XML_TOK_XML_DECL in *tokPtr for exactly "xml", otherwise XML_TOK_PI. Returns 0 only when the target is three characters spelling xml with at least one upper-case letter; returns 1 in every other case. */ 222 + static int PTRCALL PREFIX(checkPiTarget)(const ENCODING* enc, const char* ptr, const char* end, int* tokPtr) { 223 + int upper = 0; 224 + UNUSED_P(enc); 225 + *tokPtr = XML_TOK_PI; 226 + if (end - ptr != MINBPC(enc) * 3) return 1; 227 + switch (BYTE_TO_ASCII(enc, ptr)) { 228 + case ASCII_x: 229 + break; 230 + case ASCII_X: 231 + upper = 1; 232 + break; 233 + default: 234 + return 1; 235 + } 236 + ptr += MINBPC(enc); 237 + switch (BYTE_TO_ASCII(enc, ptr)) {
238 + case ASCII_m: 239 + break; 240 + case ASCII_M: 241 + upper = 1; 242 + break; 243 + default: 244 + return 1; 245 + } 246 + ptr += MINBPC(enc); 247 + switch (BYTE_TO_ASCII(enc, ptr)) { 248 + case ASCII_l: 249 + break; 250 + case ASCII_L: 251 + upper = 1; 252 + break; 253 + default: 254 + return 1; 255 + } 256 + if (upper) return 0; 257 + *tokPtr = XML_TOK_XML_DECL; 258 + return 1; 259 + } 260 + 261 + /* ptr points to character following "<?" */ 262 + /* Scans a processing instruction through its closing "?>"; the token returned on success is the one checkPiTarget stored, and a target rejected by checkPiTarget yields XML_TOK_INVALID. */ 263 + static int PTRCALL PREFIX(scanPi)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 264 + int tok; 265 + const char* target = ptr; 266 + REQUIRE_CHAR(enc, ptr, end); 267 + switch (BYTE_TYPE(enc, ptr)) { 268 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 269 + default: 270 + *nextTokPtr = ptr; 271 + return XML_TOK_INVALID; 272 + } 273 + while (HAS_CHAR(enc, ptr, end)) { 274 + switch (BYTE_TYPE(enc, ptr)) { 275 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 276 + case BT_S: 277 + case BT_CR: 278 + case BT_LF: 279 + if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 280 + *nextTokPtr = ptr; 281 + return XML_TOK_INVALID; 282 + } 283 + ptr += MINBPC(enc); 284 + while (HAS_CHAR(enc, ptr, end)) { 285 + switch (BYTE_TYPE(enc, ptr)) { 286 + INVALID_CASES(ptr, nextTokPtr) 287 + case BT_QUEST: 288 + ptr += MINBPC(enc); 289 + REQUIRE_CHAR(enc, ptr, end); 290 + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 291 + *nextTokPtr = ptr + MINBPC(enc); 292 + return tok; 293 + } 294 + break; 295 + default: 296 + ptr += MINBPC(enc); 297 + break; 298 + } 299 + } 300 + return XML_TOK_PARTIAL; 301 + case BT_QUEST: 302 + if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 303 + *nextTokPtr = ptr; 304 + return XML_TOK_INVALID; 305 + } 306 + ptr += MINBPC(enc); 307 + REQUIRE_CHAR(enc, ptr, end); 308 + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 309 + *nextTokPtr = ptr + MINBPC(enc); 310 + return tok; 311 + } 312 + /* fall through */ 313 + default: 314 + *nextTokPtr = ptr; 315 + return XML_TOK_INVALID; 316 + }
317 + } 318 + return XML_TOK_PARTIAL; 319 + } 320 + /* Matches the six characters "CDATA[" starting at ptr; on success returns XML_TOK_CDATA_SECT_OPEN with *nextTokPtr just past them, on the first mismatch XML_TOK_INVALID pointing at it. */ 321 + static int PTRCALL PREFIX(scanCdataSection)(const ENCODING* enc, const char* ptr, const char* end, 322 + const char** nextTokPtr) { 323 + static const char CDATA_LSQB[] = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; 324 + int i; 325 + UNUSED_P(enc); 326 + /* CDATA[ */ 327 + REQUIRE_CHARS(enc, ptr, end, 6); 328 + for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { 329 + if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { 330 + *nextTokPtr = ptr; 331 + return XML_TOK_INVALID; 332 + } 333 + } 334 + *nextTokPtr = ptr; 335 + return XML_TOK_CDATA_SECT_OPEN; 336 + } 337 + /* Tokenizer for the inside of a CDATA section: XML_TOK_CDATA_SECT_CLOSE for "]]>", XML_TOK_DATA_NEWLINE for CR, LF or CR LF, otherwise a run of XML_TOK_DATA_CHARS. When MINBPC(enc) > 1 a trailing partial character is first trimmed off end. */ 338 + static int PTRCALL PREFIX(cdataSectionTok)(const ENCODING* enc, const char* ptr, const char* end, 339 + const char** nextTokPtr) { 340 + if (ptr >= end) return XML_TOK_NONE; 341 + if (MINBPC(enc) > 1) { 342 + size_t n = end - ptr; 343 + if (n & (MINBPC(enc) - 1)) { 344 + n &= ~(MINBPC(enc) - 1); 345 + if (n == 0) return XML_TOK_PARTIAL; 346 + end = ptr + n; 347 + } 348 + } 349 + switch (BYTE_TYPE(enc, ptr)) { 350 + case BT_RSQB: 351 + ptr += MINBPC(enc); 352 + REQUIRE_CHAR(enc, ptr, end); 353 + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; 354 + ptr += MINBPC(enc); 355 + REQUIRE_CHAR(enc, ptr, end); 356 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 357 + ptr -= MINBPC(enc); 358 + break; 359 + } 360 + *nextTokPtr = ptr + MINBPC(enc); 361 + return XML_TOK_CDATA_SECT_CLOSE; 362 + case BT_CR: 363 + ptr += MINBPC(enc); 364 + REQUIRE_CHAR(enc, ptr, end); 365 + if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); 366 + *nextTokPtr = ptr; 367 + return XML_TOK_DATA_NEWLINE; 368 + case BT_LF: 369 + *nextTokPtr = ptr + MINBPC(enc); 370 + return XML_TOK_DATA_NEWLINE; 371 + INVALID_CASES(ptr, nextTokPtr) 372 + default: 373 + ptr += MINBPC(enc); 374 + break; 375 + } 376 + while (HAS_CHAR(enc, ptr, end)) { 377 + switch (BYTE_TYPE(enc, ptr)) { 378 + #define LEAD_CASE(n) \ 379 + case BT_LEAD##n: \ 380 + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {
\ 381 + *nextTokPtr = ptr; \ 382 + return XML_TOK_DATA_CHARS; \ 383 + } \ 384 + ptr += n; \ 385 + break; 386 + LEAD_CASE(2) 387 + LEAD_CASE(3) 388 + LEAD_CASE(4) 389 + #undef LEAD_CASE 390 + case BT_NONXML: 391 + case BT_MALFORM: 392 + case BT_TRAIL: 393 + case BT_CR: 394 + case BT_LF: 395 + case BT_RSQB: 396 + *nextTokPtr = ptr; 397 + return XML_TOK_DATA_CHARS; 398 + default: 399 + ptr += MINBPC(enc); 400 + break; 401 + } 402 + } 403 + *nextTokPtr = ptr; 404 + return XML_TOK_DATA_CHARS; 405 + } 406 + 407 + /* ptr points to character following "</" */ 408 + /* Returns XML_TOK_END_TAG at '>'; whitespace is accepted between the name and '>', anything else there is XML_TOK_INVALID. */ 409 + static int PTRCALL PREFIX(scanEndTag)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 410 + REQUIRE_CHAR(enc, ptr, end); 411 + switch (BYTE_TYPE(enc, ptr)) { 412 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 413 + default: 414 + *nextTokPtr = ptr; 415 + return XML_TOK_INVALID; 416 + } 417 + while (HAS_CHAR(enc, ptr, end)) { 418 + switch (BYTE_TYPE(enc, ptr)) { 419 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 420 + case BT_S: 421 + case BT_CR: 422 + case BT_LF: 423 + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 424 + switch (BYTE_TYPE(enc, ptr)) { 425 + case BT_S: 426 + case BT_CR: 427 + case BT_LF: 428 + break; 429 + case BT_GT: 430 + *nextTokPtr = ptr + MINBPC(enc); 431 + return XML_TOK_END_TAG; 432 + default: 433 + *nextTokPtr = ptr; 434 + return XML_TOK_INVALID; 435 + } 436 + } 437 + return XML_TOK_PARTIAL; 438 + #ifdef XML_NS 439 + case BT_COLON: 440 + /* no need to check qname syntax here, 441 + since end-tag must match exactly */ 442 + ptr += MINBPC(enc); 443 + break; 444 + #endif 445 + case BT_GT: 446 + *nextTokPtr = ptr + MINBPC(enc); 447 + return XML_TOK_END_TAG; 448 + default: 449 + *nextTokPtr = ptr; 450 + return XML_TOK_INVALID; 451 + } 452 + } 453 + return XML_TOK_PARTIAL; 454 + } 455 + 456 + /* ptr points to character following "&#X" */ 457 + /* Requires at least one BT_DIGIT or BT_HEX character, then returns XML_TOK_CHAR_REF at ';'. */ 458 + static int PTRCALL PREFIX(scanHexCharRef)(const ENCODING* enc, const char* ptr,
const char* end, 459 + const char** nextTokPtr) { 460 + if (HAS_CHAR(enc, ptr, end)) { 461 + switch (BYTE_TYPE(enc, ptr)) { 462 + case BT_DIGIT: 463 + case BT_HEX: 464 + break; 465 + default: 466 + *nextTokPtr = ptr; 467 + return XML_TOK_INVALID; 468 + } 469 + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 470 + switch (BYTE_TYPE(enc, ptr)) { 471 + case BT_DIGIT: 472 + case BT_HEX: 473 + break; 474 + case BT_SEMI: 475 + *nextTokPtr = ptr + MINBPC(enc); 476 + return XML_TOK_CHAR_REF; 477 + default: 478 + *nextTokPtr = ptr; 479 + return XML_TOK_INVALID; 480 + } 481 + } 482 + } 483 + return XML_TOK_PARTIAL; 484 + } 485 + 486 + /* ptr points to character following "&#" */ 487 + /* A leading lower-case 'x' switches to scanHexCharRef; otherwise requires at least one BT_DIGIT and returns XML_TOK_CHAR_REF at ';'. */ 488 + static int PTRCALL PREFIX(scanCharRef)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 489 + if (HAS_CHAR(enc, ptr, end)) { 490 + if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 491 + switch (BYTE_TYPE(enc, ptr)) { 492 + case BT_DIGIT: 493 + break; 494 + default: 495 + *nextTokPtr = ptr; 496 + return XML_TOK_INVALID; 497 + } 498 + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 499 + switch (BYTE_TYPE(enc, ptr)) { 500 + case BT_DIGIT: 501 + break; 502 + case BT_SEMI: 503 + *nextTokPtr = ptr + MINBPC(enc); 504 + return XML_TOK_CHAR_REF; 505 + default: 506 + *nextTokPtr = ptr; 507 + return XML_TOK_INVALID; 508 + } 509 + } 510 + } 511 + return XML_TOK_PARTIAL; 512 + } 513 + 514 + /* ptr points to character following "&" */ 515 + /* Returns XML_TOK_ENTITY_REF for a name terminated by ';'; a BT_NUM character right after '&' is handed to scanCharRef. */ 516 + static int PTRCALL PREFIX(scanRef)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 517 + REQUIRE_CHAR(enc, ptr, end); 518 + switch (BYTE_TYPE(enc, ptr)) { 519 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 520 + case BT_NUM: 521 + return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 522 + default: 523 + *nextTokPtr = ptr; 524 + return XML_TOK_INVALID; 525 + } 526 + while
(HAS_CHAR(enc, ptr, end)) { 527 + switch (BYTE_TYPE(enc, ptr)) { 528 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 529 + case BT_SEMI: 530 + *nextTokPtr = ptr + MINBPC(enc); 531 + return XML_TOK_ENTITY_REF; 532 + default: 533 + *nextTokPtr = ptr; 534 + return XML_TOK_INVALID; 535 + } 536 + } 537 + return XML_TOK_PARTIAL; 538 + } 539 + 540 + /* ptr points to character following first character of attribute name */ 541 + /* Scans the remainder of a start tag that has attributes: returns XML_TOK_START_TAG_WITH_ATTS at '>' and XML_TOK_EMPTY_ELEMENT_WITH_ATTS at "/>". References inside attribute values are validated through scanRef; a '<' inside a value is XML_TOK_INVALID. */ 542 + static int PTRCALL PREFIX(scanAtts)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 543 + #ifdef XML_NS 544 + int hadColon = 0; 545 + #endif 546 + while (HAS_CHAR(enc, ptr, end)) { 547 + switch (BYTE_TYPE(enc, ptr)) { 548 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 549 + #ifdef XML_NS 550 + case BT_COLON: 551 + if (hadColon) { 552 + *nextTokPtr = ptr; 553 + return XML_TOK_INVALID; 554 + } 555 + hadColon = 1; 556 + ptr += MINBPC(enc); 557 + REQUIRE_CHAR(enc, ptr, end); 558 + switch (BYTE_TYPE(enc, ptr)) { 559 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 560 + default: 561 + *nextTokPtr = ptr; 562 + return XML_TOK_INVALID; 563 + } 564 + break; 565 + #endif 566 + case BT_S: 567 + case BT_CR: 568 + case BT_LF: 569 + for (;;) { 570 + int t; 571 + 572 + ptr += MINBPC(enc); 573 + REQUIRE_CHAR(enc, ptr, end); 574 + t = BYTE_TYPE(enc, ptr); 575 + if (t == BT_EQUALS) break; 576 + switch (t) { 577 + case BT_S: 578 + case BT_LF: 579 + case BT_CR: 580 + break; 581 + default: 582 + *nextTokPtr = ptr; 583 + return XML_TOK_INVALID; 584 + } 585 + } 586 + /* fall through */ 587 + case BT_EQUALS: { 588 + int open; 589 + #ifdef XML_NS 590 + hadColon = 0; 591 + #endif 592 + for (;;) { 593 + ptr += MINBPC(enc); 594 + REQUIRE_CHAR(enc, ptr, end); 595 + open = BYTE_TYPE(enc, ptr); 596 + if (open == BT_QUOT || open == BT_APOS) break; 597 + switch (open) { 598 + case BT_S: 599 + case BT_LF: 600 + case BT_CR: 601 + break; 602 + default: 603 + *nextTokPtr = ptr; 604 + return XML_TOK_INVALID; 605 + } 606 + } 607 + ptr += MINBPC(enc);
608 + /* in attribute value */ 609 + for (;;) { 610 + int t; 611 + REQUIRE_CHAR(enc, ptr, end); 612 + t = BYTE_TYPE(enc, ptr); 613 + if (t == open) break; 614 + switch (t) { 615 + INVALID_CASES(ptr, nextTokPtr) 616 + case BT_AMP: { 617 + int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); 618 + if (tok <= 0) { 619 + if (tok == XML_TOK_INVALID) *nextTokPtr = ptr; 620 + return tok; 621 + } 622 + break; 623 + } 624 + case BT_LT: 625 + *nextTokPtr = ptr; 626 + return XML_TOK_INVALID; 627 + default: 628 + ptr += MINBPC(enc); 629 + break; 630 + } 631 + } 632 + ptr += MINBPC(enc); 633 + REQUIRE_CHAR(enc, ptr, end); 634 + switch (BYTE_TYPE(enc, ptr)) { 635 + case BT_S: 636 + case BT_CR: 637 + case BT_LF: 638 + break; 639 + case BT_SOL: 640 + goto sol; 641 + case BT_GT: 642 + goto gt; 643 + default: 644 + *nextTokPtr = ptr; 645 + return XML_TOK_INVALID; 646 + } 647 + /* ptr points to closing quote */ 648 + for (;;) { 649 + ptr += MINBPC(enc); 650 + REQUIRE_CHAR(enc, ptr, end); 651 + switch (BYTE_TYPE(enc, ptr)) { 652 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 653 + case BT_S: 654 + case BT_CR: 655 + case BT_LF: 656 + continue; 657 + case BT_GT: 658 + gt: 659 + *nextTokPtr = ptr + MINBPC(enc); 660 + return XML_TOK_START_TAG_WITH_ATTS; 661 + case BT_SOL: 662 + sol: 663 + ptr += MINBPC(enc); 664 + REQUIRE_CHAR(enc, ptr, end); 665 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 666 + *nextTokPtr = ptr; 667 + return XML_TOK_INVALID; 668 + } 669 + *nextTokPtr = ptr + MINBPC(enc); 670 + return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; 671 + default: 672 + *nextTokPtr = ptr; 673 + return XML_TOK_INVALID; 674 + } 675 + break; 676 + } 677 + break; 678 + } 679 + default: 680 + *nextTokPtr = ptr; 681 + return XML_TOK_INVALID; 682 + } 683 + } 684 + return XML_TOK_PARTIAL; 685 + } 686 + 687 + /* ptr points to character following "<" */ 688 + /* Dispatches on the character after '<': '!' leads to scanComment or scanCdataSection, '?' to scanPi, '/' to scanEndTag; a name-start character begins a start tag, whose attributes (if any) are handed to scanAtts. */ 689 + static int PTRCALL PREFIX(scanLt)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 690 + #ifdef XML_NS 691 +
int hadColon; 692 + #endif 693 + REQUIRE_CHAR(enc, ptr, end); 694 + switch (BYTE_TYPE(enc, ptr)) { 695 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 696 + case BT_EXCL: 697 + ptr += MINBPC(enc); 698 + REQUIRE_CHAR(enc, ptr, end); 699 + switch (BYTE_TYPE(enc, ptr)) { 700 + case BT_MINUS: 701 + return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 702 + case BT_LSQB: 703 + return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); 704 + } 705 + *nextTokPtr = ptr; 706 + return XML_TOK_INVALID; 707 + case BT_QUEST: 708 + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 709 + case BT_SOL: 710 + return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); 711 + default: 712 + *nextTokPtr = ptr; 713 + return XML_TOK_INVALID; 714 + } 715 + #ifdef XML_NS 716 + hadColon = 0; 717 + #endif 718 + /* we have a start-tag */ 719 + while (HAS_CHAR(enc, ptr, end)) { 720 + switch (BYTE_TYPE(enc, ptr)) { 721 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 722 + #ifdef XML_NS 723 + case BT_COLON: 724 + if (hadColon) { 725 + *nextTokPtr = ptr; 726 + return XML_TOK_INVALID; 727 + } 728 + hadColon = 1; 729 + ptr += MINBPC(enc); 730 + REQUIRE_CHAR(enc, ptr, end); 731 + switch (BYTE_TYPE(enc, ptr)) { 732 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 733 + default: 734 + *nextTokPtr = ptr; 735 + return XML_TOK_INVALID; 736 + } 737 + break; 738 + #endif 739 + case BT_S: 740 + case BT_CR: 741 + case BT_LF: { 742 + ptr += MINBPC(enc); 743 + while (HAS_CHAR(enc, ptr, end)) { 744 + switch (BYTE_TYPE(enc, ptr)) { 745 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 746 + case BT_GT: 747 + goto gt; 748 + case BT_SOL: 749 + goto sol; 750 + case BT_S: 751 + case BT_CR: 752 + case BT_LF: 753 + ptr += MINBPC(enc); 754 + continue; 755 + default: 756 + *nextTokPtr = ptr; 757 + return XML_TOK_INVALID; 758 + } 759 + return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); 760 + } 761 + return XML_TOK_PARTIAL; 762 + } 763 + case BT_GT: 764 + gt: 765 +
*nextTokPtr = ptr + MINBPC(enc); 766 + return XML_TOK_START_TAG_NO_ATTS; 767 + case BT_SOL: 768 + sol: 769 + ptr += MINBPC(enc); 770 + REQUIRE_CHAR(enc, ptr, end); 771 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 772 + *nextTokPtr = ptr; 773 + return XML_TOK_INVALID; 774 + } 775 + *nextTokPtr = ptr + MINBPC(enc); 776 + return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 777 + default: 778 + *nextTokPtr = ptr; 779 + return XML_TOK_INVALID; 780 + } 781 + } 782 + return XML_TOK_PARTIAL; 783 + } 784 + /* Content tokenizer: hands '<' to scanLt and '&' to scanRef, reports CR, LF and CR LF as XML_TOK_DATA_NEWLINE, rejects "]]>" with XML_TOK_INVALID, and otherwise returns a run of XML_TOK_DATA_CHARS. A CR or ']' at the very end of the buffer yields XML_TOK_TRAILING_CR or XML_TOK_TRAILING_RSQB. */ 785 + static int PTRCALL PREFIX(contentTok)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 786 + if (ptr >= end) return XML_TOK_NONE; 787 + if (MINBPC(enc) > 1) { 788 + size_t n = end - ptr; 789 + if (n & (MINBPC(enc) - 1)) { 790 + n &= ~(MINBPC(enc) - 1); 791 + if (n == 0) return XML_TOK_PARTIAL; 792 + end = ptr + n; 793 + } 794 + } 795 + switch (BYTE_TYPE(enc, ptr)) { 796 + case BT_LT: 797 + return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); 798 + case BT_AMP: 799 + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 800 + case BT_CR: 801 + ptr += MINBPC(enc); 802 + if (!HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; 803 + if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); 804 + *nextTokPtr = ptr; 805 + return XML_TOK_DATA_NEWLINE; 806 + case BT_LF: 807 + *nextTokPtr = ptr + MINBPC(enc); 808 + return XML_TOK_DATA_NEWLINE; 809 + case BT_RSQB: 810 + ptr += MINBPC(enc); 811 + if (!HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; 812 + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; 813 + ptr += MINBPC(enc); 814 + if (!HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; 815 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 816 + ptr -= MINBPC(enc); 817 + break; 818 + } 819 + *nextTokPtr = ptr; 820 + return XML_TOK_INVALID; 821 + INVALID_CASES(ptr, nextTokPtr) 822 + default: 823 + ptr += MINBPC(enc); 824 + break; 825 + } 826 + while (HAS_CHAR(enc, ptr, end)) { 827 + switch (BYTE_TYPE(enc, ptr)) { 828 + #define
LEAD_CASE(n) \ 829 + case BT_LEAD##n: \ 830 + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 831 + *nextTokPtr = ptr; \ 832 + return XML_TOK_DATA_CHARS; \ 833 + } \ 834 + ptr += n; \ 835 + break; 836 + LEAD_CASE(2) 837 + LEAD_CASE(3) 838 + LEAD_CASE(4) 839 + #undef LEAD_CASE 840 + case BT_RSQB: 841 + if (HAS_CHARS(enc, ptr, end, 2)) { 842 + if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { 843 + ptr += MINBPC(enc); 844 + break; 845 + } 846 + if (HAS_CHARS(enc, ptr, end, 3)) { 847 + if (!CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { 848 + ptr += MINBPC(enc); 849 + break; 850 + } 851 + *nextTokPtr = ptr + 2 * MINBPC(enc); 852 + return XML_TOK_INVALID; 853 + } 854 + } 855 + /* fall through */ 856 + case BT_AMP: 857 + case BT_LT: 858 + case BT_NONXML: 859 + case BT_MALFORM: 860 + case BT_TRAIL: 861 + case BT_CR: 862 + case BT_LF: 863 + *nextTokPtr = ptr; 864 + return XML_TOK_DATA_CHARS; 865 + default: 866 + ptr += MINBPC(enc); 867 + break; 868 + } 869 + } 870 + *nextTokPtr = ptr; 871 + return XML_TOK_DATA_CHARS; 872 + } 873 + 874 + /* ptr points to character following "%" */ 875 + /* Returns XML_TOK_PERCENT when the next character is whitespace or another '%' (with *nextTokPtr left on it), or XML_TOK_PARAM_ENTITY_REF for a name terminated by ';'. */ 876 + static int PTRCALL PREFIX(scanPercent)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 877 + REQUIRE_CHAR(enc, ptr, end); 878 + switch (BYTE_TYPE(enc, ptr)) { 879 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 880 + case BT_S: 881 + case BT_LF: 882 + case BT_CR: 883 + case BT_PERCNT: 884 + *nextTokPtr = ptr; 885 + return XML_TOK_PERCENT; 886 + default: 887 + *nextTokPtr = ptr; 888 + return XML_TOK_INVALID; 889 + } 890 + while (HAS_CHAR(enc, ptr, end)) { 891 + switch (BYTE_TYPE(enc, ptr)) { 892 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 893 + case BT_SEMI: 894 + *nextTokPtr = ptr + MINBPC(enc); 895 + return XML_TOK_PARAM_ENTITY_REF; 896 + default: 897 + *nextTokPtr = ptr; 898 + return XML_TOK_INVALID; 899 + } 900 + } 901 + return XML_TOK_PARTIAL; 902 + } 903 + /* Scans a name starting at ptr. Returns XML_TOK_POUND_NAME when the name is followed by whitespace, ')', '>', '%' or '|' (with *nextTokPtr left on that delimiter), and the negated value -XML_TOK_POUND_NAME when the input ends while still inside the name. */ 904 + static int PTRCALL PREFIX(scanPoundName)(const ENCODING* enc,
const char* ptr, const char* end, 905 + const char** nextTokPtr) { 906 + REQUIRE_CHAR(enc, ptr, end); 907 + switch (BYTE_TYPE(enc, ptr)) { 908 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 909 + default: 910 + *nextTokPtr = ptr; 911 + return XML_TOK_INVALID; 912 + } 913 + while (HAS_CHAR(enc, ptr, end)) { 914 + switch (BYTE_TYPE(enc, ptr)) { 915 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 916 + case BT_CR: 917 + case BT_LF: 918 + case BT_S: 919 + case BT_RPAR: 920 + case BT_GT: 921 + case BT_PERCNT: 922 + case BT_VERBAR: 923 + *nextTokPtr = ptr; 924 + return XML_TOK_POUND_NAME; 925 + default: 926 + *nextTokPtr = ptr; 927 + return XML_TOK_INVALID; 928 + } 929 + } 930 + return -XML_TOK_POUND_NAME; 931 + } 932 + /* Scans forward to the quote whose byte type equals open (BT_QUOT or BT_APOS); the other quote kind is skipped as ordinary content. Returns XML_TOK_LITERAL when the closing quote is followed by whitespace, '>', '%' or '[', XML_TOK_INVALID for any other follower, and -XML_TOK_LITERAL when the input ends right after the closing quote. */ 933 + static int PTRCALL PREFIX(scanLit)(int open, const ENCODING* enc, const char* ptr, const char* end, 934 + const char** nextTokPtr) { 935 + while (HAS_CHAR(enc, ptr, end)) { 936 + int t = BYTE_TYPE(enc, ptr); 937 + switch (t) { 938 + INVALID_CASES(ptr, nextTokPtr) 939 + case BT_QUOT: 940 + case BT_APOS: 941 + ptr += MINBPC(enc); 942 + if (t != open) break; 943 + if (!HAS_CHAR(enc, ptr, end)) return -XML_TOK_LITERAL; 944 + *nextTokPtr = ptr; 945 + switch (BYTE_TYPE(enc, ptr)) { 946 + case BT_S: 947 + case BT_CR: 948 + case BT_LF: 949 + case BT_GT: 950 + case BT_PERCNT: 951 + case BT_LSQB: 952 + return XML_TOK_LITERAL; 953 + default: 954 + return XML_TOK_INVALID; 955 + } 956 + default: 957 + ptr += MINBPC(enc); 958 + break; 959 + } 960 + } 961 + return XML_TOK_PARTIAL; 962 + } 963 + 964 + static int PTRCALL PREFIX(prologTok)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 965 + int tok; 966 + if (ptr >= end) return XML_TOK_NONE; 967 + if (MINBPC(enc) > 1) { 968 + size_t n = end - ptr; 969 + if (n & (MINBPC(enc) - 1)) { 970 + n &= ~(MINBPC(enc) - 1); 971 + if (n == 0) return XML_TOK_PARTIAL; 972 + end = ptr + n; 973 + } 974 + } 975 + switch (BYTE_TYPE(enc, ptr)) { 976 + case BT_QUOT: 977 + return PREFIX(scanLit)(BT_QUOT,
enc, ptr + MINBPC(enc), end, nextTokPtr); 978 + case BT_APOS: 979 + return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 980 + case BT_LT: { 981 + ptr += MINBPC(enc); 982 + REQUIRE_CHAR(enc, ptr, end); 983 + switch (BYTE_TYPE(enc, ptr)) { 984 + case BT_EXCL: 985 + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); 986 + case BT_QUEST: 987 + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 988 + case BT_NMSTRT: 989 + case BT_HEX: 990 + case BT_NONASCII: 991 + case BT_LEAD2: 992 + case BT_LEAD3: 993 + case BT_LEAD4: 994 + *nextTokPtr = ptr - MINBPC(enc); 995 + return XML_TOK_INSTANCE_START; 996 + } 997 + *nextTokPtr = ptr; 998 + return XML_TOK_INVALID; 999 + } 1000 + case BT_CR: 1001 + if (ptr + MINBPC(enc) == end) { 1002 + *nextTokPtr = end; 1003 + /* indicate that this might be part of a CR/LF pair */ 1004 + return -XML_TOK_PROLOG_S; 1005 + } 1006 + /* fall through */ 1007 + case BT_S: 1008 + case BT_LF: 1009 + for (;;) { 1010 + ptr += MINBPC(enc); 1011 + if (!HAS_CHAR(enc, ptr, end)) break; 1012 + switch (BYTE_TYPE(enc, ptr)) { 1013 + case BT_S: 1014 + case BT_LF: 1015 + break; 1016 + case BT_CR: 1017 + /* don't split CR/LF pair */ 1018 + if (ptr + MINBPC(enc) != end) break; 1019 + /* fall through */ 1020 + default: 1021 + *nextTokPtr = ptr; 1022 + return XML_TOK_PROLOG_S; 1023 + } 1024 + } 1025 + *nextTokPtr = ptr; 1026 + return XML_TOK_PROLOG_S; 1027 + case BT_PERCNT: 1028 + return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1029 + case BT_COMMA: 1030 + *nextTokPtr = ptr + MINBPC(enc); 1031 + return XML_TOK_COMMA; 1032 + case BT_LSQB: 1033 + *nextTokPtr = ptr + MINBPC(enc); 1034 + return XML_TOK_OPEN_BRACKET; 1035 + case BT_RSQB: 1036 + ptr += MINBPC(enc); 1037 + if (!HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_BRACKET; 1038 + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1039 + REQUIRE_CHARS(enc, ptr, end, 2); 1040 + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { 1041 + *nextTokPtr = ptr 
+ 2 * MINBPC(enc); 1042 + return XML_TOK_COND_SECT_CLOSE; 1043 + } 1044 + } 1045 + *nextTokPtr = ptr; 1046 + return XML_TOK_CLOSE_BRACKET; 1047 + case BT_LPAR: 1048 + *nextTokPtr = ptr + MINBPC(enc); 1049 + return XML_TOK_OPEN_PAREN; 1050 + case BT_RPAR: 1051 + ptr += MINBPC(enc); 1052 + if (!HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_PAREN; 1053 + switch (BYTE_TYPE(enc, ptr)) { 1054 + case BT_AST: 1055 + *nextTokPtr = ptr + MINBPC(enc); 1056 + return XML_TOK_CLOSE_PAREN_ASTERISK; 1057 + case BT_QUEST: 1058 + *nextTokPtr = ptr + MINBPC(enc); 1059 + return XML_TOK_CLOSE_PAREN_QUESTION; 1060 + case BT_PLUS: 1061 + *nextTokPtr = ptr + MINBPC(enc); 1062 + return XML_TOK_CLOSE_PAREN_PLUS; 1063 + case BT_CR: 1064 + case BT_LF: 1065 + case BT_S: 1066 + case BT_GT: 1067 + case BT_COMMA: 1068 + case BT_VERBAR: 1069 + case BT_RPAR: 1070 + *nextTokPtr = ptr; 1071 + return XML_TOK_CLOSE_PAREN; 1072 + } 1073 + *nextTokPtr = ptr; 1074 + return XML_TOK_INVALID; 1075 + case BT_VERBAR: 1076 + *nextTokPtr = ptr + MINBPC(enc); 1077 + return XML_TOK_OR; 1078 + case BT_GT: 1079 + *nextTokPtr = ptr + MINBPC(enc); 1080 + return XML_TOK_DECL_CLOSE; 1081 + case BT_NUM: 1082 + return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1083 + #define LEAD_CASE(n) \ 1084 + case BT_LEAD##n: \ 1085 + if (end - ptr < n) return XML_TOK_PARTIAL_CHAR; \ 1086 + if (IS_INVALID_CHAR(enc, ptr, n)) { \ 1087 + *nextTokPtr = ptr; \ 1088 + return XML_TOK_INVALID; \ 1089 + } \ 1090 + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ 1091 + ptr += n; \ 1092 + tok = XML_TOK_NAME; \ 1093 + break; \ 1094 + } \ 1095 + if (IS_NAME_CHAR(enc, ptr, n)) { \ 1096 + ptr += n; \ 1097 + tok = XML_TOK_NMTOKEN; \ 1098 + break; \ 1099 + } \ 1100 + *nextTokPtr = ptr; \ 1101 + return XML_TOK_INVALID; 1102 + LEAD_CASE(2) 1103 + LEAD_CASE(3) 1104 + LEAD_CASE(4) 1105 + #undef LEAD_CASE 1106 + case BT_NMSTRT: 1107 + case BT_HEX: 1108 + tok = XML_TOK_NAME; 1109 + ptr += MINBPC(enc); 1110 + break; 1111 + case BT_DIGIT: 1112 + 
case BT_NAME: 1113 + case BT_MINUS: 1114 + #ifdef XML_NS 1115 + case BT_COLON: 1116 + #endif 1117 + tok = XML_TOK_NMTOKEN; 1118 + ptr += MINBPC(enc); 1119 + break; 1120 + case BT_NONASCII: 1121 + if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 1122 + ptr += MINBPC(enc); 1123 + tok = XML_TOK_NAME; 1124 + break; 1125 + } 1126 + if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1127 + ptr += MINBPC(enc); 1128 + tok = XML_TOK_NMTOKEN; 1129 + break; 1130 + } 1131 + /* fall through */ 1132 + default: 1133 + *nextTokPtr = ptr; 1134 + return XML_TOK_INVALID; 1135 + } 1136 + while (HAS_CHAR(enc, ptr, end)) { 1137 + switch (BYTE_TYPE(enc, ptr)) { 1138 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1139 + case BT_GT: 1140 + case BT_RPAR: 1141 + case BT_COMMA: 1142 + case BT_VERBAR: 1143 + case BT_LSQB: 1144 + case BT_PERCNT: 1145 + case BT_S: 1146 + case BT_CR: 1147 + case BT_LF: 1148 + *nextTokPtr = ptr; 1149 + return tok; 1150 + #ifdef XML_NS 1151 + case BT_COLON: 1152 + ptr += MINBPC(enc); 1153 + switch (tok) { 1154 + case XML_TOK_NAME: 1155 + REQUIRE_CHAR(enc, ptr, end); 1156 + tok = XML_TOK_PREFIXED_NAME; 1157 + switch (BYTE_TYPE(enc, ptr)) { 1158 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1159 + default: 1160 + tok = XML_TOK_NMTOKEN; 1161 + break; 1162 + } 1163 + break; 1164 + case XML_TOK_PREFIXED_NAME: 1165 + tok = XML_TOK_NMTOKEN; 1166 + break; 1167 + } 1168 + break; 1169 + #endif 1170 + case BT_PLUS: 1171 + if (tok == XML_TOK_NMTOKEN) { 1172 + *nextTokPtr = ptr; 1173 + return XML_TOK_INVALID; 1174 + } 1175 + *nextTokPtr = ptr + MINBPC(enc); 1176 + return XML_TOK_NAME_PLUS; 1177 + case BT_AST: 1178 + if (tok == XML_TOK_NMTOKEN) { 1179 + *nextTokPtr = ptr; 1180 + return XML_TOK_INVALID; 1181 + } 1182 + *nextTokPtr = ptr + MINBPC(enc); 1183 + return XML_TOK_NAME_ASTERISK; 1184 + case BT_QUEST: 1185 + if (tok == XML_TOK_NMTOKEN) { 1186 + *nextTokPtr = ptr; 1187 + return XML_TOK_INVALID; 1188 + } 1189 + *nextTokPtr = ptr + MINBPC(enc); 1190 + return XML_TOK_NAME_QUESTION; 1191 + default: 
1192 + *nextTokPtr = ptr; 1193 + return XML_TOK_INVALID; 1194 + } 1195 + } 1196 + return -tok; 1197 + } 1198 + 1199 + static int PTRCALL PREFIX(attributeValueTok)(const ENCODING* enc, const char* ptr, const char* end, 1200 + const char** nextTokPtr) { 1201 + const char* start; 1202 + if (ptr >= end) 1203 + return XML_TOK_NONE; 1204 + else if (!HAS_CHAR(enc, ptr, end)) { 1205 + /* This line cannot be executed. The incoming data has already 1206 + * been tokenized once, so incomplete characters like this have 1207 + * already been eliminated from the input. Retaining the paranoia 1208 + * check is still valuable, however. 1209 + */ 1210 + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ 1211 + } 1212 + start = ptr; 1213 + while (HAS_CHAR(enc, ptr, end)) { 1214 + switch (BYTE_TYPE(enc, ptr)) { 1215 + #define LEAD_CASE(n) \ 1216 + case BT_LEAD##n: \ 1217 + ptr += n; /* NOTE: The encoding has already been validated. */ \ 1218 + break; 1219 + LEAD_CASE(2) 1220 + LEAD_CASE(3) 1221 + LEAD_CASE(4) 1222 + #undef LEAD_CASE 1223 + case BT_AMP: 1224 + if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1225 + *nextTokPtr = ptr; 1226 + return XML_TOK_DATA_CHARS; 1227 + case BT_LT: 1228 + /* this is for inside entity references */ 1229 + *nextTokPtr = ptr; 1230 + return XML_TOK_INVALID; 1231 + case BT_LF: 1232 + if (ptr == start) { 1233 + *nextTokPtr = ptr + MINBPC(enc); 1234 + return XML_TOK_DATA_NEWLINE; 1235 + } 1236 + *nextTokPtr = ptr; 1237 + return XML_TOK_DATA_CHARS; 1238 + case BT_CR: 1239 + if (ptr == start) { 1240 + ptr += MINBPC(enc); 1241 + if (!HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; 1242 + if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); 1243 + *nextTokPtr = ptr; 1244 + return XML_TOK_DATA_NEWLINE; 1245 + } 1246 + *nextTokPtr = ptr; 1247 + return XML_TOK_DATA_CHARS; 1248 + case BT_S: 1249 + if (ptr == start) { 1250 + *nextTokPtr = ptr + MINBPC(enc); 1251 + return XML_TOK_ATTRIBUTE_VALUE_S; 1252 + } 1253 + *nextTokPtr = 
ptr; 1254 + return XML_TOK_DATA_CHARS; 1255 + default: 1256 + ptr += MINBPC(enc); 1257 + break; 1258 + } 1259 + } 1260 + *nextTokPtr = ptr; 1261 + return XML_TOK_DATA_CHARS; 1262 + } 1263 + 1264 + static int PTRCALL PREFIX(entityValueTok)(const ENCODING* enc, const char* ptr, const char* end, 1265 + const char** nextTokPtr) { 1266 + const char* start; 1267 + if (ptr >= end) 1268 + return XML_TOK_NONE; 1269 + else if (!HAS_CHAR(enc, ptr, end)) { 1270 + /* This line cannot be executed. The incoming data has already 1271 + * been tokenized once, so incomplete characters like this have 1272 + * already been eliminated from the input. Retaining the paranoia 1273 + * check is still valuable, however. 1274 + */ 1275 + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ 1276 + } 1277 + start = ptr; 1278 + while (HAS_CHAR(enc, ptr, end)) { 1279 + switch (BYTE_TYPE(enc, ptr)) { 1280 + #define LEAD_CASE(n) \ 1281 + case BT_LEAD##n: \ 1282 + ptr += n; /* NOTE: The encoding has already been validated. */ \ 1283 + break; 1284 + LEAD_CASE(2) 1285 + LEAD_CASE(3) 1286 + LEAD_CASE(4) 1287 + #undef LEAD_CASE 1288 + case BT_AMP: 1289 + if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1290 + *nextTokPtr = ptr; 1291 + return XML_TOK_DATA_CHARS; 1292 + case BT_PERCNT: 1293 + if (ptr == start) { 1294 + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1295 + return (tok == XML_TOK_PERCENT) ? 
XML_TOK_INVALID : tok; 1296 + } 1297 + *nextTokPtr = ptr; 1298 + return XML_TOK_DATA_CHARS; 1299 + case BT_LF: 1300 + if (ptr == start) { 1301 + *nextTokPtr = ptr + MINBPC(enc); 1302 + return XML_TOK_DATA_NEWLINE; 1303 + } 1304 + *nextTokPtr = ptr; 1305 + return XML_TOK_DATA_CHARS; 1306 + case BT_CR: 1307 + if (ptr == start) { 1308 + ptr += MINBPC(enc); 1309 + if (!HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; 1310 + if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); 1311 + *nextTokPtr = ptr; 1312 + return XML_TOK_DATA_NEWLINE; 1313 + } 1314 + *nextTokPtr = ptr; 1315 + return XML_TOK_DATA_CHARS; 1316 + default: 1317 + ptr += MINBPC(enc); 1318 + break; 1319 + } 1320 + } 1321 + *nextTokPtr = ptr; 1322 + return XML_TOK_DATA_CHARS; 1323 + } 1324 + 1325 + #ifdef XML_DTD 1326 + 1327 + static int PTRCALL PREFIX(ignoreSectionTok)(const ENCODING* enc, const char* ptr, const char* end, 1328 + const char** nextTokPtr) { 1329 + int level = 0; 1330 + if (MINBPC(enc) > 1) { 1331 + size_t n = end - ptr; 1332 + if (n & (MINBPC(enc) - 1)) { 1333 + n &= ~(MINBPC(enc) - 1); 1334 + end = ptr + n; 1335 + } 1336 + } 1337 + while (HAS_CHAR(enc, ptr, end)) { 1338 + switch (BYTE_TYPE(enc, ptr)) { 1339 + INVALID_CASES(ptr, nextTokPtr) 1340 + case BT_LT: 1341 + ptr += MINBPC(enc); 1342 + REQUIRE_CHAR(enc, ptr, end); 1343 + if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { 1344 + ptr += MINBPC(enc); 1345 + REQUIRE_CHAR(enc, ptr, end); 1346 + if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { 1347 + ++level; 1348 + ptr += MINBPC(enc); 1349 + } 1350 + } 1351 + break; 1352 + case BT_RSQB: 1353 + ptr += MINBPC(enc); 1354 + REQUIRE_CHAR(enc, ptr, end); 1355 + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1356 + ptr += MINBPC(enc); 1357 + REQUIRE_CHAR(enc, ptr, end); 1358 + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1359 + ptr += MINBPC(enc); 1360 + if (level == 0) { 1361 + *nextTokPtr = ptr; 1362 + return XML_TOK_IGNORE_SECT; 1363 + } 1364 + --level; 1365 + } 1366 + } 1367 + break; 1368 + default: 1369 + ptr += 
MINBPC(enc); 1370 + break; 1371 + } 1372 + } 1373 + return XML_TOK_PARTIAL; 1374 + } 1375 + 1376 + #endif /* XML_DTD */ 1377 + 1378 + static int PTRCALL PREFIX(isPublicId)(const ENCODING* enc, const char* ptr, const char* end, const char** badPtr) { 1379 + ptr += MINBPC(enc); 1380 + end -= MINBPC(enc); 1381 + for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 1382 + switch (BYTE_TYPE(enc, ptr)) { 1383 + case BT_DIGIT: 1384 + case BT_HEX: 1385 + case BT_MINUS: 1386 + case BT_APOS: 1387 + case BT_LPAR: 1388 + case BT_RPAR: 1389 + case BT_PLUS: 1390 + case BT_COMMA: 1391 + case BT_SOL: 1392 + case BT_EQUALS: 1393 + case BT_QUEST: 1394 + case BT_CR: 1395 + case BT_LF: 1396 + case BT_SEMI: 1397 + case BT_EXCL: 1398 + case BT_AST: 1399 + case BT_PERCNT: 1400 + case BT_NUM: 1401 + #ifdef XML_NS 1402 + case BT_COLON: 1403 + #endif 1404 + break; 1405 + case BT_S: 1406 + if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { 1407 + *badPtr = ptr; 1408 + return 0; 1409 + } 1410 + break; 1411 + case BT_NAME: 1412 + case BT_NMSTRT: 1413 + if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; 1414 + /* fall through */ 1415 + default: 1416 + switch (BYTE_TO_ASCII(enc, ptr)) { 1417 + case 0x24: /* $ */ 1418 + case 0x40: /* @ */ 1419 + break; 1420 + default: 1421 + *badPtr = ptr; 1422 + return 0; 1423 + } 1424 + break; 1425 + } 1426 + } 1427 + return 1; 1428 + } 1429 + 1430 + /* This must only be called for a well-formed start-tag or empty 1431 + element tag. Returns the number of attributes. Pointers to the 1432 + first attsMax attributes are stored in atts. 
1433 + */ 1434 + 1435 + static int PTRCALL PREFIX(getAtts)(const ENCODING* enc, const char* ptr, int attsMax, ATTRIBUTE* atts) { 1436 + enum { other, inName, inValue } state = inName; 1437 + int nAtts = 0; 1438 + int open = 0; /* defined when state == inValue; 1439 + initialization just to shut up compilers */ 1440 + 1441 + for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { 1442 + switch (BYTE_TYPE(enc, ptr)) { 1443 + #define START_NAME \ 1444 + if (state == other) { \ 1445 + if (nAtts < attsMax) { \ 1446 + atts[nAtts].name = ptr; \ 1447 + atts[nAtts].normalized = 1; \ 1448 + } \ 1449 + state = inName; \ 1450 + } 1451 + #define LEAD_CASE(n) \ 1452 + case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ 1453 + START_NAME ptr += (n - MINBPC(enc)); \ 1454 + break; 1455 + LEAD_CASE(2) 1456 + LEAD_CASE(3) 1457 + LEAD_CASE(4) 1458 + #undef LEAD_CASE 1459 + case BT_NONASCII: 1460 + case BT_NMSTRT: 1461 + case BT_HEX: 1462 + START_NAME 1463 + break; 1464 + #undef START_NAME 1465 + case BT_QUOT: 1466 + if (state != inValue) { 1467 + if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); 1468 + state = inValue; 1469 + open = BT_QUOT; 1470 + } else if (open == BT_QUOT) { 1471 + state = other; 1472 + if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; 1473 + nAtts++; 1474 + } 1475 + break; 1476 + case BT_APOS: 1477 + if (state != inValue) { 1478 + if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); 1479 + state = inValue; 1480 + open = BT_APOS; 1481 + } else if (open == BT_APOS) { 1482 + state = other; 1483 + if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; 1484 + nAtts++; 1485 + } 1486 + break; 1487 + case BT_AMP: 1488 + if (nAtts < attsMax) atts[nAtts].normalized = 0; 1489 + break; 1490 + case BT_S: 1491 + if (state == inName) 1492 + state = other; 1493 + else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && 1494 + (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE || 1495 + BYTE_TO_ASCII(enc, ptr + 
MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) 1496 + atts[nAtts].normalized = 0; 1497 + break; 1498 + case BT_CR: 1499 + case BT_LF: 1500 + /* This case ensures that the first attribute name is counted 1501 + Apart from that we could just change state on the quote. */ 1502 + if (state == inName) 1503 + state = other; 1504 + else if (state == inValue && nAtts < attsMax) 1505 + atts[nAtts].normalized = 0; 1506 + break; 1507 + case BT_GT: 1508 + case BT_SOL: 1509 + if (state != inValue) return nAtts; 1510 + break; 1511 + default: 1512 + break; 1513 + } 1514 + } 1515 + /* not reached */ 1516 + } 1517 + 1518 + static int PTRFASTCALL PREFIX(charRefNumber)(const ENCODING* enc, const char* ptr) { 1519 + int result = 0; 1520 + /* skip &# */ 1521 + UNUSED_P(enc); 1522 + ptr += 2 * MINBPC(enc); 1523 + if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1524 + for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 1525 + int c = BYTE_TO_ASCII(enc, ptr); 1526 + switch (c) { 1527 + case ASCII_0: 1528 + case ASCII_1: 1529 + case ASCII_2: 1530 + case ASCII_3: 1531 + case ASCII_4: 1532 + case ASCII_5: 1533 + case ASCII_6: 1534 + case ASCII_7: 1535 + case ASCII_8: 1536 + case ASCII_9: 1537 + result <<= 4; 1538 + result |= (c - ASCII_0); 1539 + break; 1540 + case ASCII_A: 1541 + case ASCII_B: 1542 + case ASCII_C: 1543 + case ASCII_D: 1544 + case ASCII_E: 1545 + case ASCII_F: 1546 + result <<= 4; 1547 + result += 10 + (c - ASCII_A); 1548 + break; 1549 + case ASCII_a: 1550 + case ASCII_b: 1551 + case ASCII_c: 1552 + case ASCII_d: 1553 + case ASCII_e: 1554 + case ASCII_f: 1555 + result <<= 4; 1556 + result += 10 + (c - ASCII_a); 1557 + break; 1558 + } 1559 + if (result >= 0x110000) return -1; 1560 + } 1561 + } else { 1562 + for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 1563 + int c = BYTE_TO_ASCII(enc, ptr); 1564 + result *= 10; 1565 + result += (c - ASCII_0); 1566 + if (result >= 0x110000) return -1; 1567 + } 1568 + } 
1569 + return checkCharRefNumber(result); 1570 + } 1571 + 1572 + static int PTRCALL PREFIX(predefinedEntityName)(const ENCODING* enc, const char* ptr, const char* end) { 1573 + UNUSED_P(enc); 1574 + switch ((end - ptr) / MINBPC(enc)) { 1575 + case 2: 1576 + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { 1577 + switch (BYTE_TO_ASCII(enc, ptr)) { 1578 + case ASCII_l: 1579 + return ASCII_LT; 1580 + case ASCII_g: 1581 + return ASCII_GT; 1582 + } 1583 + } 1584 + break; 1585 + case 3: 1586 + if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 1587 + ptr += MINBPC(enc); 1588 + if (CHAR_MATCHES(enc, ptr, ASCII_m)) { 1589 + ptr += MINBPC(enc); 1590 + if (CHAR_MATCHES(enc, ptr, ASCII_p)) return ASCII_AMP; 1591 + } 1592 + } 1593 + break; 1594 + case 4: 1595 + switch (BYTE_TO_ASCII(enc, ptr)) { 1596 + case ASCII_q: 1597 + ptr += MINBPC(enc); 1598 + if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 1599 + ptr += MINBPC(enc); 1600 + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1601 + ptr += MINBPC(enc); 1602 + if (CHAR_MATCHES(enc, ptr, ASCII_t)) return ASCII_QUOT; 1603 + } 1604 + } 1605 + break; 1606 + case ASCII_a: 1607 + ptr += MINBPC(enc); 1608 + if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 1609 + ptr += MINBPC(enc); 1610 + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1611 + ptr += MINBPC(enc); 1612 + if (CHAR_MATCHES(enc, ptr, ASCII_s)) return ASCII_APOS; 1613 + } 1614 + } 1615 + break; 1616 + } 1617 + } 1618 + return 0; 1619 + } 1620 + 1621 + static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING* enc, const char* ptr1, const char* end1, const char* ptr2) { 1622 + UNUSED_P(enc); 1623 + for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { 1624 + if (end1 - ptr1 < MINBPC(enc)) { 1625 + /* This line cannot be executed. The incoming data has already 1626 + * been tokenized once, so incomplete characters like this have 1627 + * already been eliminated from the input. Retaining the 1628 + * paranoia check is still valuable, however. 
1629 + */ 1630 + return 0; /* LCOV_EXCL_LINE */ 1631 + } 1632 + if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; 1633 + } 1634 + return ptr1 == end1; 1635 + } 1636 + 1637 + static int PTRFASTCALL PREFIX(nameLength)(const ENCODING* enc, const char* ptr) { 1638 + const char* start = ptr; 1639 + for (;;) { 1640 + switch (BYTE_TYPE(enc, ptr)) { 1641 + #define LEAD_CASE(n) \ 1642 + case BT_LEAD##n: \ 1643 + ptr += n; /* NOTE: The encoding has already been validated. */ \ 1644 + break; 1645 + LEAD_CASE(2) 1646 + LEAD_CASE(3) 1647 + LEAD_CASE(4) 1648 + #undef LEAD_CASE 1649 + case BT_NONASCII: 1650 + case BT_NMSTRT: 1651 + #ifdef XML_NS 1652 + case BT_COLON: 1653 + #endif 1654 + case BT_HEX: 1655 + case BT_DIGIT: 1656 + case BT_NAME: 1657 + case BT_MINUS: 1658 + ptr += MINBPC(enc); 1659 + break; 1660 + default: 1661 + return (int)(ptr - start); 1662 + } 1663 + } 1664 + } 1665 + 1666 + static const char* PTRFASTCALL PREFIX(skipS)(const ENCODING* enc, const char* ptr) { 1667 + for (;;) { 1668 + switch (BYTE_TYPE(enc, ptr)) { 1669 + case BT_LF: 1670 + case BT_CR: 1671 + case BT_S: 1672 + ptr += MINBPC(enc); 1673 + break; 1674 + default: 1675 + return ptr; 1676 + } 1677 + } 1678 + } 1679 + 1680 + static void PTRCALL PREFIX(updatePosition)(const ENCODING* enc, const char* ptr, const char* end, POSITION* pos) { 1681 + while (HAS_CHAR(enc, ptr, end)) { 1682 + switch (BYTE_TYPE(enc, ptr)) { 1683 + #define LEAD_CASE(n) \ 1684 + case BT_LEAD##n: \ 1685 + ptr += n; /* NOTE: The encoding has already been validated. 
*/ \ 1686 + pos->columnNumber++; \ 1687 + break; 1688 + LEAD_CASE(2) 1689 + LEAD_CASE(3) 1690 + LEAD_CASE(4) 1691 + #undef LEAD_CASE 1692 + case BT_LF: 1693 + pos->columnNumber = 0; 1694 + pos->lineNumber++; 1695 + ptr += MINBPC(enc); 1696 + break; 1697 + case BT_CR: 1698 + pos->lineNumber++; 1699 + ptr += MINBPC(enc); 1700 + if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); 1701 + pos->columnNumber = 0; 1702 + break; 1703 + default: 1704 + ptr += MINBPC(enc); 1705 + pos->columnNumber++; 1706 + break; 1707 + } 1708 + } 1709 + } 1710 + 1711 + #undef DO_LEAD_CASE 1712 + #undef MULTIBYTE_CASES 1713 + #undef INVALID_CASES 1714 + #undef CHECK_NAME_CASE 1715 + #undef CHECK_NAME_CASES 1716 + #undef CHECK_NMSTRT_CASE 1717 + #undef CHECK_NMSTRT_CASES 1718 + 1719 + #endif /* XML_TOK_IMPL_C */
+74
lib/expat/xmltok_impl.h
··· 1 + /* 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2017-2019 Sebastian Pipping <sebastian@pipping.org> 12 + Licensed under the MIT license: 13 + 14 + Permission is hereby granted, free of charge, to any person obtaining 15 + a copy of this software and associated documentation files (the 16 + "Software"), to deal in the Software without restriction, including 17 + without limitation the rights to use, copy, modify, merge, publish, 18 + distribute, sublicense, and/or sell copies of the Software, and to permit 19 + persons to whom the Software is furnished to do so, subject to the 20 + following conditions: 21 + 22 + The above copyright notice and this permission notice shall be included 23 + in all copies or substantial portions of the Software. 24 + 25 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 28 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 29 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 30 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 31 + USE OR OTHER DEALINGS IN THE SOFTWARE. 32 + */ 33 + 34 + enum { 35 + BT_NONXML, /* e.g. noncharacter-FFFF */ 36 + BT_MALFORM, /* illegal, with regard to encoding */ 37 + BT_LT, /* less than = "<" */ 38 + BT_AMP, /* ampersand = "&" */ 39 + BT_RSQB, /* right square bracket = "]" */ 40 + BT_LEAD2, /* lead byte of a 2-byte UTF-8 character */ 41 + BT_LEAD3, /* lead byte of a 3-byte UTF-8 character */ 42 + BT_LEAD4, /* lead byte of a 4-byte UTF-8 character */ 43 + BT_TRAIL, /* trailing unit, e.g. 
second 16-bit unit of a 4-byte char. */ 44 + BT_CR, /* carriage return = "\r" */ 45 + BT_LF, /* line feed = "\n" */ 46 + BT_GT, /* greater than = ">" */ 47 + BT_QUOT, /* quotation character = "\"" */ 48 + BT_APOS, /* apostrophe = "'" */ 49 + BT_EQUALS, /* equal sign = "=" */ 50 + BT_QUEST, /* question mark = "?" */ 51 + BT_EXCL, /* exclamation mark = "!" */ 52 + BT_SOL, /* solidus, slash = "/" */ 53 + BT_SEMI, /* semicolon = ";" */ 54 + BT_NUM, /* number sign = "#" */ 55 + BT_LSQB, /* left square bracket = "[" */ 56 + BT_S, /* white space, e.g. "\t", " "[, "\r"] */ 57 + BT_NMSTRT, /* non-hex name start letter = "G".."Z" + "g".."z" + "_" */ 58 + BT_COLON, /* colon = ":" */ 59 + BT_HEX, /* hex letter = "A".."F" + "a".."f" */ 60 + BT_DIGIT, /* digit = "0".."9" */ 61 + BT_NAME, /* dot and middle dot = "." + chr(0xb7) */ 62 + BT_MINUS, /* minus = "-" */ 63 + BT_OTHER, /* known not to be a name or name start character */ 64 + BT_NONASCII, /* might be a name or name start character */ 65 + BT_PERCNT, /* percent sign = "%" */ 66 + BT_LPAR, /* left parenthesis = "(" */ 67 + BT_RPAR, /* right parenthesis = ")" */ 68 + BT_AST, /* asterisk = "*" */ 69 + BT_PLUS, /* plus sign = "+" */ 70 + BT_COMMA, /* comma = "," */ 71 + BT_VERBAR /* vertical bar = "|" */ 72 + }; 73 + 74 + #include <stddef.h>
+98
lib/expat/xmltok_ns.c
··· 1 + /* This file is included! 2 + __ __ _ 3 + ___\ \/ /_ __ __ _| |_ 4 + / _ \\ /| '_ \ / _` | __| 5 + | __// \| |_) | (_| | |_ 6 + \___/_/\_\ .__/ \__,_|\__| 7 + |_| XML parser 8 + 9 + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 + Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 + Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> 12 + Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 13 + Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> 14 + Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org> 15 + Licensed under the MIT license: 16 + 17 + Permission is hereby granted, free of charge, to any person obtaining 18 + a copy of this software and associated documentation files (the 19 + "Software"), to deal in the Software without restriction, including 20 + without limitation the rights to use, copy, modify, merge, publish, 21 + distribute, sublicense, and/or sell copies of the Software, and to permit 22 + persons to whom the Software is furnished to do so, subject to the 23 + following conditions: 24 + 25 + The above copyright notice and this permission notice shall be included 26 + in all copies or substantial portions of the Software. 27 + 28 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 31 + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 32 + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 33 + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 34 + USE OR OTHER DEALINGS IN THE SOFTWARE. 
35 + */ 36 + 37 + #ifdef XML_TOK_NS_C 38 + 39 + const ENCODING* NS(XmlGetUtf8InternalEncoding)(void) { return &ns(internal_utf8_encoding).enc; } 40 + 41 + const ENCODING* NS(XmlGetUtf16InternalEncoding)(void) { 42 + #if BYTEORDER == 1234 43 + return &ns(internal_little2_encoding).enc; 44 + #elif BYTEORDER == 4321 45 + return &ns(internal_big2_encoding).enc; 46 + #else 47 + const short n = 1; 48 + return (*(const char*)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc); 49 + #endif 50 + } 51 + 52 + static const ENCODING* const NS(encodings)[] = { 53 + &ns(latin1_encoding).enc, &ns(ascii_encoding).enc, &ns(utf8_encoding).enc, &ns(big2_encoding).enc, 54 + &ns(big2_encoding).enc, &ns(little2_encoding).enc, &ns(utf8_encoding).enc /* NO_ENC */ 55 + }; 56 + 57 + static int PTRCALL NS(initScanProlog)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 58 + return initScan(NS(encodings), (const INIT_ENCODING*)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); 59 + } 60 + 61 + static int PTRCALL NS(initScanContent)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) { 62 + return initScan(NS(encodings), (const INIT_ENCODING*)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); 63 + } 64 + 65 + int NS(XmlInitEncoding)(INIT_ENCODING* p, const ENCODING** encPtr, const char* name) { 66 + int i = getEncodingIndex(name); 67 + if (i == UNKNOWN_ENC) return 0; 68 + SET_INIT_ENC_INDEX(p, i); 69 + p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog); 70 + p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent); 71 + p->initEnc.updatePosition = initUpdatePosition; 72 + p->encPtr = encPtr; 73 + *encPtr = &(p->initEnc); 74 + return 1; 75 + } 76 + 77 + static const ENCODING* NS(findEncoding)(const ENCODING* enc, const char* ptr, const char* end) { 78 + #define ENCODING_MAX 128 79 + char buf[ENCODING_MAX] = ""; 80 + char* p = buf; 81 + int i; 82 + XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); 83 + 
if (ptr != end) return 0; 84 + *p = 0; 85 + if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; 86 + i = getEncodingIndex(buf); 87 + if (i == UNKNOWN_ENC) return 0; 88 + return NS(encodings)[i]; 89 + } 90 + 91 + int NS(XmlParseXmlDecl)(int isGeneralTextEntity, const ENCODING* enc, const char* ptr, const char* end, 92 + const char** badPtr, const char** versionPtr, const char** versionEndPtr, 93 + const char** encodingName, const ENCODING** encoding, int* standalone) { 94 + return doParseXmlDecl(NS(findEncoding), isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, versionEndPtr, 95 + encodingName, encoding, standalone); 96 + } 97 + 98 + #endif /* XML_TOK_NS_C */
+3
platformio.ini
··· 28 28 -DARDUINO_USB_MODE=1 29 29 -DARDUINO_USB_CDC_ON_BOOT=1 30 30 -DMINIZ_NO_ZLIB_COMPATIBLE_NAMES=1 31 + # https://libexpat.github.io/doc/api/latest/#XML_GE 32 + -DXML_GE=0 33 + -DXML_CONTEXT_BYTES=1024 31 34 32 35 ; Libraries 33 36 lib_deps =