A fork of https://github.com/crosspoint-reader/crosspoint-reader
1#include "OpdsParser.h"
2
3#include <Logging.h>
4#include <XmlParserUtils.h>
5
6#include <cstring>
7
8OpdsParser::OpdsParser() {
9 parser = XML_ParserCreate(nullptr);
10 if (!parser) {
11 errorOccured = true;
12 LOG_DBG("OPDS", "Couldn't allocate memory for parser");
13 }
14}
15
16OpdsParser::~OpdsParser() { destroyXmlParser(parser); }
17
18size_t OpdsParser::write(uint8_t c) { return write(&c, 1); }
19
20size_t OpdsParser::write(const uint8_t* xmlData, const size_t length) {
21 if (errorOccured) return length;
22
23 XML_SetUserData(parser, this);
24 XML_SetElementHandler(parser, startElement, endElement);
25 XML_SetCharacterDataHandler(parser, characterData);
26
27 const char* currentPos = reinterpret_cast<const char*>(xmlData);
28 size_t remaining = length;
29 constexpr size_t chunkSize = 1024;
30
31 while (remaining > 0) {
32 void* const buf = XML_GetBuffer(parser, chunkSize);
33 if (!buf) {
34 errorOccured = true;
35 LOG_DBG("OPDS", "Couldn't allocate memory for buffer");
36 destroyXmlParser(parser);
37 return length;
38 }
39
40 const size_t toRead = remaining < chunkSize ? remaining : chunkSize;
41 memcpy(buf, currentPos, toRead);
42
43 if (XML_ParseBuffer(parser, static_cast<int>(toRead), 0) == XML_STATUS_ERROR) {
44 errorOccured = true;
45 LOG_DBG("OPDS", "Parse error at line %lu: %s", XML_GetCurrentLineNumber(parser),
46 XML_ErrorString(XML_GetErrorCode(parser)));
47 destroyXmlParser(parser);
48 return length;
49 }
50 currentPos += toRead;
51 remaining -= toRead;
52 }
53 return length;
54}
55
56void OpdsParser::flush() {
57 if (XML_Parse(parser, nullptr, 0, XML_TRUE) != XML_STATUS_OK) {
58 errorOccured = true;
59 destroyXmlParser(parser);
60 }
61}
62
63bool OpdsParser::error() const { return errorOccured; }
64
65void OpdsParser::clear() {
66 entries.clear();
67 searchTemplate.clear();
68 nextPageUrl.clear();
69 prevPageUrl.clear();
70 currentEntry = OpdsEntry{};
71 currentText.clear();
72 inEntry = inTitle = inAuthor = inAuthorName = inId = false;
73}
74
75std::vector<OpdsEntry> OpdsParser::getBooks() const {
76 std::vector<OpdsEntry> books;
77 for (const auto& entry : entries) {
78 if (entry.type == OpdsEntryType::BOOK) books.push_back(entry);
79 }
80 return books;
81}
82
83const char* OpdsParser::findAttribute(const XML_Char** atts, const char* name) {
84 for (int i = 0; atts[i]; i += 2) {
85 if (strcmp(atts[i], name) == 0) return atts[i + 1];
86 }
87 return nullptr;
88}
89
90void XMLCALL OpdsParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
91 auto* self = static_cast<OpdsParser*>(userData);
92
93 if (strcmp(name, "link") == 0 || strstr(name, ":link") != nullptr) {
94 const char* href = findAttribute(atts, "href");
95 if (href) {
96 const char* rel = findAttribute(atts, "rel");
97 const char* type = findAttribute(atts, "type");
98
99 if (rel && strcmp(rel, "search") == 0) {
100 std::string sHref(href);
101 if (sHref.find("{searchTerms}") != std::string::npos) {
102 self->searchTemplate = sHref;
103 }
104 } else if (rel && strcmp(rel, "next") == 0 && !self->inEntry) {
105 self->nextPageUrl = href;
106 } else if (rel && strcmp(rel, "previous") == 0 && !self->inEntry) {
107 self->prevPageUrl = href;
108 }
109
110 if (self->inEntry) {
111 if (rel && type && strstr(rel, "opds-spec.org/acquisition") != nullptr &&
112 strcmp(type, "application/epub+zip") == 0) {
113 self->currentEntry.type = OpdsEntryType::BOOK;
114 self->currentEntry.href = href;
115 } else if (type && strstr(type, "application/atom+xml") != nullptr) {
116 if (self->currentEntry.type != OpdsEntryType::BOOK) {
117 self->currentEntry.type = OpdsEntryType::NAVIGATION;
118 self->currentEntry.href = href;
119 }
120 }
121 }
122 }
123 }
124
125 if (strcmp(name, "entry") == 0 || strstr(name, ":entry") != nullptr) {
126 self->inEntry = true;
127 self->currentEntry = OpdsEntry{};
128 return;
129 }
130
131 if (!self->inEntry) return;
132
133 if (strcmp(name, "title") == 0 || strstr(name, ":title") != nullptr) {
134 self->inTitle = true;
135 self->currentText.clear();
136 } else if (strcmp(name, "author") == 0 || strstr(name, ":author") != nullptr) {
137 self->inAuthor = true;
138 } else if (self->inAuthor && (strcmp(name, "name") == 0 || strstr(name, ":name") != nullptr)) {
139 self->inAuthorName = true;
140 self->currentText.clear();
141 } else if (strcmp(name, "id") == 0 || strstr(name, ":id") != nullptr) {
142 self->inId = true;
143 self->currentText.clear();
144 }
145}
146
147void XMLCALL OpdsParser::endElement(void* userData, const XML_Char* name) {
148 auto* self = static_cast<OpdsParser*>(userData);
149
150 if (strcmp(name, "entry") == 0 || strstr(name, ":entry") != nullptr) {
151 if (!self->currentEntry.title.empty() && !self->currentEntry.href.empty()) {
152 self->entries.push_back(self->currentEntry);
153 }
154 self->inEntry = false;
155 } else if (self->inEntry) {
156 if (strcmp(name, "title") == 0 || strstr(name, ":title") != nullptr) {
157 if (self->inTitle) self->currentEntry.title = self->currentText;
158 self->inTitle = false;
159 } else if (strcmp(name, "author") == 0 || strstr(name, ":author") != nullptr) {
160 self->inAuthor = false;
161 } else if (self->inAuthorName && (strcmp(name, "name") == 0 || strstr(name, ":name") != nullptr)) {
162 self->currentEntry.author = self->currentText;
163 self->inAuthorName = false;
164 } else if (strcmp(name, "id") == 0 || strstr(name, ":id") != nullptr) {
165 if (self->inId) self->currentEntry.id = self->currentText;
166 self->inId = false;
167 }
168 }
169}
170
171void XMLCALL OpdsParser::characterData(void* userData, const XML_Char* s, const int len) {
172 auto* self = static_cast<OpdsParser*>(userData);
173 if (self->inTitle || self->inAuthorName || self->inId) {
174 self->currentText.append(s, len);
175 }
176}