summary | refs | log | tree | commit | diff
path: root/xml/xml.c
diff options
context:
space:
mode:
Diffstat (limited to 'xml/xml.c')
-rw-r--r-- xml/xml.c 295
1 files changed, 295 insertions, 0 deletions
diff --git a/xml/xml.c b/xml/xml.c
new file mode 100644
index 0000000..07dae8c
--- /dev/null
+++ b/xml/xml.c
@@ -0,0 +1,295 @@
+/*
+ * $Id: xml.c,v 1.2 2004/12/06 15:01:02 lordjaxom Exp $
+ * This module was kindly provided by Clemens Kirchgatterer
+ */
+
+#include <ctype.h>
+
+#include <iostream>
+#include <fstream>
+
+#include "xml.h"
+
+using namespace std;
+
+enum { // parser states kept in `state` and dispatched on by XML::readChar
+ LOOK4START, // outside markup: collecting character data until the next '<'
+ LOOK4TAG, // just after '<': expecting a tag name, '/', '?' or '!'
+ INTAG, // reading the name of an opening tag
+ LOOK4ATTRN, // inside an opening tag: expecting an attr name, '>' or '/'
+ INATTRN, // reading an attr name
+ LOOK4ATTRV, // after an attr name: expecting the opening quote of its value
+ SAWSLASH, // saw '/' in an opening tag: only '>' may follow
+ INATTRV, // reading a quoted attr value up to the matching delimiter
+ LOOK4CLOSETAG, // after "</": expecting the first char of the closing tag name
+ INCLOSETAG, // reading the closing tag name up to '>'
+};
+
+/*
+ * Construct a parser over the contents of a file.
+ *
+ * The file is opened in binary mode and read completely into `data`; all
+ * callbacks start out unset. If the file cannot be opened, is empty, or
+ * cannot be read, `data` holds only the bytes actually read (possibly
+ * none), so a later parse() simply has less (or nothing) to process.
+ */
+XML::XML(const string &file) {
+  nodestartcb = NULL;
+  nodeendcb = NULL;
+  cdatacb = NULL;
+  parseerrorcb = NULL;
+  progresscb = NULL;
+
+  // Open positioned at the end so tellg() yields the file size.
+  ifstream f(file.c_str(), ios::in | ios::binary | ios::ate);
+  if (!f)
+    return;
+
+  const streamoff size = f.tellg();
+  if (size <= 0)
+    return;
+  f.seekg(0, ios::beg);
+
+  // Read straight into the string with an explicit length: the bytes are
+  // not NUL-terminated and may themselves contain NUL characters.
+  data.resize(static_cast<string::size_type>(size));
+  f.read(&data[0], size);
+
+  // Keep only what was really read in case the stream came up short.
+  data.resize(static_cast<string::size_type>(f.gcount()));
+}
+
+/*
+ * Construct a parser over an in-memory buffer.
+ *
+ * Exactly `len` bytes starting at `mem` are copied into `data`; all
+ * callbacks start out unset.
+ */
+XML::XML(const char *mem, unsigned int len) {
+  progresscb = NULL;
+  parseerrorcb = NULL;
+  cdatacb = NULL;
+  nodeendcb = NULL;
+  nodestartcb = NULL;
+
+  data = string(mem, len);
+}
+
+/*
+ * Register the callback that readChar() invokes with the tag name and the
+ * collected attributes once an opening tag is complete. A zero (false)
+ * result from the callback makes readChar() report an error.
+ */
+void XML::nodeStartCB(XML_NODE_START_CB(cb)) {
+  this->nodestartcb = cb;
+}
+
+/*
+ * Register the callback that readChar() invokes with the tag name when a
+ * closing tag, or the "/>" of a self-closing element, has been read.
+ */
+void XML::nodeEndCB(XML_NODE_END_CB(cb)) {
+  this->nodeendcb = cb;
+}
+
+/*
+ * Register the callback that receives the text accumulated between tags.
+ * readChar() invokes it, after entity decoding, each time a '<' is seen.
+ */
+void XML::cdataCB(XML_CDATA_CB(cb)) {
+  this->cdatacb = cb;
+}
+
+/*
+ * Register the callback that readChar() invokes on a syntax error with
+ * the current line number, a short message and the offending character.
+ */
+void XML::parseErrorCB(XML_PARSE_ERROR_CB(cb)) {
+  this->parseerrorcb = cb;
+}
+
+/*
+ * Register the callback that parse() invokes with an integer percentage
+ * every time that percentage has grown since the previous report.
+ */
+void XML::progressCB(XML_PROGRESS_CB(cb)) {
+  this->progresscb = cb;
+}
+
+/*
+ * Run the state machine over the whole of `data`.
+ *
+ * Every character is handed to readChar(); the registered callbacks fire
+ * from there. When a progress callback is set it is called with the
+ * integer percentage of input consumed whenever that value increases.
+ *
+ * Returns 0 when all input was consumed, or -1 as soon as readChar()
+ * reports any error (its specific error code is not passed on).
+ */
+int
+XML::parse(void) {
+  // Start from a clean slate so that text or tag fragments left behind by
+  // an earlier (possibly aborted) run cannot leak into this one.
+  state = LOOK4START;
+  linenr = 1;
+  skipping = false;
+  cdata = "";
+  tag = "";
+  attr.clear();
+
+  const unsigned int len = data.length();
+  int last = 0;
+
+  for (unsigned int i = 0; i < len; i++) {
+    if (readChar(data[i]))
+      return (-1);
+
+    if (progresscb) {
+      // double represents every unsigned int index exactly, so the
+      // percentage stays accurate for inputs larger than 16 MiB as well.
+      const int percent =
+        static_cast<int>(static_cast<double>(i) * 100.0 / len);
+      if (percent > last) {
+        progresscb(percent);
+        last = percent;
+      }
+    }
+  }
+  return (0);
+}
+
+/*
+ * Tell whether `c` may appear in a tag or attribute name.
+ *
+ * Letters and '_' are always accepted; digits are accepted only when
+ * `start` is false, i.e. anywhere but in the first position of a name.
+ */
+bool
+XML::isTokenChar(bool start, int c) {
+  // The <ctype.h> classifiers are only defined for values representable
+  // as unsigned char (or EOF). Bytes >= 0x80 arrive here as negative ints
+  // when plain char is signed, so normalise before classifying.
+  const int uc = static_cast<unsigned char>(c);
+  return (isalpha(uc) || c == '_' || (!start && isdigit(uc)));
+}
+
+/*
+ * Replace the entities &lt;, &gt; and &amp; in `text` with the characters
+ * they stand for. Scanning resumes right after each '&' examined, so an
+ * '&' produced by decoding "&amp;" is never decoded a second time.
+ */
+static void
+decodeEntities(string &text) {
+  string::size_type pos = 0;
+  while ((pos = text.find('&', pos)) != string::npos) {
+    if (text.compare(pos, 4, "&lt;") == 0)
+      text.replace(pos, 4, "<");
+    else if (text.compare(pos, 4, "&gt;") == 0)
+      text.replace(pos, 4, ">");
+    else if (text.compare(pos, 5, "&amp;") == 0)
+      text.replace(pos, 5, "&");
+    ++pos;
+  }
+}
+
+/*
+ * Feed one input character to the parser state machine.
+ *
+ * Depending on the current `state` the character is appended to the
+ * pending text, tag name, attribute name or attribute value, and the
+ * registered callbacks are invoked when a text run, an opening tag or a
+ * closing tag is complete. Markup starting with "<?" or "<!" is skipped
+ * up to the next '>'.
+ *
+ * Returns 0 to continue. Returns -1 when a node or cdata callback asks to
+ * stop or when the first character of a tag is invalid, and -2 .. -7 for
+ * the other syntax errors; the parse-error callback, if set, is told the
+ * line number, a message and the offending character first.
+ */
+int
+XML::readChar(int c) {
+  // The <ctype.h> classifiers are only defined for values representable
+  // as unsigned char (or EOF); bytes >= 0x80 may arrive as negative ints.
+  const int uc = static_cast<unsigned char>(c);
+
+  // new line?
+  if (c == '\n')
+    linenr++;
+
+  switch (state) {
+
+  // collecting text, looking for element start
+  case LOOK4START:
+    if (c == '<') {
+      // Hand the text gathered so far to the client before the tag.
+      if (cdatacb) {
+        decodeEntities(cdata);
+        if (!cdatacb(cdata))
+          return (-1);
+      }
+      cdata = "";
+      attr.clear();
+      tag = "";
+      state = LOOK4TAG;
+    } else {
+      cdata += c;
+    }
+    break;
+
+  // looking for element tag
+  case LOOK4TAG:
+    // skip comments and declarations up to the next '>'
+    if (skipping) {
+      if (c == '>') {
+        skipping = false;
+        state = LOOK4START;
+      }
+      break;
+    }
+    if (c == '?' || c == '!') {
+      skipping = true;
+      break;
+    }
+    if (isTokenChar(true, uc)) {
+      tag += c;
+      state = INTAG;
+    } else if (c == '/') {
+      state = LOOK4CLOSETAG;
+    } else if (!isspace(uc)) {
+      if (parseerrorcb)
+        parseerrorcb(linenr, "Bogus tag char", c);
+      return (-1);
+    }
+    break;
+
+  // reading tag
+  case INTAG:
+    if (isTokenChar(false, uc)) {
+      tag += c;
+    } else if (c == '>') {
+      if (nodestartcb && !nodestartcb(tag, attr))
+        return (-1);
+      state = LOOK4START;
+    } else if (c == '/') {
+      state = SAWSLASH;
+    } else {
+      // any other character ends the tag name
+      state = LOOK4ATTRN;
+    }
+    break;
+
+  // looking for attr name, > or /
+  case LOOK4ATTRN:
+    if (c == '>') {
+      if (nodestartcb && !nodestartcb(tag, attr))
+        return (-1);
+      state = LOOK4START;
+    } else if (c == '/') {
+      state = SAWSLASH;
+    } else if (isTokenChar(true, uc)) {
+      attrn = "";
+      attrn += c;
+      state = INATTRN;
+    } else if (!isspace(uc)) {
+      if (parseerrorcb)
+        parseerrorcb(linenr, "Bogus 1st attr name char", c);
+      return (-2);
+    }
+    break;
+
+  // saw / in element opening
+  case SAWSLASH:
+    if (c == '>') {
+      // A self-closing element reports both its start and its end.
+      if (nodestartcb && !nodestartcb(tag, attr))
+        return (-1);
+      if (nodeendcb && !nodeendcb(tag))
+        return (-1);
+      state = LOOK4START;
+    } else {
+      if (parseerrorcb)
+        parseerrorcb(linenr, "Bogus char before >", c);
+      return (-3);
+    }
+    break;
+
+  // reading attr name
+  case INATTRN:
+    if (isTokenChar(false, uc)) {
+      attrn += c;
+    } else if (isspace(uc) || c == '=') {
+      state = LOOK4ATTRV;
+    } else {
+      if (parseerrorcb)
+        parseerrorcb(linenr, "Bogus attr name char", c);
+      return (-4);
+    }
+    break;
+
+  // looking for attr value
+  case LOOK4ATTRV:
+    if (c == '\'' || c == '"') {
+      // remember which quote opened the value; the same one closes it
+      delim = c;
+      attrv = "";
+      state = INATTRV;
+    } else if (!(isspace(uc) || c == '=')) {
+      if (parseerrorcb)
+        parseerrorcb(linenr, "No attribute value", c);
+      return (-5);
+    }
+    break;
+
+  // in attr value
+  case INATTRV:
+    if (c == delim) {
+      attr[attrn] = attrv;
+      state = LOOK4ATTRN;
+    } else if (!iscntrl(uc)) {
+      // control characters inside a value are dropped
+      attrv += c;
+    }
+    break;
+
+  // looking for closing tag after <
+  case LOOK4CLOSETAG:
+    if (isTokenChar(true, uc)) {
+      tag += c;
+      state = INCLOSETAG;
+    } else if (!isspace(uc)) {
+      if (parseerrorcb)
+        parseerrorcb(linenr, "Bogus preend tag char", c);
+      return (-6);
+    }
+    break;
+
+  // reading closing tag
+  case INCLOSETAG:
+    if (isTokenChar(false, uc)) {
+      tag += c;
+    } else if (c == '>') {
+      // Report the abort as an error, like every other callback site;
+      // returning 0 here would let parsing continue in a stale state.
+      if (nodeendcb && !nodeendcb(tag))
+        return (-1);
+      state = LOOK4START;
+    } else if (!isspace(uc)) {
+      if (parseerrorcb)
+        parseerrorcb(linenr, "Bogus end tag char", c);
+      return (-7);
+    }
+    break;
+  }
+
+  return (0);
+}