Back Midas Rome Roody Rootana
  Rome Analyzer Framework  Not logged in ELOG logo
Entry  08 Sep 2005, Ryu Sawada, Forum, Dividing xml the definition xml file. 
    Reply  06 Oct 2005, Ryu Sawada, Forum, Dividing xml the definition xml file. mxml.diff
       Reply  07 Oct 2005, Ryu Sawada, Forum, Dividing xml the definition xml file. mxml.patch
Message ID: 100     Entry time: 06 Oct 2005     In reply to: 99     Reply to this: 101
Author: Ryu Sawada 
Topic: Forum 
Subject: Dividing xml the definition xml file. 
> I am thinking of dividing my definition XML file into several files.
> 
> I do not know if there is a general way, but I found some web pages that mention it. According to
> these pages, we can include one XML document in another like this:
> 
> ---------- booklist.xml ------------
> <?xml version="1.0"?>
> <!DOCTYPE books [
> <!ENTITY book1 SYSTEM "book1.xml">
> ]>
> 
> <books>
>   &book1;
> </books>
> ------------------------------------
> ------------ book1.xml -------------
> <?xml version="1.0"?>
> <book>
>   <title>Title of the book</title>
>   <author>Author of the book</author>
> </book>
> ------------------------------------
> 
> Is it possible to do this with mxml?

I made a patch to enable it with mxml.
Please test it.

If it is reliable, I will commit it.

----
cd $ROMESYS
patch -p0 < mxml.diff  
----
Attachment 1: mxml.diff  10 kB  | Hide | Hide all
Index: include/mxml.h
===================================================================
RCS file: /usr/local/cvsroot/rome/include/mxml.h,v
retrieving revision 1.7
diff -u -r1.7 mxml.h
--- include/mxml.h	12 Jul 2005 09:04:15 -0000	1.7
+++ include/mxml.h	6 Oct 2005 21:53:13 -0000
@@ -43,6 +43,10 @@
 #define PROCESSING_INSTRUCTION_NODE   3
 #define COMMENT_NODE                  4
 #define DOCUMENT_NODE                 5
+#define ENTITY_NODE                   6
+
+#define INTERNAL_ENTITY               0
+#define EXTERNAL_ENTITY               1
 
 typedef struct {
    int  fh;
@@ -115,6 +119,7 @@
 PMXML_NODE mxml_create_root_node();
 PMXML_NODE mxml_parse_file(char *file_name, char *error, int error_size);
 PMXML_NODE mxml_parse_buffer(char *buffer, char *error, int error_size);
+PMXML_NODE mxml_parse_entity(char *buf, char *error, int error_size);
 int mxml_write_tree(char *file_name, PMXML_NODE tree);
 void mxml_debug_tree(PMXML_NODE tree, int level);
 void mxml_free_tree(PMXML_NODE tree);
Index: src/mxml.c
===================================================================
RCS file: /usr/local/cvsroot/rome/src/mxml.c,v
retrieving revision 1.8
diff -u -r1.8 mxml.c
--- src/mxml.c	11 May 2005 12:50:02 -0000	1.8
+++ src/mxml.c	6 Oct 2005 21:53:13 -0000
@@ -1209,6 +1209,30 @@
 
             p += 2;
 
+         } else if (strncmp(p, "!ENTITY", 7) == 0) {
+
+            /* found !ENTITY element */
+            pnew = mxml_add_special_node(ptree, ENTITY_NODE, "ENTYTY", NULL);
+            pv = p+1;
+
+            p++;
+            if (strstr(p, ">") == NULL)
+               return read_error(HERE, "Unterminated !ENTITY element");
+
+            while (*p != '>') {
+               if (*p == '\n')
+                  line_number++;
+               p++;
+            }
+
+            len = (size_t)p - (size_t)pv;
+            pnew->value = (char *)malloc(len+1);
+            memcpy(pnew->value, pv, len);
+            pnew->value[len] = 0;
+            mxml_decode(pnew->value);
+
+            p ++;
+
          } else {
             
             /* found normal element */
@@ -1407,6 +1431,249 @@
 
 /*------------------------------------------------------------------*/
 
+PMXML_NODE mxml_parse_entity(char *buf, char *error, int error_size)
+/* parse !ENTYTY entries of XML files and replace with references. Return NULL
+   in case of error, return error description. Optional file_name is used
+   for error reporting if called from mxml_parse_file() */
+{
+   char *p;
+   char *pv;
+   char delimiter;
+   int  i,j,k, line_number;
+   char replacement[1000];
+   char entity_name[256][256];
+   char entity_reference_name[256][256];
+   char *entity_value[256];
+   int  entity_type[256]; /* internal or external */
+   int  nentity;
+   int  fh, length,len;
+   char *buffer;
+   PMXML_NODE root = mxml_create_root_node(); // dummy for 'HERE'
+   char *file_name = NULL; // dummy for 'HERE'
+
+   /* copy string to temporary space */
+   buffer = (char *)malloc(strlen(buf)+1);
+   strcpy(buffer,buf);
+   free(buf);
+
+   p = buffer;
+   line_number = 1;
+   nentity = -1;
+
+   /* search !ENTITY */
+   do {
+      if (*p == '<') {
+
+         /* found new entity */
+         p++;
+         while (*p && isspace(*p)) {
+            if (*p == '\n')
+               line_number++;
+            p++;
+         }
+         if (!*p)
+            return read_error(HERE, "Unexpected end of file");
+
+         if (strncmp(p, "!ENTITY", 7) == 0) {
+
+            /* found entity */
+            nentity++;
+            if(nentity>=1000)
+               return read_error(HERE, "Too much entities");
+
+            pv = p+7;
+            while (*pv == ' ')
+               pv++;
+
+            /* extract entity name */
+            p = pv;
+
+            while (*p && isspace(*p) && *p != '<' && *p != '>') {
+               if (*p == '\n')
+                  line_number++;
+               p++;
+            }
+            if (!*p)
+               return read_error(HERE, "Unexpected end of file");
+            if (*p == '<' || *p == '>')
+               return read_error(HERE, "Unexpected \'%c\' inside !ENTITY", *p);
+
+            pv = p;
+            while (*pv && !isspace(*pv) && *pv != '<' && *pv != '>')
+               pv++;
+
+            if (!*pv)
+               return read_error(HERE, "Unexpected end of file");
+            if (*pv == '<'  || *pv == '>' )
+               return read_error(HERE, "Unexpected \'%c\' inside entity \"%s\"", *pv, &entity_name[nentity][1]);
+
+            len = (size_t)pv - (size_t)p;
+            if (len > (int)sizeof(replacement)-1)
+               len = sizeof(replacement)-1;
+            memcpy(replacement, p, len);
+            replacement[len] = 0;
+            mxml_decode(replacement);
+
+            entity_name[nentity][0] = '&';
+            i = 1;
+            entity_name[nentity][i] = 0;
+            while (*p && !isspace(*p) && *p != '/' && *p != '>' && *p != '<' && i<253)
+               entity_name[nentity][i++] = *p++;
+            entity_name[nentity][i++] = ';';
+            entity_name[nentity][i] = 0;
+
+            if (!*p)
+               return read_error(HERE, "Unexpected end of file");
+            if (*p == '<')
+               return read_error(HERE, "Unexpected \'<\' inside entity \"%s\"", &entity_name[nentity][1]);
+
+            /* extract replacement or SYSTEM*/
+            while (*p && isspace(*p)) {
+               if (*p == '\n')
+                  line_number++;
+               p++;
+            }
+            if (!*p)
+               return read_error(HERE, "Unexpected end of file");
+            if (*p == '>')
+               return read_error(HERE, "Unexpected \'>\' inside entity \"%s\"", &entity_name[nentity][1]);
+
+            /* check if SYSTEM */
+            if(strncmp(p, "SYSTEM", 6) == 0){
+               entity_type[nentity] = EXTERNAL_ENTITY;
+               p += 6;
+            }
+            else{
+               entity_type[nentity] = INTERNAL_ENTITY;
+            }
+
+            /* extract replacement */
+            while (*p && isspace(*p)) {
+               if (*p == '\n')
+                  line_number++;
+               p++;
+            }
+            if (!*p)
+               return read_error(HERE, "Unexpected end of file");
+            if (*p == '>')
+               return read_error(HERE, "Unexpected \'>\' inside entity \"%s\"", &entity_name[nentity][1]);
+
+            if (*p != '\"' && *p != '\'')
+               return read_error(HERE, "Replacement was not found for entity \"%s\"", &entity_name[nentity][1]);
+            delimiter = *p;
+            p++;
+            if (!*p)
+               return read_error(HERE, "Unexpected end of file");
+            pv = p;
+            while (*pv && *pv != delimiter )
+               pv++;
+
+            if (!*pv)
+               return read_error(HERE, "Unexpected end of file");
+            if (*pv == '<' )
+               return read_error(HERE, "Unexpected \'%c\' inside entity \"%s\"", *pv, &entity_name[nentity][1]);
+
+            len = (size_t)pv - (size_t)p;
+            if (len > (int)sizeof(replacement)-1)
+               len = sizeof(replacement)-1;
+            memcpy(replacement, p, len);
+            replacement[len] = 0;
+
+            if(entity_type[nentity] == EXTERNAL_ENTITY){
+               strcpy(entity_reference_name[nentity],replacement);
+            }
+            else{
+               entity_value[nentity] = (char *)malloc(strlen(replacement));
+               strcpy(entity_value[nentity],replacement);
+            }
+
+            p = pv;
+            while (*p && isspace(*p)) {
+               if (*p == '\n')
+                  line_number++;
+               p++;
+            }
+            if (!*p)
+               return read_error(HERE, "Unexpected end of file");
+         }
+      }
+
+      /* go to next element */
+      while (*p && *p != '<') {
+         if (*p == '\n')
+            line_number++;
+         p++;
+      }
+   } while (*p);
+   nentity++;
+
+   /* read external file */
+   for(i=0;i<nentity;i++){
+      if(entity_type[i] == EXTERNAL_ENTITY){
+         fh = open(entity_reference_name[i], O_RDONLY | O_TEXT, 0644);
+
+         if (fh == -1) {
+            return read_error(HERE, "Unable to open file \"%s\"",entity_reference_name[i]);
+         }
+
+         length = lseek(fh, 0, SEEK_END);
+         lseek(fh, 0, SEEK_SET);
+         entity_value[i] = (char *)malloc(length+1);
+         if (entity_value[i] == NULL) {
+            close(fh);
+            return read_error(HERE, "Cannot allocate buffer of %d bytes for \"%s\".",length+1,&entity_name[1]);
+         }
+
+         /* read complete file at once */
+         length = read(fh, entity_value[i], length);
+         entity_value[i][length] = 0;
+         close(fh);
+      }
+   }
+
+   /* count length of output string */
+   length = strlen(buffer);
+   for(i=0;i<nentity;i++){
+      p = buffer;
+      while(1){
+         pv =strstr(entity_name[i],p);
+         if(pv){
+            length += -strlen(entity_name[i]) + strlen(entity_value[i]);
+            p = pv+1;
+         }
+         else{
+            break;
+         }
+      }
+   }
+
+   /* allocate memory */
+   buf = (char *)malloc(length+1);
+
+   /* replace entities */
+   p = buffer;
+   pv = buf;
+   do {
+      if (*p == '&') {
+         /* found entity */
+         for(j=0;j<nentity;j++){
+            if(strncmp(p,entity_name[j],strlen(entity_name[j])) == 0){
+               for(k=0;k<(int)strlen(entity_value[j]);k++)
+                  *pv++ = entity_value[j][k];
+               p += strlen(entity_name[j]);
+               break;
+            }
... 27 more lines ...
ELOG V3.1.4-2e1708b5