68#define NumberPredefinedEntities 10
69#define XMLWhitespace "\t\r\n "
213 const char *path,
const size_t offset)
239 if (components == (
char **) NULL)
241 for (i=0; i < (ssize_t) number_components; i++)
261 for ( ; i < (ssize_t) number_components; i++)
314 for (p=
content; *p !=
'\0'; p++)
315 length+=(*p & 0x80) != 0 ? 2 : 1;
316 utf8=(
unsigned char *) NULL;
319 if (utf8 == (
unsigned char *) NULL)
320 return((
unsigned char *) NULL);
322 for (p=
content; *p !=
'\0'; p++)
329 *q++=0xc0 | ((c >> 6) & 0x3f);
330 *q++=0x80 | (c & 0x3f);
358 if (utf8 == (
unsigned char *) NULL)
359 return((
char *) NULL);
360 for (p=utf8; *p !=
'\0'; p++)
361 if ((*p < 0x20) && (*p != 0x09) && (*p != 0x0a) && (*p != 0x0d))
368 base64=
Base64Encode(utf8,strlen((
char *) utf8),&length);
370 if (base64 == (
char *) NULL)
371 return((
char *) NULL);
376 return(canonical_content);
384 for (p=utf8; *p !=
'\0'; p++)
390 sizeof(*canonical_content));
391 if (canonical_content == (
char *) NULL)
392 return(canonical_content);
420 canonical_content[i++]=(char) (*p);
430 canonical_content[i++]=(char) (*p);
443 canonical_content[i++]=(char) (*p);
448 canonical_content[i]=
'\0';
450 return(canonical_content);
485 return((
char **) NULL);
486 for (i=0;
attributes[i] != (
char *) NULL; i+=2)
497 return((
char **) NULL);
558 for (i=0; root->
attributes[i] != (
char **) NULL; i++)
563 for (j=1;
attributes[j] != (
char *) NULL; j+=3)
656 assert(filename != (
const char *) NULL);
662 return((
char *) NULL);
665 if ((file == fileno(stdin)) || (
offset < 0) ||
679 if ((fstat(file,&file_stats) == 0) && (file_stats.st_size > 0))
682 for (i=0; xml != (
char *) NULL; i+=count)
684 count=read(file,xml+i,quantum);
691 if (~((
size_t) i) < (quantum+1))
697 if ((
size_t) (i+count) >= extent)
702 if (xml == (
char *) NULL)
703 return((
char *) NULL);
707 return((
char *) NULL);
709 length=(size_t)
WizardMin(i+count,extent);
717 if (xml == (
char *) NULL)
720 return((
char *) NULL);
723 if (map != (
char *) NULL)
725 (void) memcpy(xml,map,length);
730 (void) lseek(file,0,SEEK_SET);
731 for (i=0; i < length; i+=count)
733 count=read(file,xml+i,(
size_t)
WizardMin(length-i,(ssize_t) SSIZE_MAX));
745 return((
char *) NULL);
784 return(xml_info->
next);
829 return((
const char *) NULL);
831 while ((xml_info->
attributes[i] != (
char *) NULL) &&
840 while ((root->
attributes[i] != (
char **) NULL) &&
844 return((
const char *) NULL);
846 while ((root->
attributes[i][j] != (
char *) NULL) &&
850 return((
const char *) NULL);
894 while (xml_info->
attributes[i] != (
char *) NULL)
936 child=xml_info->
child;
937 if (tag != (
const char *) NULL)
938 while ((child != (
XMLTreeInfo *) NULL) && (strcmp(child->
tag,tag) != 0))
1056 if (components == (
char **) NULL)
1058 for (i=0; i < (ssize_t) number_components; i++)
1075 for ( ; i < (ssize_t) number_components; i++)
1189 return(xml_info->
tag);
1235 xml_info->
child=child;
1238 head=xml_info->
child;
1239 if (head->
offset > offset)
1242 xml_info->
child=child;
1334 if (utf8 == (
char *) NULL)
1335 return((
char *) NULL);
1336 encoding=(*content ==
'\xFE') ? 1 : (*content ==
'\xFF') ? 0 : -1;
1342 (void) memcpy(utf8,content,*length*
sizeof(*utf8));
1348 for (i=2; i < (ssize_t) (*length-1); i+=2)
1350 c=(encoding != 0) ? ((content[i] & 0xff) << 8) | (content[i+1] & 0xff) :
1351 ((content[i+1] & 0xff) << 8) | (content[i] & 0xff);
1352 if ((c >= 0xd800) && (c <= 0xdfff) && ((i+=2) < (ssize_t) (*length-1)))
1354 byte=(encoding != 0) ? ((content[i] & 0xff) << 8) |
1355 (content[i+1] & 0xff) : ((content[i+1] & 0xff) << 8) |
1356 (content[i] & 0xff);
1357 c=(((c & 0x3ff) << 10) | (
byte & 0x3ff))+0x10000;
1363 if (utf8 == (
char *) NULL)
1376 for (bits=0;
byte != 0;
byte/=2)
1379 utf8[j++]=(0xFF << (7-bits)) | (c >> (6*bits));
1383 utf8[j]=0x80 | ((c >> (6*bits)) & 0x3f);
1389 if (utf8 != (
char *) NULL)
1422 for ( ; *xml !=
'\0'; xml++)
1423 while (*xml ==
'\r')
1427 (void) memmove(xml,xml+1,strlen(xml));
1431 while ((*xml !=
'\0') && (*xml !=
'&') && ((*xml !=
'%') ||
1432 (state !=
'%')) && (isspace((
int) ((
unsigned char) *xml)) == 0))
1444 if ((state !=
'c') && (strncmp(xml,
"&#",2) == 0))
1450 c=strtol(xml+2,&entity,10);
1452 c=strtol(xml+3,&entity,16);
1453 if ((c == 0) || (*entity !=
';'))
1469 for (i=0;
byte != 0;
byte/=2)
1472 *xml=(char) ((0xFF << (7-i)) | (c >> (6*i)));
1477 *xml=(char) (0x80 | ((c >> (6*i)) & 0x3F));
1481 (void) memmove(xml,strchr(xml,
';')+1,strlen(strchr(xml,
';')));
1484 if (((*xml ==
'&') && ((state ==
'&') || (state ==
' ') ||
1485 (state ==
'*'))) || ((state ==
'%') && (*xml ==
'%')))
1491 while ((entities[i] != (
char *) NULL) &&
1492 (strncmp(xml+1,entities[i],strlen(entities[i])) != 0))
1494 if (entities[i++] == (
char *) NULL)
1497 if (entities[i] != (
char *) NULL)
1502 length=strlen(entities[i]);
1503 entity=strchr(xml,
';');
1504 if ((entity != (
char *) NULL) &&
1505 ((length-1L) >= (
size_t) (entity-xml)))
1507 offset=(ssize_t) (xml-p);
1508 extent=(size_t) (offset+length+strlen(entity));
1520 sizeof(*extent_xml));
1521 if (extent_xml != (
char *) NULL)
1523 memset(extent_xml,0,extent*
1524 sizeof(*extent_xml));
1526 sizeof(*extent_xml));
1530 if (p == (
char *) NULL)
1532 "unable to acquire string `%s'");
1534 entity=strchr(xml,
';');
1536 if (entity != (
char *) NULL)
1537 (void) memmove(xml+length,entity+1,strlen(entity));
1538 (void) strncpy(xml,entities[i],length);
1542 if (((state ==
' ') || (state ==
'*')) &&
1543 (isspace((
int) ((
unsigned char) *xml)) != 0))
1553 for (xml=p; *xml !=
'\0'; xml++)
1558 i=(ssize_t) strspn(xml,accept);
1560 (void) memmove(xml,xml+i,strlen(xml+i)+1);
1561 while ((*xml !=
'\0') && (*xml !=
' '))
1567 if ((xml >= p) && (*xml ==
' '))
1579 xml_info=root->
node;
1580 if ((xml_info == (
XMLTreeInfo *) NULL) || (xml_info->
tag == (
char *) NULL) ||
1585 if ((xml_info->
content != (
char *) NULL) && (*xml_info->
content !=
'\0'))
1592 if (xml_info->
content != (
char *) NULL)
1602 (root->
node->
tag == (
char *) NULL) || (strcmp(tag,root->
node->
tag) != 0))
1605 "unexpected closing tag </%s>",tag);
1606 return(&root->
root);
1613 const size_t depth,
char **entities)
1625 while ((*xml !=
'\0') && (*xml !=
'&'))
1629 if (strncmp(xml+1,tag,strlen(tag)) == 0)
1632 while ((entities[i] != (
char *) NULL) &&
1633 (strncmp(entities[i],xml+1,strlen(entities[i])) == 0))
1635 if ((entities[i] != (
char *) NULL) &&
1662 if (strcmp(target,
"xml") == 0)
1664 xml=strstr(xml,
"standalone");
1665 if ((xml != (
char *) NULL) &&
1666 (strncmp(xml+strspn(xml+10,
XMLWhitespace "='\"")+10,
"yes",3) == 0))
1714 root->
root.
tag != (
char *) NULL ?
">" :
"<",2);
1726 **predefined_entitites,
1739 if (predefined_entitites == (
char **) NULL)
1742 "memory allocation failed `%s'",strerror(errno));
1746 for (xml[length]=
'\0'; xml != (
char *) NULL; )
1748 while ((*xml !=
'\0') && (*xml !=
'<') && (*xml !=
'%'))
1752 if ((strlen(xml) > 9) && (strncmp(xml,
"<!ENTITY",8) == 0))
1762 if ((isalpha((
int) ((
unsigned char) *n)) == 0) && (*n !=
'_'))
1771 if ((q !=
'"') && (q !=
'\''))
1776 xml=strchr(xml,
'>');
1779 entities=(*c ==
'%') ? predefined_entitites : root->
entities;
1780 for (i=0; entities[i] != (
char *) NULL; i++) ;
1783 if (entities == (
char **) NULL)
1785 "unable to acquire string `%s'");
1787 predefined_entitites=entities;
1793 if (xml != (
char *) NULL)
1799 entities[i+2]=(
char *) NULL;
1804 if (entities[i+1] != v)
1809 predefined_entitites);
1814 if (strncmp(xml,
"<!ATTLIST",9) == 0)
1825 predefined_entitites);
1833 while ((root->
attributes[i] != (
char **) NULL) &&
1834 (n != (
char *) NULL) &&
1848 predefined_entitites);
1852 c=(
char *) (strncmp(xml,
"CDATA",5) != 0 ?
"*" :
" ");
1853 if (strncmp(xml,
"NOTATION",8) == 0)
1855 xml=(*xml ==
'(') ? strchr(xml,
')') : xml+
1857 if (xml == (
char *) NULL)
1862 predefined_entitites);
1866 if (strncmp(xml,
"#FIXED",6) == 0)
1876 if (((*xml ==
'"') || (*xml ==
'\'')) &&
1877 ((xml=strchr(v=xml+1,*xml)) != (
char *) NULL))
1884 predefined_entitites);
1901 "unable to acquire string `%s'");
1906 "unable to acquire string `%s'");
1911 for (j=1; root->
attributes[i][j] != (
char *) NULL; j+=3) ;
1916 "unable to acquire string `%s'");
1920 if (v != (
char *) NULL)
1926 if (strncmp(xml,
"<!--", 4) == 0)
1927 xml=strstr(xml+4,
"-->");
1929 if (strncmp(xml,
"<?", 2) == 0)
1933 if (xml != (
char *) NULL)
1941 xml=strchr(xml,
'>');
1955 xml_info=root->
node;
1956 if (xml_info->
tag == (
char *) NULL)
1962 root->
node=xml_info;
2023 if ((xml == (
const char *) NULL) || (strlen(xml) == 0))
2026 "root tag missing");
2032 if (utf8 == (
char *) NULL)
2035 "memory allocation failed `%s'",strerror(errno));
2038 terminal=utf8[length-1];
2039 utf8[length-1]=
'\0';
2041 while ((*p !=
'\0') && (*p !=
'<'))
2046 "root tag missing");
2050 attribute=(
char **) NULL;
2058 if ((isalpha((
int) ((
unsigned char) *p)) != 0) || (*p ==
'_') ||
2059 (*p ==
':') || (c <
'\0'))
2069 return(&root->
root);
2072 while (isspace((
int) ((
unsigned char) *p)) != 0)
2074 if (((isalpha((
int) ((
unsigned char) *p)) != 0) || (*p ==
'_')) &&
2075 (ignore_depth == 0))
2077 if ((*p !=
'\0') && (*p !=
'/') && (*p !=
'>'))
2083 while ((root->
attributes[i] != (
char **) NULL) &&
2088 for (l=0; (*p !=
'\0') && (*p !=
'/') && (*p !=
'>'); l+=2)
2095 sizeof(*attributes));
2098 (l+4),
sizeof(*attributes));
2099 if (attributes == (
char **) NULL)
2105 return(&root->
root);
2107 attributes[l+2]=(
char *) NULL;
2108 attributes[l+1]=(
char *) NULL;
2111 if ((*p !=
'=') && (isspace((
int) ((
unsigned char) *p)) == 0))
2118 if ((c ==
'"') || (c ==
'\''))
2125 while ((*p !=
'\0') && (*p != c))
2137 return(&root->
root);
2140 while ((attribute != (
char **) NULL) &&
2141 (attribute[j] != (
char *) NULL) &&
2142 (strcmp(attribute[j],attributes[l]) != 0))
2145 root->
entities,(attribute != (
char **) NULL) &&
2146 (attribute[j] != (
char *) NULL) ? *attribute[j+2] :
2151 while (isspace((
int) ((
unsigned char) *p)) != 0)
2157 while((*p !=
'\0') && (*p !=
'/') && (*p !=
'>'))
2166 if (((*p !=
'\0') && (*p !=
'>')) ||
2167 ((*p ==
'\0') && (terminal !=
'>')))
2174 return(&root->
root);
2187 if ((*p ==
'>') || ((*p ==
'\0') && (terminal ==
'>')))
2206 return(&root->
root);
2219 if ((c ==
'\0') && (terminal !=
'>'))
2224 return(&root->
root);
2227 if ((ignore_depth == 0) &&
2231 return(&root->
root);
2233 if (ignore_depth > 0)
2236 if (isspace((
int) ((
unsigned char) *p)) != 0)
2240 if (strncmp(p,
"!--",3) == 0)
2246 if ((p == (
char *) NULL) || ((*(p+=2) !=
'>') && (*p !=
'\0')) ||
2247 ((*p ==
'\0') && (terminal !=
'>')))
2252 return(&root->
root);
2256 if (strncmp(p,
"![CDATA[",8) == 0)
2262 if (p != (
char *) NULL)
2265 if (ignore_depth == 0)
2273 return(&root->
root);
2277 if (strncmp(p,
"!DOCTYPE",8) == 0)
2282 for (l=0; (*p !=
'\0') && (((l == 0) && (*p !=
'>')) ||
2283 ((l != 0) && ((*p !=
']') ||
2285 l=(ssize_t) ((*p ==
'[') ? 1 : l))
2286 p+=strcspn(p+1,
"[]>")+1;
2287 if ((*p ==
'\0') && (terminal !=
'>'))
2292 return(&root->
root);
2295 tag=strchr(tag,
'[')+1;
2303 return(&root->
root);
2317 if (p == (
char *) NULL)
2320 }
while ((*p !=
'\0') && (*p !=
'>'));
2321 if ((p == (
char *) NULL) || ((*p ==
'\0') &&
2327 return(&root->
root);
2336 return(&root->
root);
2338 if ((p == (
char *) NULL) || (*p ==
'\0'))
2342 if ((*p !=
'\0') && (*p !=
'<'))
2347 while ((*p !=
'\0') && (*p !=
'<'))
2351 if (ignore_depth == 0)
2360 return(&root->
root);
2361 if (root->
node->
tag == (
char *) NULL)
2364 "root tag missing");
2365 return(&root->
root);
2368 "unclosed tag: `%s'",root->
node->
tag);
2369 return(&root->
root);
2399 "lt;",
"<",
"gt;",
">",
"quot;",
""",
2400 "apos;",
"'",
"amp;",
"&", (
char *) NULL
2409 (void) memset(root,0,
sizeof(*root));
2411 if (tag != (
char *) NULL)
2416 if (root->
entities == (
char **) NULL)
2418 (void) memcpy(root->
entities,predefined_entities,
sizeof(predefined_entities));
2424 return(&root->
root);
2464 if (node == xml_info)
2468 while (node->
ordered != xml_info)
2472 if (strcmp(node->
tag,xml_info->
tag) != 0)
2476 if (node->
sibling != xml_info)
2483 (node->
next != xml_info))
2524 const char *tag,
const char *value)
2537 while ((xml_info->
attributes[i] != (
char *) NULL) &&
2540 if (xml_info->
attributes[i] == (
char *) NULL)
2545 if (value == (
const char *) NULL)
2566 for (j=i; xml_info->
attributes[j] != (
char *) NULL; j+=2) ;
2567 if (xml_info->
attributes[i+1] != (
char *) NULL)
2569 if (value != (
const char *) NULL)
2574 if (xml_info->
attributes[i] != (
char *) NULL)
2579 (
size_t) (j+2),
sizeof(*xml_info->
attributes));
2584 (i/2)+1,(
size_t) (((j+2)/2)-(i/2))*
sizeof(**xml_info->
attributes));
2615 const char *content)
2621 if (xml_info->
content != (
char *) NULL)
2664 content[offset]=
'\0';
2668 if (canonical_content == (
char *) NULL)
2669 return(*destination);
2674 sizeof(**destination));
2675 if (*destination == (
char *) NULL)
2676 return(*destination);
2681 return(*destination);
2685 size_t *extent,
size_t start,
char ***attributes)
2702 content=(
char *)
"";
2712 if (*source == (
char *) NULL)
2725 if (*source == (
char *) NULL)
2726 return((
char *) NULL);
2735 while ((attributes[i] != (
char **) NULL) &&
2736 (strcmp(attributes[i][0],xml_info->
tag) != 0))
2739 while ((attributes[i] != (
char **) NULL) &&
2740 (attributes[i][j] != (
char *) NULL))
2742 if ((attributes[i][j+1] == (
char *) NULL) ||
2752 if (*source == (
char *) NULL)
2753 return((
char *) NULL);
2773 if (*source == (
char *) NULL)
2774 return((
char *) NULL);
2776 if (*xml_info->
content !=
'\0')
2779 while ((offset < xml_info->offset) && (content[offset] !=
'\0'))
2821 if (xml_info->
tag == (
char *) NULL)
2822 return((
char *) NULL);
2838 for (j=1; p != (
char *) NULL; j++)
2850 if (xml == (
char *) NULL)
2854 *p !=
'\0' ?
" " :
"",p);
2872 for (j=1; p != (
char *) NULL; j++)
2884 if (xml == (
char *) NULL)
2888 *p !=
'\0' ?
" " :
"",p);
WizardExport WizardBooleanType UnmapBlob(void *map, const size_t length)
WizardExport void * MapBlob(int file, const MapMode mode, const WizardOffsetType offset, const size_t length)
#define WizardMaxBufferExtent
#define WizardAssert(domain, predicate)
#define ThrowFatalException(severity, tag)
WizardExport WizardBooleanType ThrowWizardException(ExceptionInfo *exception, const char *module, const char *function, const size_t line, const ExceptionType severity, const char *format,...)
WizardExport ssize_t FormatLocaleString(char *string, const size_t length, const char *format,...)
WizardBooleanType LogWizardEvent(const LogEventType type, const char *module, const char *function, const size_t line, const char *format,...)
WizardExport WizardBooleanType IsEventLogging(void)
#define GetWizardModule()
WizardExport void * AcquireWizardMemory(const size_t size)
WizardExport void * AcquireQuantumMemory(const size_t count, const size_t quantum)
WizardExport void * RelinquishWizardMemory(void *memory)
WizardExport void * ResizeQuantumMemory(void *memory, const size_t count, const size_t quantum)
WizardExport WizardBooleanType AddValueToSplayTree(SplayTreeInfo *splay_tree, const void *key, const void *value)
static long StringToLong(const char *value)
WizardExport char * DestroyString(char *string)
WizardExport int LocaleCompare(const char *p, const char *q)
WizardExport char * ConstantString(const char *source)
WizardExport char * AcquireString(const char *source)
WizardExport WizardBooleanType ConcatenateString(char **destination, const char *source)
WizardExport size_t CopyWizardString(char *destination, const char *source, const size_t length)
SemaphoreInfo * semaphore
WizardBooleanType standalone
SemaphoreInfo * semaphore
char *** processing_instructions
#define WizardMaxRecursionDepth
static int open_utf8(const char *path, int flags, mode_t mode)
WizardExport char ** GetPathComponents(const char *, size_t *)
WizardExport void GetPathComponent(const char *path, PathType type, char *component)
WizardExport char * Base64Encode(const unsigned char *blob, const size_t blob_length, size_t *encode_length)
static char * ConvertUTF16ToUTF8(const char *content, size_t *length)
WizardExport const char * GetXMLTreeTag(XMLTreeInfo *xml_info)
static void ParseProcessingInstructions(XMLTreeRoot *root, char *xml, size_t length)
WizardExport XMLTreeInfo * SetXMLTreeAttribute(XMLTreeInfo *xml_info, const char *tag, const char *value)
static WizardBooleanType ValidateEntities(char *tag, char *xml, const size_t depth, char **entities)
WizardExport const char * GetXMLTreeContent(XMLTreeInfo *xml_info)
WizardExport XMLTreeInfo * SetXMLTreeContent(XMLTreeInfo *xml_info, const char *content)
static void DestroyXMLTreeOrdered(XMLTreeInfo *xml_info)
void ParseCharacterContent(XMLTreeRoot *root, char *xml, const size_t length, const char state)
WizardExport const char ** GetXMLTreeProcessingInstructions(XMLTreeInfo *xml_info, const char *target)
WizardExport XMLTreeInfo * DestroyXMLTree(XMLTreeInfo *xml_info)
WizardExport XMLTreeInfo * InsertTagIntoXMLTree(XMLTreeInfo *xml_info, XMLTreeInfo *child, const size_t offset)
#define NumberPredefinedEntities
static void ParseOpenTag(XMLTreeRoot *root, char *tag, char **attributes)
WizardExport XMLTreeInfo * AddPathToXMLTree(XMLTreeInfo *xml_info, const char *path, const size_t offset)
static void DestroyXMLTreeRoot(XMLTreeInfo *xml_info)
static char ** DestroyXMLTreeAttributes(char **attributes)
WizardExport XMLTreeInfo * GetXMLTreeSibling(XMLTreeInfo *xml_info)
static char * XMLTreeTagToXML(XMLTreeInfo *xml_info, char **source, size_t *length, size_t *extent, size_t start, char ***attributes)
WizardExport XMLTreeInfo * NewXMLTreeTag(const char *tag)
WizardExport char * XMLTreeInfoToXML(XMLTreeInfo *xml_info)
static unsigned char * ConvertLatin1ToUTF8(const unsigned char *content)
WizardExport XMLTreeInfo * PruneTagFromXMLTree(XMLTreeInfo *xml_info)
static WizardBooleanType ParseInternalDoctype(XMLTreeRoot *root, char *xml, size_t length, ExceptionInfo *exception)
static WizardBooleanType IsSkipTag(const char *tag)
WizardExport XMLTreeInfo * AddChildToXMLTree(XMLTreeInfo *xml_info, const char *tag, const size_t offset)
WizardExport XMLTreeInfo * GetXMLTreePath(XMLTreeInfo *xml_info, const char *path)
WizardPrivate char * FileToXML(const char *filename, const size_t extent)
static char * ParseEntities(char *xml, char **entities, int state)
WizardExport char * CanonicalXMLContent(const char *content, const WizardBooleanType pedantic)
WizardExport const char * GetXMLTreeAttribute(XMLTreeInfo *xml_info, const char *tag)
WizardExport XMLTreeInfo * GetXMLTreeOrdered(XMLTreeInfo *xml_info)
static XMLTreeInfo * ParseCloseTag(XMLTreeRoot *root, char *tag, ExceptionInfo *exception)
WizardExport XMLTreeInfo * NewXMLTree(const char *xml, ExceptionInfo *exception)
WizardExport XMLTreeInfo * GetNextXMLTreeTag(XMLTreeInfo *xml_info)
WizardExport WizardBooleanType GetXMLTreeAttributes(const XMLTreeInfo *xml_info, SplayTreeInfo *attributes)
WizardExport XMLTreeInfo * GetXMLTreeChild(XMLTreeInfo *xml_info, const char *tag)
static const char * ignore_tags[3]
static char * EncodePredefinedEntities(const char *source, ssize_t offset, char **destination, size_t *length, size_t *extent, WizardBooleanType pedantic)
static void DestroyXMLTreeChild(XMLTreeInfo *xml_info)