1
0
mirror of https://github.com/GNOME/libxml2.git synced 2025-10-16 23:26:56 +08:00

parser: Check attribute normalization standalone constraint

To fully implement "VC: Standalone Document Declaration", we have to
check for normalization changes caused by non-CDATA attribute types
declared externally.

Fixes #119.
This commit is contained in:
Nick Wellnhofer
2025-05-20 21:57:01 +02:00
parent 682195c869
commit e4cbc295fa
2 changed files with 83 additions and 43 deletions

124
parser.c
View File

@@ -92,6 +92,12 @@
#define XML_MAX_ATTRS 100000000 /* 100 million */ #define XML_MAX_ATTRS 100000000 /* 100 million */
#define XML_SPECIAL_EXTERNAL (1 << 20)
#define XML_SPECIAL_TYPE_MASK (XML_SPECIAL_EXTERNAL - 1)
#define XML_ATTVAL_ALLOC (1 << 0)
#define XML_ATTVAL_NORM_CHANGE (1 << 1)
struct _xmlStartTag { struct _xmlStartTag {
const xmlChar *prefix; const xmlChar *prefix;
const xmlChar *URI; const xmlChar *URI;
@@ -1174,6 +1180,9 @@ xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
goto mem_error; goto mem_error;
} }
if (PARSER_EXTERNAL(ctxt))
type |= XML_SPECIAL_EXTERNAL;
if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr, if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
XML_INT_TO_PTR(type)) < 0) XML_INT_TO_PTR(type)) < 0)
goto mem_error; goto mem_error;
@@ -3814,34 +3823,36 @@ done:
* @param inSpace whitespace state * @param inSpace whitespace state
* @param depth nesting depth * @param depth nesting depth
* @param check whether to check for amplification * @param check whether to check for amplification
* @returns whether there was a normalization change
*/ */
static void static int
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf, xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
const xmlChar *str, xmlEntityPtr pent, int normalize, const xmlChar *str, xmlEntityPtr pent, int normalize,
int *inSpace, int depth, int check) { int *inSpace, int depth, int check) {
int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20; int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
int c, chunkSize; int c, chunkSize;
int normChange = 0;
if (str == NULL) if (str == NULL)
return; return(0);
depth += 1; depth += 1;
if (depth > maxDepth) { if (depth > maxDepth) {
xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
"Maximum entity nesting depth exceeded"); "Maximum entity nesting depth exceeded");
return; return(0);
} }
if (pent != NULL) { if (pent != NULL) {
if (pent->flags & XML_ENT_EXPANDING) { if (pent->flags & XML_ENT_EXPANDING) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
xmlHaltParser(ctxt); xmlHaltParser(ctxt);
return; return(0);
} }
if (check) { if (check) {
if (xmlParserEntityCheck(ctxt, pent->length)) if (xmlParserEntityCheck(ctxt, pent->length))
return; return(0);
} }
} }
@@ -3877,6 +3888,7 @@ xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
xmlSBufAddString(buf, str - chunkSize, chunkSize); xmlSBufAddString(buf, str - chunkSize, chunkSize);
chunkSize = 0; chunkSize = 0;
} }
normChange = 1;
} else if (c < 0x20) { } else if (c < 0x20) {
if (chunkSize > 0) { if (chunkSize > 0) {
xmlSBufAddString(buf, str - chunkSize, chunkSize); xmlSBufAddString(buf, str - chunkSize, chunkSize);
@@ -3911,7 +3923,9 @@ xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
} }
if (val == ' ') { if (val == ' ') {
if ((!normalize) || (!*inSpace)) if ((normalize) && (*inSpace))
normChange = 1;
else
xmlSBufAddCString(buf, " ", 1); xmlSBufAddCString(buf, " ", 1);
*inSpace = 1; *inSpace = 1;
} else { } else {
@@ -3951,8 +3965,8 @@ xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
} else if ((ent != NULL) && (ent->content != NULL)) { } else if ((ent != NULL) && (ent->content != NULL)) {
if (pent != NULL) if (pent != NULL)
pent->flags |= XML_ENT_EXPANDING; pent->flags |= XML_ENT_EXPANDING;
xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent, normChange |= xmlExpandEntityInAttValue(ctxt, buf,
normalize, inSpace, depth, check); ent->content, ent, normalize, inSpace, depth, check);
if (pent != NULL) if (pent != NULL)
pent->flags &= ~XML_ENT_EXPANDING; pent->flags &= ~XML_ENT_EXPANDING;
} }
@@ -3961,6 +3975,8 @@ xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
if (chunkSize > 0) if (chunkSize > 0)
xmlSBufAddString(buf, str - chunkSize, chunkSize); xmlSBufAddString(buf, str - chunkSize, chunkSize);
return(normChange);
} }
/** /**
@@ -4022,23 +4038,25 @@ xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
* *
* @param ctxt an XML parser context * @param ctxt an XML parser context
* @param attlen attribute len result * @param attlen attribute len result
* @param alloc whether the attribute was reallocated as a new string * @param outFlags resulting XML_ATTVAL_* flags
* @param normalize if 1 then further non-CDATA normalization must be done * @param special value from attsSpecial
* @param isNamespace whether this is a namespace declaration * @param isNamespace whether this is a namespace declaration
* @returns the AttValue parsed or NULL. The value has to be freed by the * @returns the AttValue parsed or NULL. The value has to be freed by the
* caller if it was copied, this can be detected by val[*len] == 0. * caller if it was copied, this can be detected by val[*len] == 0.
*/ */
static xmlChar * static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc, xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
int normalize, int isNamespace) { int special, int isNamespace) {
unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ? unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
XML_MAX_HUGE_LENGTH : XML_MAX_HUGE_LENGTH :
XML_MAX_TEXT_LENGTH; XML_MAX_TEXT_LENGTH;
xmlSBuf buf; xmlSBuf buf;
xmlChar *ret; xmlChar *ret;
int c, l, quote, flags, chunkSize; int c, l, quote, entFlags, chunkSize;
int inSpace = 1; int inSpace = 1;
int replaceEntities; int replaceEntities;
int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
int attvalFlags = 0;
/* Always expand namespace URIs */ /* Always expand namespace URIs */
replaceEntities = (ctxt->replaceEntities) || (isNamespace); replaceEntities = (ctxt->replaceEntities) || (isNamespace);
@@ -4055,9 +4073,9 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
NEXTL(1); NEXTL(1);
if (ctxt->inSubset == 0) if (ctxt->inSubset == 0)
flags = XML_ENT_CHECKED | XML_ENT_VALIDATED; entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
else else
flags = XML_ENT_VALIDATED; entFlags = XML_ENT_VALIDATED;
inSpace = 1; inSpace = 1;
chunkSize = 0; chunkSize = 0;
@@ -4123,6 +4141,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize); xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
chunkSize = 0; chunkSize = 0;
} }
attvalFlags |= XML_ATTVAL_NORM_CHANGE;
} else if (c < 0x20) { } else if (c < 0x20) {
/* Convert to space */ /* Convert to space */
if (chunkSize > 0) { if (chunkSize > 0) {
@@ -4162,7 +4181,9 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
xmlSBufAddCString(&buf, "&#38;", 5); xmlSBufAddCString(&buf, "&#38;", 5);
inSpace = 0; inSpace = 0;
} else if (val == ' ') { } else if (val == ' ') {
if ((!normalize) || (!inSpace)) if ((normalize) && (inSpace))
attvalFlags |= XML_ATTVAL_NORM_CHANGE;
else
xmlSBufAddCString(&buf, " ", 1); xmlSBufAddCString(&buf, " ", 1);
inSpace = 1; inSpace = 1;
} else { } else {
@@ -4198,11 +4219,12 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
xmlSBufAddString(&buf, ent->content, ent->length); xmlSBufAddString(&buf, ent->content, ent->length);
inSpace = 0; inSpace = 0;
} else if (replaceEntities) { } else if (replaceEntities) {
xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent, if (xmlExpandEntityInAttValue(ctxt, &buf,
normalize, &inSpace, ctxt->inputNr, ent->content, ent, normalize, &inSpace, ctxt->inputNr,
/* check */ 1); /* check */ 1) > 0)
attvalFlags |= XML_ATTVAL_NORM_CHANGE;
} else { } else {
if ((ent->flags & flags) != flags) if ((ent->flags & entFlags) != entFlags)
xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr); xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
if (xmlParserEntityCheck(ctxt, ent->expandedSize)) { if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
@@ -4222,14 +4244,15 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
} }
} }
if ((buf.mem == NULL) && (alloc != NULL)) { if ((buf.mem == NULL) && (outFlags != NULL)) {
ret = (xmlChar *) CUR_PTR - chunkSize; ret = (xmlChar *) CUR_PTR - chunkSize;
if (attlen != NULL) if (attlen != NULL)
*attlen = chunkSize; *attlen = chunkSize;
if ((normalize) && (inSpace) && (chunkSize > 0)) if ((normalize) && (inSpace) && (chunkSize > 0)) {
attvalFlags |= XML_ATTVAL_NORM_CHANGE;
*attlen -= 1; *attlen -= 1;
*alloc = 0; }
/* Report potential error */ /* Report potential error */
xmlSBufCleanup(&buf, ctxt, "AttValue length too long"); xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
@@ -4237,19 +4260,23 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
if (chunkSize > 0) if (chunkSize > 0)
xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize); xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
if ((normalize) && (inSpace) && (buf.size > 0)) if ((normalize) && (inSpace) && (buf.size > 0)) {
attvalFlags |= XML_ATTVAL_NORM_CHANGE;
buf.size--; buf.size--;
}
ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long"); ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
attvalFlags |= XML_ATTVAL_ALLOC;
if (ret != NULL) { if (ret != NULL) {
if (attlen != NULL) if (attlen != NULL)
*attlen = buf.size; *attlen = buf.size;
if (alloc != NULL)
*alloc = 1;
} }
} }
if (outFlags != NULL)
*outFlags = attvalFlags;
NEXTL(1); NEXTL(1);
return(ret); return(ret);
@@ -8505,8 +8532,9 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
xmlHashedString hname; xmlHashedString hname;
const xmlChar *prefix, *name; const xmlChar *prefix, *name;
xmlChar *val = NULL, *internal_val = NULL; xmlChar *val = NULL, *internal_val = NULL;
int normalize = 0; int special = 0;
int isNamespace; int isNamespace;
int flags;
*value = NULL; *value = NULL;
GROW; GROW;
@@ -8523,34 +8551,46 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
* get the type if needed * get the type if needed
*/ */
if (ctxt->attsSpecial != NULL) { if (ctxt->attsSpecial != NULL) {
int type; special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
type = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
prefix, name)); prefix, name));
if (type != 0)
normalize = 1;
} }
/* /*
* read the value * read the value
*/ */
SKIP_BLANKS; SKIP_BLANKS;
if (RAW == '=') { if (RAW != '=') {
NEXT;
SKIP_BLANKS;
isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
(prefix == ctxt->str_xmlns));
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
isNamespace);
if (val == NULL)
goto error;
} else {
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
"Specification mandates value for attribute %s\n", "Specification mandates value for attribute %s\n",
name); name);
goto error; goto error;
} }
NEXT;
SKIP_BLANKS;
flags = 0;
isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
(prefix == ctxt->str_xmlns));
val = xmlParseAttValueInternal(ctxt, len, &flags, special,
isNamespace);
if (val == NULL)
goto error;
*alloc = (flags & XML_ATTVAL_ALLOC) != 0;
#ifdef LIBXML_VALID_ENABLED
if ((ctxt->validate) &&
(ctxt->standalone) &&
(special & XML_SPECIAL_EXTERNAL) &&
(flags & XML_ATTVAL_NORM_CHANGE)) {
xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
"standalone: normalization of attribute %s on %s "
"by external subset declaration\n",
name, elem);
}
#endif
if (prefix == ctxt->str_xml) { if (prefix == ctxt->str_xml) {
/* /*
* Check that xml:lang conforms to the specification * Check that xml:lang conforms to the specification

View File

@@ -29,7 +29,7 @@
static FILE *logfile = NULL; static FILE *logfile = NULL;
static int verbose = 0; static int verbose = 0;
#define NB_EXPECTED_ERRORS 13 #define NB_EXPECTED_ERRORS 5
const char *skipped_tests[] = { const char *skipped_tests[] = {