1
0
mirror of https://github.com/GNOME/libxml2.git synced 2025-05-07 20:39:30 +08:00

parser: Make undeclared entities in XML content fatal

When parsing XML content with functions like xmlParseBalancedChunk or
xmlParseInNodeContext, make undeclared entities always a fatal error to
match 2.13 behavior.

This was deliberately changed in 4f329dc5, probably to make the tests
pass.

Should fix #895.
This commit is contained in:
Nick Wellnhofer 2025-04-25 11:41:50 +02:00
parent 097fe87f1b
commit 72906f161c
3 changed files with 63 additions and 7 deletions

View File

@ -12364,13 +12364,6 @@ xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
{
xmlCtxtInitializeLate(ctxt);
/*
* This hack lowers the error level of undeclared entities
* from XML_ERR_FATAL (well-formedness error) to XML_ERR_ERROR
* or XML_ERR_WARNING.
*/
ctxt->hasExternalSubset = 1;
/*
* initialize the SAX2 namespaces stack
*/

View File

@ -2355,6 +2355,14 @@ testParseContent(xmlParserCtxtPtr ctxt, xmlDocPtr doc, const char *filename) {
char *content, *roundTrip;
int ret = 0;
/*
* Undeclared entities aren't a fatal error if there's an
* external DTD. When parsing content, we assume there's no
* DTD, so the undeclared entity test would fail.
*/
if (strcmp(filename, "./test/undeclared-entity.xml") == 0)
return 0;
if (ctxt->html) {
xmlNodePtr cur;

View File

@ -17,6 +17,13 @@
#include <string.h>
#ifdef LIBXML_SAX1_ENABLED
/*
 * Structured error callback that deliberately discards every error it
 * is handed. It is installed with xmlSetStructuredErrorFunc around calls
 * that are expected to fail, so the expected error isn't reported.
 */
static void
ignoreError(void *ctxt ATTRIBUTE_UNUSED,
const xmlError *error ATTRIBUTE_UNUSED) {
    /* Intentionally empty: both arguments are ignored. */
}
#endif
static int
testNewDocNode(void) {
xmlNodePtr node;
@ -128,6 +135,53 @@ testCFileIO(void) {
return err;
}
/*
 * The exact rules when undeclared entities are a fatal error
 * depend on some conditions that aren't recovered from the
 * context document when parsing XML content. This test case
 * demonstrates such an asymmetry: a document that references an
 * external DTD parses successfully despite the undeclared entity,
 * while parsing the same element as content in the context of that
 * document must fail with XML_ERR_UNDECLARED_ENTITY and produce no
 * node list.
 *
 * Returns 0 if all checks pass, 1 otherwise.
 */
static int
testUndeclEntInContent(void) {
    const char xml[] = "<!DOCTYPE doc SYSTEM 'my.dtd'><doc>&undecl;</doc>";
    const char content[] = "<doc>&undecl;</doc>";
    xmlDocPtr doc;
    xmlNodePtr root;
    /*
     * Start from NULL so the checks below never read an indeterminate
     * pointer, even if a parser entry point returns before storing to
     * its output argument.
     */
    xmlNodePtr list = NULL;
    int options = XML_PARSE_NOENT | XML_PARSE_NOERROR;
    int err = 0;
    int res;

    /* Parsing the document succeeds because of the external DTD. */
    doc = xmlReadDoc(BAD_CAST xml, NULL, NULL, options);
    root = xmlDocGetRootElement(doc);
    if (root == NULL) {
        /*
         * Without a context node the remaining checks are meaningless;
         * report the setup failure explicitly instead of a misleading
         * "wrong result" message.
         */
        fprintf(stderr, "Failed to parse context document\n");
        xmlFreeDoc(doc);
        return 1;
    }

    /* Parsing content fails. */
    res = xmlParseInNodeContext(root, content, sizeof(content) - 1, options,
                                &list);
    if (res != XML_ERR_UNDECLARED_ENTITY || list != NULL) {
        fprintf(stderr, "Wrong result from xmlParseInNodeContext\n");
        err = 1;
    }
    xmlFreeNodeList(list);
    /* Don't carry a dangling pointer into the next check. */
    list = NULL;

#ifdef LIBXML_SAX1_ENABLED
    /*
     * xmlParseBalancedChunkMemory takes no parser options, so silence
     * the expected error with a no-op structured error handler.
     */
    xmlSetStructuredErrorFunc(NULL, ignoreError);

    res = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, BAD_CAST content,
                                      &list);
    if (res != XML_ERR_UNDECLARED_ENTITY || list != NULL) {
        fprintf(stderr, "Wrong result from xmlParseBalancedChunkMemory\n");
        err = 1;
    }
    xmlFreeNodeList(list);
    list = NULL;

    /* Restore default error reporting for subsequent tests. */
    xmlSetStructuredErrorFunc(NULL, NULL);
#endif /* LIBXML_SAX1_ENABLED */

    xmlFreeDoc(doc);

    return err;
}
#ifdef LIBXML_VALID_ENABLED
static void
testSwitchDtdExtSubset(void *vctxt, const xmlChar *name ATTRIBUTE_UNUSED,
@ -1219,6 +1273,7 @@ main(void) {
err |= testUnsupportedEncoding();
err |= testNodeGetContent();
err |= testCFileIO();
err |= testUndeclEntInContent();
#ifdef LIBXML_VALID_ENABLED
err |= testSwitchDtd();
#endif