@@ -76,6 +76,15 @@ typedef struct {
7676 PyObject_HEAD
7777
7878 XML_Parser itself ;
79+ /*
80+ * Strong reference to a parent `xmlparseobject` if this parser
81+ * is a child parser. Set to NULL if this parser is a root parser.
82+ * This is needed to keep the parent parser alive as long as it has
83+ * at least one child parser.
84+ *
85+ * See https://github.com/python/cpython/issues/139400 for details.
86+ */
87+ PyObject * parent ;
7988 int ordered_attributes ; /* Return attributes as a list. */
8089 int specified_attributes ; /* Report only specified attributes. */
8190 int in_callback ; /* Is a callback active? */
@@ -1067,6 +1076,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10671076 return NULL ;
10681077 }
10691078
1079+ // The new subparser will make use of the parent XML_Parser inside of Expat.
1080+ // So we need to take subparsers into account with the reference counting
1081+ // of their parent parser.
1082+ Py_INCREF (self );
1083+
10701084 new_parser -> buffer_size = self -> buffer_size ;
10711085 new_parser -> buffer_used = 0 ;
10721086 new_parser -> buffer = NULL ;
@@ -1076,18 +1090,21 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10761090 new_parser -> ns_prefixes = self -> ns_prefixes ;
10771091 new_parser -> itself = XML_ExternalEntityParserCreate (self -> itself , context ,
10781092 encoding );
1093+ new_parser -> parent = (PyObject * )self ;
10791094 new_parser -> handlers = 0 ;
10801095 new_parser -> intern = Py_XNewRef (self -> intern );
10811096
10821097 if (self -> buffer != NULL ) {
10831098 new_parser -> buffer = PyMem_Malloc (new_parser -> buffer_size );
10841099 if (new_parser -> buffer == NULL ) {
10851100 Py_DECREF (new_parser );
1101+ Py_DECREF (self );
10861102 return PyErr_NoMemory ();
10871103 }
10881104 }
10891105 if (!new_parser -> itself ) {
10901106 Py_DECREF (new_parser );
1107+ Py_DECREF (self );
10911108 return PyErr_NoMemory ();
10921109 }
10931110
@@ -1101,6 +1118,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
11011118 new_parser -> handlers = PyMem_New (PyObject * , i );
11021119 if (!new_parser -> handlers ) {
11031120 Py_DECREF (new_parser );
1121+ Py_DECREF (self );
11041122 return PyErr_NoMemory ();
11051123 }
11061124 clear_handlers (new_parser , 1 );
@@ -1481,6 +1499,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
14811499 /* namespace_separator is either NULL or contains one char + \0 */
14821500 self -> itself = XML_ParserCreate_MM (encoding , & ExpatMemoryHandler ,
14831501 namespace_separator );
1502+ self -> parent = NULL ;
14841503 if (self -> itself == NULL ) {
14851504 PyErr_SetString (PyExc_RuntimeError ,
14861505 "XML_ParserCreate failed" );
@@ -1517,6 +1536,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
15171536 for (size_t i = 0 ; handler_info [i ].name != NULL ; i ++ ) {
15181537 Py_VISIT (self -> handlers [i ]);
15191538 }
1539+ Py_VISIT (self -> parent );
15201540 Py_VISIT (Py_TYPE (op ));
15211541 return 0 ;
15221542}
@@ -1527,6 +1547,10 @@ xmlparse_clear(PyObject *op)
15271547 xmlparseobject * self = xmlparseobject_CAST (op );
15281548 clear_handlers (self , 0 );
15291549 Py_CLEAR (self -> intern );
1550+ // NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
1551+ // XML_ParserFree(self->itself), or a subparser could lose its parent
1552+ // XML_Parser while still making use of it internally.
1553+ // https://github.com/python/cpython/issues/139400
15301554 return 0 ;
15311555}
15321556
@@ -1540,6 +1564,7 @@ xmlparse_dealloc(PyObject *op)
15401564 XML_ParserFree (self -> itself );
15411565 }
15421566 self -> itself = NULL ;
1567+ Py_CLEAR (self -> parent );
15431568
15441569 if (self -> handlers != NULL ) {
15451570 PyMem_Free (self -> handlers );
0 commit comments