Skip to content

Commit 6d2bc72

Browse files
committed
Fix #80268: loadHTML() truncates at NUL bytes
libxml2 has no particular issues parsing HTML strings with NUL bytes; these just cause truncation of the current text content, but parsing continues generally. Since `::loadHTMLFile()` already supports NUL bytes, `::loadHTML()` should as well. Note that this is different from XML, which does not allow any NUL bytes. Closes GH-6368.
1 parent 824cbc2 commit 6d2bc72

File tree

3 files changed

+27
-1
lines changed

3 files changed

+27
-1
lines changed

NEWS

+3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ PHP NEWS
1111
- COM:
1212
. Fixed bug #62474 (com_event_sink crashes on certain arguments). (cmb)
1313

14+
- DOM:
15+
. Fixed bug #80268 (loadHTML() truncates at NUL bytes). (cmb)
16+
1417
- IMAP:
1518
. Fixed bug #64076 (imap_sort() does not return FALSE on failure). (cmb)
1619
. Fixed bug #76618 (segfault on imap_reopen). (girgias)

ext/dom/document.c

-1
Original file line numberDiff line numberDiff line change
@@ -2024,7 +2024,6 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
20242024
}
20252025
ctxt = htmlCreateFileParserCtxt(source, NULL);
20262026
} else {
2027-
source_len = xmlStrlen((xmlChar *) source);
20282027
if (ZEND_SIZE_T_INT_OVFL(source_len)) {
20292028
php_error_docref(NULL, E_WARNING, "Input string is too long");
20302029
RETURN_FALSE;

ext/dom/tests/bug80268.phpt

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
--TEST--
2+
Bug #80268 (loadHTML() truncates at NUL bytes)
3+
--SKIPIF--
4+
<?php require_once('skipif.inc'); ?>
5+
--FILE--
6+
<?php
7+
$doc = new DOMDocument;
8+
$doc->loadHTML("<p>foo\0bar</p>");
9+
$html = $doc->saveHTML();
10+
var_dump(strpos($html, '<p>foo</p>') !== false);
11+
12+
file_put_contents(__DIR__ . '/80268.html', "<p>foo\0bar</p>");
13+
$doc = new DOMDocument;
14+
$doc->loadHTMLFile(__DIR__ . '/80268.html');
15+
$html = $doc->saveHTML();
16+
var_dump(strpos($html, '<p>foo</p>') !== false);
17+
?>
18+
--CLEAN--
19+
<?php
20+
unlink(__DIR__ . '/80268.html');
21+
?>
22+
--EXPECT--
23+
bool(true)
24+
bool(true)

0 commit comments

Comments
 (0)