Skip to content

PCRE2 support #2857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 45 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
94e8a0c
Patch core for PCRE2 interoperation
weltling Oct 12, 2017
04218b4
Fix config.w32
weltling Oct 13, 2017
f4f94b6
Fix ret evaluation
weltling Oct 13, 2017
a1f4603
Fix config.m4
weltling Oct 14, 2017
cf9e419
Hide the pcre_cache_entry implementation
weltling Oct 14, 2017
1198ea9
Fix refcount type and add assert
weltling Oct 14, 2017
61db9ee
Fix config.h
weltling Oct 14, 2017
f944361
Add comment
weltling Oct 14, 2017
55f8a49
Fix visibility and double free
weltling Oct 14, 2017
e7a0c8a
Move to pemalloc/pefree
weltling Oct 14, 2017
d86233f
Fix datatype
weltling Oct 14, 2017
23232f9
Only need to assign stack once
weltling Oct 14, 2017
886d28c
Next refactoring round
weltling Oct 15, 2017
b803838
Implement setting extra compilation option and fix X modifier
weltling Oct 15, 2017
62e0e06
Fix JIT ini and subsequent handling
weltling Oct 15, 2017
ec63246
Rework MINFO and add version constants
weltling Oct 15, 2017
fe37d6d
Reorder pce items
weltling Oct 15, 2017
01e6852
Fix test
weltling Oct 15, 2017
0c36aa6
Add missing free
weltling Oct 15, 2017
85bbf2a
Rework comment
weltling Oct 16, 2017
80b9adc
Info table item
weltling Oct 16, 2017
4634d42
More robust PCRE2 initialization
weltling Oct 22, 2017
7151a34
Drop unused var
weltling Oct 22, 2017
fbe37cd
Fix start offset datatype and handling
weltling Oct 22, 2017
de10427
Retry PCRE2 init also in MINIT
weltling Oct 22, 2017
5c51c1d
Fix datatype
weltling Oct 22, 2017
7eda1e1
Not needed anymore with PCRE2
weltling Oct 22, 2017
5f3b8d7
Remove TODO
weltling Oct 22, 2017
34e1a35
Avoid unnecessary scoped var
weltling Oct 22, 2017
bebc1b0
Remove unused files
weltling Oct 31, 2017
14a366e
Bad UTF error is handled another way
weltling Nov 5, 2017
425c933
Check match data creation
weltling Nov 5, 2017
15e5094
More error checks
weltling Nov 5, 2017
08e0739
Error checks done and otherwise these functions return zero
weltling Nov 5, 2017
a4efe41
Missed error check
weltling Nov 5, 2017
8869ba3
Fix external PCRE2 version check
weltling Nov 9, 2017
15bb41a
Sync jit availability checks
weltling Nov 9, 2017
c9b4822
Fix symbol check for external pcre2
weltling Nov 9, 2017
0864586
Fix add library for external pcre2
weltling Nov 9, 2017
be984c0
Preallocate pcre2_match_data for offsets num <= 32
weltling Nov 13, 2017
ec68a9f
Zero global mdata after free
weltling Nov 13, 2017
cd2c26c
Check jit in pattern by flag instead of doing it on demand
dstogov Nov 13, 2017
5cc3525
Memorize match data usage
dstogov Nov 13, 2017
e110793
Don't overwrite poptions
weltling Nov 13, 2017
beabacb
Expand on preallocated match data usage
weltling Nov 13, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix start offset datatype and handling
  • Loading branch information
weltling committed Nov 9, 2017
commit fbe37cd1b933603218aef3589ea858f88e6c7fbe
36 changes: 20 additions & 16 deletions ext/pcre/php_pcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -807,14 +807,14 @@ PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *preg
/* }}} */

/* {{{ add_offset_pair */
static inline void add_offset_pair(zval *result, char *str, size_t len, zend_off_t offset, char *name, uint32_t unmatched_as_null)
static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, char *name, uint32_t unmatched_as_null)
{
zval match_pair, tmp;

array_init_size(&match_pair, 2);

/* Add (match, offset) to the return value */
if (offset < 0) {
if (PCRE2_UNSET == offset) {
if (unmatched_as_null) {
ZVAL_NULL(&tmp);
} else {
Expand Down Expand Up @@ -891,6 +891,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
pcre2_match_data *match_data;
size_t jit_size;
int rc;
PCRE2_SIZE start_offset2;

ZVAL_UNDEF(&marks);

Expand Down Expand Up @@ -925,10 +926,13 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub

/* Negative offset counts from the end of the string. */
if (start_offset < 0) {
start_offset = subject_len + start_offset;
if (start_offset < 0) {
start_offset = 0;
if ((PCRE2_SIZE)-start_offset <= subject_len) {
start_offset2 = subject_len + start_offset;
} else {
start_offset2 = 0;
}
} else {
start_offset2 = (PCRE2_SIZE)start_offset;
}

/* Calculate the size of the offsets array, and allocate memory for it. */
Expand Down Expand Up @@ -973,15 +977,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
#ifdef HAVE_PCRE_JIT_SUPPORT
if (PCRE_G(jit) && !rc && jit_size > 0
&& no_utf_check && !g_notempty) {
if (start_offset < 0 || start_offset > subject_len) {
if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
break;
}
count = pcre2_jit_match(pce->re, subject, subject_len, start_offset,
count = pcre2_jit_match(pce->re, subject, subject_len, start_offset2,
no_utf_check|g_notempty, match_data, mctx);
} else
#endif
count = pcre2_match(pce->re, subject, subject_len, start_offset,
count = pcre2_match(pce->re, subject, subject_len, start_offset2,
no_utf_check|g_notempty, match_data, mctx);

/* the string was already proved to be valid UTF-8 */
Expand Down Expand Up @@ -1188,23 +1192,23 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
}

/* Advance to the next piece. */
start_offset = offsets[1];
start_offset2 = offsets[1];

/* If we have matched an empty string, mimic what Perl's /g option does.
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
g_notempty = (start_offset == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
g_notempty = (start_offset2 == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;

} else if (count == PCRE2_ERROR_NOMATCH) {
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) {
size_t unit_len = calculate_unit_length(pce, subject + start_offset);
if (g_notempty != 0 && start_offset2 < subject_len) {
size_t unit_len = calculate_unit_length(pce, subject + start_offset2);

start_offset += unit_len;
start_offset2 += unit_len;
g_notempty = 0;
} else {
break;
Expand Down Expand Up @@ -1392,7 +1396,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
size_t alloc_len; /* Actual allocated length */
size_t match_len; /* Length of the current match */
int backref; /* Backreference number */
zend_off_t start_offset; /* Where the new search starts */
PCRE2_SIZE start_offset; /* Where the new search starts */
uint32_t g_notempty=0; /* If the match should not be empty */
char *walkbuf, /* Location of current replacement in the result */
*walk, /* Used to walk the replacement string */
Expand Down Expand Up @@ -1630,7 +1634,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
size_t size_offsets; /* Size of the offsets array */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
int start_offset; /* Where the new search starts */
PCRE2_SIZE start_offset; /* Where the new search starts */
uint32_t g_notempty=0; /* If the match should not be empty */
char *match, /* The current match */
*piece; /* The current piece of subject */
Expand Down Expand Up @@ -2280,7 +2284,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
size_t size_offsets; /* Size of the offsets array */
uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
size_t start_offset; /* Where the new search starts */
PCRE2_SIZE start_offset; /* Where the new search starts */
PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
uint32_t g_notempty = 0; /* If the match should not be empty */
char *last_match; /* Location of last match */
Expand Down