Skip to content

PCRE2 support #2857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 45 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
94e8a0c
Patch core for PCRE2 interoperation
weltling Oct 12, 2017
04218b4
Fix config.w32
weltling Oct 13, 2017
f4f94b6
Fix ret evaluation
weltling Oct 13, 2017
a1f4603
Fix config.m4
weltling Oct 14, 2017
cf9e419
Hide the pcre_cache_entry implementation
weltling Oct 14, 2017
1198ea9
Fix refcount type and add assert
weltling Oct 14, 2017
61db9ee
Fix config.h
weltling Oct 14, 2017
f944361
Add comment
weltling Oct 14, 2017
55f8a49
Fix visibility and double free
weltling Oct 14, 2017
e7a0c8a
Move to pemalloc/pefree
weltling Oct 14, 2017
d86233f
Fix datatype
weltling Oct 14, 2017
23232f9
Only need to assign stack once
weltling Oct 14, 2017
886d28c
Next refactoring round
weltling Oct 15, 2017
b803838
Implement setting extra compilation option and fix X modifier
weltling Oct 15, 2017
62e0e06
Fix JIT ini and subsequent handling
weltling Oct 15, 2017
ec63246
Rework MINFO and add version constants
weltling Oct 15, 2017
fe37d6d
Reorder pce items
weltling Oct 15, 2017
01e6852
Fix test
weltling Oct 15, 2017
0c36aa6
Add missing free
weltling Oct 15, 2017
85bbf2a
Rework comment
weltling Oct 16, 2017
80b9adc
Info table item
weltling Oct 16, 2017
4634d42
More robust PCRE2 initialization
weltling Oct 22, 2017
7151a34
Drop unused var
weltling Oct 22, 2017
fbe37cd
Fix start offset datatype and handling
weltling Oct 22, 2017
de10427
Retry PCRE2 init also in MINIT
weltling Oct 22, 2017
5c51c1d
Fix datatype
weltling Oct 22, 2017
7eda1e1
Not needed anymore with PCRE2
weltling Oct 22, 2017
5f3b8d7
Remove TODO
weltling Oct 22, 2017
34e1a35
Avoid unnecessary scoped var
weltling Oct 22, 2017
bebc1b0
Remove unused files
weltling Oct 31, 2017
14a366e
Bad UTF error is handled another way
weltling Nov 5, 2017
425c933
Check match data creation
weltling Nov 5, 2017
15e5094
More error checks
weltling Nov 5, 2017
08e0739
Error checks done and otherwise these functions return zero
weltling Nov 5, 2017
a4efe41
Missed error check
weltling Nov 5, 2017
8869ba3
Fix external PCRE2 version check
weltling Nov 9, 2017
15bb41a
Sync jit availability checks
weltling Nov 9, 2017
c9b4822
Fix symbol check for external pcre2
weltling Nov 9, 2017
0864586
Fix add library for external pcre2
weltling Nov 9, 2017
be984c0
Preallocate pcre2_match_data for offsets num <= 32
weltling Nov 13, 2017
ec68a9f
Zero global mdata after free
weltling Nov 13, 2017
cd2c26c
Check jit in pattern by flag instead of doing it on demand
dstogov Nov 13, 2017
5cc3525
Memorize match data usage
dstogov Nov 13, 2017
e110793
Don't overwrite poptions
weltling Nov 13, 2017
beabacb
Expand on preallocated match data usage
weltling Nov 13, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Next refactoring round
Don't refetch contexts in the loop

More datatype fixes
  • Loading branch information
weltling committed Nov 9, 2017
commit 886d28c8f7e8df3a2eaf1b61a9f721dc89182177
9 changes: 6 additions & 3 deletions ext/opcache/zend_accelerator_blacklist.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist)
PCRE2_SIZE pcre_error_offset;
zend_regexp_list **regexp_list_it, *it;
char regexp[12*1024], *p, *end, *c, *backtrack = NULL;
pcre2_compile_context *cctx = php_pcre_cctx();

if (blacklist->pos == 0) {
/* we have no blacklist to talk about */
Expand Down Expand Up @@ -178,7 +179,7 @@ static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist)
}
it->next = NULL;

if ((it->re = pcre2_compile(regexp, PCRE2_ZERO_TERMINATED, PCRE2_NO_AUTO_CAPTURE, &errnumber, &pcre_error_offset, php_pcre_cctx())) == NULL) {
if ((it->re = pcre2_compile(regexp, PCRE2_ZERO_TERMINATED, PCRE2_NO_AUTO_CAPTURE, &errnumber, &pcre_error_offset, cctx)) == NULL) {
free(it);
pcre2_get_error_message(errnumber, pcre_error, sizeof(pcre_error));
blacklist_report_regexp_error(pcre_error, pcre_error_offset);
Expand Down Expand Up @@ -340,13 +341,15 @@ zend_bool zend_accel_blacklist_is_blacklisted(zend_blacklist *blacklist, char *v
{
int ret = 0;
zend_regexp_list *regexp_list_it = blacklist->regexp_list;
pcre2_match_context *mctx = php_pcre_mctx();
pcre2_general_context *gctx = php_pcre_gctx();

if (regexp_list_it == NULL) {
return 0;
}
while (regexp_list_it != NULL) {
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(regexp_list_it->re, php_pcre_gctx());
int rc = pcre2_match(regexp_list_it->re, verify_path, strlen(verify_path), 0, 0, match_data, php_pcre_mctx());
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(regexp_list_it->re, gctx);
int rc = pcre2_match(regexp_list_it->re, verify_path, strlen(verify_path), 0, 0, match_data, mctx);
if (rc >= 0) {
ret = 1;
pcre2_match_data_free(match_data);
Expand Down
66 changes: 33 additions & 33 deletions ext/pcre/php_pcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ static PHP_INI_MH(OnUpdateRecursionLimit)
}/*}}}*/

static PHP_INI_MH(OnUpdateJit)
{
{/*{{{*/
OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
#ifdef HAVE_PCRE_JIT_SUPPORT
if (PCRE_G(jit) && jit_stack) {
Expand All @@ -220,7 +220,7 @@ static PHP_INI_MH(OnUpdateJit)
#endif

return SUCCESS;
}
}/*}}}*/

PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
Expand Down Expand Up @@ -317,7 +317,7 @@ static int pcre_clean_cache(zval *data, void *arg)
/* }}} */

/* {{{ static make_subpats_table */
static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
static char **make_subpats_table(size_t num_subpats, pcre_cache_entry *pce)
{
uint32_t name_cnt = pce->name_count, name_size, ni = 0;
char *name_table;
Expand Down Expand Up @@ -371,7 +371,7 @@ static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, ch
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
pcre2_code *re = NULL;
int coptions = 0;
uint32_t coptions = 0;
PCRE2_UCHAR error[256];
PCRE2_SIZE erroffset;
int errnumber;
Expand All @@ -380,7 +380,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
char end_delimiter;
char *p, *pp;
char *pattern;
int poptions = 0;
uint32_t poptions = 0;
const uint8_t *tables = NULL;
pcre_cache_entry *pce;
pcre_cache_entry new_entry;
Expand Down Expand Up @@ -685,7 +685,7 @@ PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *preg
/* }}} */

/* {{{ add_offset_pair */
static inline void add_offset_pair(zval *result, char *str, size_t len, zend_off_t offset, char *name, int unmatched_as_null)
static inline void add_offset_pair(zval *result, char *str, size_t len, zend_off_t offset, char *name, uint32_t unmatched_as_null)
{
zval match_pair, tmp;

Expand Down Expand Up @@ -746,24 +746,24 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ *

/* {{{ php_pcre_match_impl() */
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value,
zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
{
zval result_set, /* Holds a set of subpatterns after
a global match */
*match_sets = NULL; /* An array of sets of matches for each
subpattern after a global match */
int no_utf_check = 0; /* Execution options */
uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
size_t num_subpats; /* Number of captured subpatterns */
size_t size_offsets; /* Size of the offsets array */
int matched; /* Has anything matched */
int g_notempty = 0; /* If the match should not be empty */
uint32_t g_notempty = 0; /* If the match should not be empty */
char **subpat_names; /* Array for named subpatterns */
size_t i;
int subpats_order; /* Order of subpattern matches */
int offset_capture; /* Capture match offsets: yes/no */
int unmatched_as_null; /* Null non-matches: yes/no */
uint32_t subpats_order; /* Order of subpattern matches */
uint32_t offset_capture; /* Capture match offsets: yes/no */
uint32_t unmatched_as_null; /* Null non-matches: yes/no */
PCRE2_SPTR *mark = NULL; /* Target for MARK name */
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
pcre2_match_data *match_data;
Expand Down Expand Up @@ -1258,18 +1258,18 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
/* {{{ php_pcre_replace_impl() */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
int no_utf_check = 0; /* Execution options */
uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
char **subpat_names; /* Array for named subpatterns */
int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
size_t num_subpats; /* Number of captured subpatterns */
size_t size_offsets; /* Size of the offsets array */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
int match_len; /* Length of the current match */
size_t match_len; /* Length of the current match */
int backref; /* Backreference number */
int start_offset; /* Where the new search starts */
int g_notempty=0; /* If the match should not be empty */
zend_off_t start_offset; /* Where the new search starts */
uint32_t g_notempty=0; /* If the match should not be empty */
char *walkbuf, /* Location of current replacement in the result */
*walk, /* Used to walk the replacement string */
*match, /* The current match */
Expand Down Expand Up @@ -1497,16 +1497,16 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
/* {{{ php_pcre_replace_func_impl() */
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count)
{
int no_utf_check = 0; /* Execution options */
uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
char **subpat_names; /* Array for named subpatterns */
int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
size_t num_subpats; /* Number of captured subpatterns */
size_t size_offsets; /* Size of the offsets array */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
int start_offset; /* Where the new search starts */
int g_notempty=0; /* If the match should not be empty */
uint32_t g_notempty=0; /* If the match should not be empty */
char *match, /* The current match */
*piece; /* The current piece of subject */
size_t result_len; /* Length of result */
Expand Down Expand Up @@ -2151,16 +2151,16 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
zend_long limit_val, zend_long flags)
{
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
int size_offsets; /* Size of the offsets array */
int no_utf_check = 0; /* Execution options */
size_t size_offsets; /* Size of the offsets array */
uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
size_t start_offset; /* Where the new search starts */
int next_offset; /* End of the last delimiter match + 1 */
int g_notempty = 0; /* If the match should not be empty */
PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
uint32_t g_notempty = 0; /* If the match should not be empty */
char *last_match; /* Location of last match */
int no_empty; /* If NO_EMPTY flag is set */
int delim_capture; /* If delimiters should be captured */
int offset_capture; /* If offsets should be captured */
uint32_t no_empty; /* If NO_EMPTY flag is set */
uint32_t delim_capture; /* If delimiters should be captured */
uint32_t offset_capture; /* If offsets should be captured */
zval tmp;
pcre2_match_data *match_data;
size_t jit_size;
Expand Down Expand Up @@ -2467,9 +2467,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
{
zval *entry; /* An entry in the input array */
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
int size_offsets; /* Size of the offsets array */
size_t size_offsets; /* Size of the offsets array */
int count = 0; /* Count of matched subpatterns */
int no_utf_check; /* Execution options */
uint32_t no_utf_check; /* Execution options */
zend_string *string_key;
zend_ulong num_key;
zend_bool invert; /* Whether to return non-matching
Expand Down
2 changes: 1 addition & 1 deletion ext/pcre/php_pcre.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ typedef struct _pcre_cache_entry pcre_cache_entry;
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex);

PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value,
zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset);
zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset);

PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str,
size_t limit, size_t *replace_count);
Expand Down
6 changes: 4 additions & 2 deletions ext/zip/php_zip.c
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,8 @@ int php_zip_pcre(zend_string *regexp, char *path, int path_len, zval *return_val
#endif
int files_cnt;
zend_string **namelist;
pcre2_match_context *mctx = php_pcre_mctx();
pcre2_general_context *gctx = php_pcre_gctx();

#ifdef ZTS
if (!IS_ABSOLUTE_PATH(path, path_len)) {
Expand Down Expand Up @@ -696,8 +698,8 @@ int php_zip_pcre(zend_string *regexp, char *path, int path_len, zval *return_val
continue;
}

match_data = pcre2_match_data_create_from_pattern(re, php_pcre_gctx());
rc = pcre2_match(re, ZSTR_VAL(namelist[i]), ZSTR_LEN(namelist[i]), 0, preg_options, match_data, php_pcre_mctx());
match_data = pcre2_match_data_create_from_pattern(re, gctx);
rc = pcre2_match(re, ZSTR_VAL(namelist[i]), ZSTR_LEN(namelist[i]), 0, preg_options, match_data, mctx);
pcre2_match_data_free(match_data);
/* 0 means that the vector is too small to hold all the captured substring offsets */
if (rc < 0) {
Expand Down