Skip to content

Commit 3ad422e

Browse files
authored
Avoid temporary string allocations in php_mb_parse_encoding_list() (#12714)
This brings execution time down from 0.91s to 0.86s on the reference benchmark [1]. [1] #12684 (comment)
1 parent a35a69f commit 3ad422e

File tree

3 files changed

+25
-16
lines changed

3 files changed

+25
-16
lines changed

ext/mbstring/libmbfl/mbfl/mbfl_encoding.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,11 @@ static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
318318
#define NAME_HASH_MAX_NAME_LENGTH 23
319319

320320
/* Look up an encoding by a NUL-terminated name.
 *
 * Convenience wrapper: measures `name` with strlen() and forwards the
 * pointer/length pair to mbfl_name2encoding_ex(), whose result is returned
 * unchanged. `name` must be a valid NUL-terminated string. */
const mbfl_encoding *mbfl_name2encoding(const char *name)
{
	const size_t name_len = strlen(name);

	return mbfl_name2encoding_ex(name, name_len);
}
324+
325+
const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
321326
{
322327
const mbfl_encoding *const *encoding;
323328

@@ -339,14 +344,13 @@ const mbfl_encoding *mbfl_name2encoding(const char *name)
339344
#endif
340345

341346
/* Use perfect hash lookup for name */
342-
size_t name_len = strlen(name);
343347
if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
344348
unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
345349
if (key <= 186) {
346350
int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
347351
if (offset >= 0) {
348352
encoding = mbfl_encoding_ptr_list + offset;
349-
if (strcasecmp((*encoding)->name, name) == 0) {
353+
if (strncasecmp((*encoding)->name, name, name_len) == 0) {
350354
return *encoding;
351355
}
352356
}

ext/mbstring/libmbfl/mbfl/mbfl_encoding.h

+1
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len)
285285
}
286286

287287
MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
288+
MBFLAPI extern const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len);
288289
MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
289290
MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void);
290291
MBFLAPI extern const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);

ext/mbstring/mbstring.c

+18-14
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,14 @@ static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_nam
276276
return mbfl_name2encoding(encoding_name);
277277
}
278278

279+
static const mbfl_encoding *php_mb_get_encoding_or_pass_ex(const char *encoding_name, size_t encoding_name_len) {
280+
if (strncmp(encoding_name, "pass", encoding_name_len) == 0) {
281+
return &mbfl_encoding_pass;
282+
}
283+
284+
return mbfl_name2encoding_ex(encoding_name, encoding_name_len);
285+
}
286+
279287
static size_t count_commas(const char *p, const char *end) {
280288
size_t count = 0;
281289
while ((p = memchr(p, ',', end - p))) {
@@ -300,15 +308,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
300308
} else {
301309
bool included_auto;
302310
size_t n, size;
303-
char *p1, *endp, *tmpstr;
311+
const char *p1, *endp, *tmpstr;
304312
const mbfl_encoding **entry, **list;
305313

306-
/* copy the value string for work */
307314
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
308-
tmpstr = (char *)estrndup(value+1, value_length-2);
315+
tmpstr = value + 1;
309316
value_length -= 2;
310317
} else {
311-
tmpstr = (char *)estrndup(value, value_length);
318+
tmpstr = value;
312319
}
313320

314321
endp = tmpstr + value_length;
@@ -319,20 +326,19 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
319326
included_auto = 0;
320327
p1 = tmpstr;
321328
while (1) {
322-
char *comma = memchr(p1, ',', endp - p1);
323-
char *p = comma ? comma : endp;
324-
*p = '\0';
329+
const char *comma = memchr(p1, ',', endp - p1);
330+
const char *p = comma ? comma : endp;
325331
/* trim spaces */
326332
while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
327333
p1++;
328334
}
329335
p--;
330336
while (p > p1 && (*p == ' ' || *p == '\t')) {
331-
*p = '\0';
332337
p--;
333338
}
339+
size_t p1_length = p - p1 + 1;
334340
/* convert to the encoding number and check encoding */
335-
if (strcasecmp(p1, "auto") == 0) {
341+
if (strncasecmp(p1, "auto", p1_length) == 0) {
336342
if (!included_auto) {
337343
const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
338344
const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
@@ -345,15 +351,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
345351
}
346352
} else {
347353
const mbfl_encoding *encoding =
348-
allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
354+
allow_pass_encoding ? php_mb_get_encoding_or_pass_ex(p1, p1_length) : mbfl_name2encoding_ex(p1, p1_length);
349355
if (!encoding) {
350356
/* Called from an INI setting modification */
351357
if (arg_num == 0) {
352-
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
358+
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%.*s\"", (int) p1_length, p1);
353359
} else {
354-
zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
360+
zend_argument_value_error(arg_num, "contains invalid encoding \"%.*s\"", (int) p1_length, p1);
355361
}
356-
efree(tmpstr);
357362
pefree(ZEND_VOIDP(list), persistent);
358363
return FAILURE;
359364
}
@@ -368,7 +373,6 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
368373
}
369374
*return_list = list;
370375
*return_size = n;
371-
efree(tmpstr);
372376
}
373377

374378
return SUCCESS;

0 commit comments

Comments
 (0)