From 14b4102123d21b3fc191d1c0fb26c090a5ae0bbd Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 26 Apr 2025 14:30:59 +0200 Subject: [PATCH 1/3] Use zend_string for arg_separators This allows us to avoid a call to `zend_ini_str` which took 6% of the profile on my i7-4790 for a call to `http_build_query`. Now we can just grab the value from the globals. In other files this can avoid some length recomputations. --- UPGRADING.INTERNALS | 4 ++++ ext/mbstring/mb_gpc.c | 2 +- ext/mbstring/mbstring.c | 2 +- ext/standard/http.c | 2 +- ext/standard/url_scanner_ex.re | 14 +++++++------- main/main.c | 4 ++-- main/php_globals.h | 4 ++-- main/php_variables.c | 2 +- sapi/cgi/cgi_main.c | 4 ++-- 9 files changed, 21 insertions(+), 17 deletions(-) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 7c7f093e50cce..0ffdd6faaf886 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -14,6 +14,10 @@ PHP 8.5 INTERNALS UPGRADE NOTES 1. Internal API changes ======================== +- Core + . PG(arg_separator).input and PG(arg_separator).output are now `zend_string*` + instead of `char*`. + - Zend . Added zend_safe_assign_to_variable_noref() function to safely assign a value to a non-reference zval. diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c index f90d86974fb73..5851cf1dd4ed8 100644 --- a/ext/mbstring/mb_gpc.c +++ b/ext/mbstring/mb_gpc.c @@ -100,7 +100,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) case PARSE_POST: case PARSE_GET: case PARSE_STRING: - separator = (char *) estrdup(PG(arg_separator).input); + separator = (char *) estrndup(ZSTR_VAL(PG(arg_separator).input), ZSTR_LEN(PG(arg_separator).input)); break; case PARSE_COOKIE: separator = ";\0"; diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 7f0cbaeb346ee..cbed77a2713fd 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1537,7 +1537,7 @@ PHP_FUNCTION(mb_parse_str) encstr = estrndup(encstr, encstr_len); info.data_type = PARSE_STRING; - info.separator = PG(arg_separator).input; + info.separator = ZSTR_VAL(PG(arg_separator).input); info.report_errors = true; info.to_encoding = MBSTRG(current_internal_encoding); info.from_encodings = MBSTRG(http_input_list); diff --git a/ext/standard/http.c b/ext/standard/http.c index fd862b605a7e6..d446e33b2ad21 100644 --- a/ext/standard/http.c +++ b/ext/standard/http.c @@ -124,7 +124,7 @@ PHPAPI void php_url_encode_hash_ex(HashTable *ht, smart_str *formstr, } if (!arg_sep) { - arg_sep = zend_ini_str("arg_separator.output", strlen("arg_separator.output"), false); + arg_sep = PG(arg_separator.output); if (ZSTR_LEN(arg_sep) == 0) { arg_sep = ZSTR_CHAR('&'); } diff --git a/ext/standard/url_scanner_ex.re b/ext/standard/url_scanner_ex.re index 1ce7521d7fab1..e3c4477cde363 100644 --- a/ext/standard/url_scanner_ex.re +++ b/ext/standard/url_scanner_ex.re @@ -188,7 +188,7 @@ alphadash = ([a-zA-Z] | "-"); #define YYLIMIT q #define YYMARKER r -static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator, int type) +static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const zend_string *separator, int type) { php_url *url_parts; @@ -271,7 +271,7 @@ static inline void append_modified_url(smart_str *url, smart_str *dest, smart_st smart_str_appendc(dest, '?'); if (url_parts->query) { smart_str_appends(dest, ZSTR_VAL(url_parts->query)); - smart_str_appends(dest, separator); + smart_str_append(dest, separator); smart_str_append_smart_str(dest, url_app); } else { smart_str_append_smart_str(dest, url_app); @@ -757,7 +757,7 @@ static inline void php_url_scanner_add_var_impl(const char *name, size_t name_le } if (url_state->url_app.s && ZSTR_LEN(url_state->url_app.s) != 0) { - smart_str_appends(&url_state->url_app, PG(arg_separator).output); + smart_str_append(&url_state->url_app, PG(arg_separator).output); } if (encode) { @@ -902,9 +902,9 @@ static inline zend_result php_url_scanner_reset_var_impl(zend_string *name, int /* Get end of url var */ limit = ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s); end = start + ZSTR_LEN(url_app.s); - separator_len = strlen(PG(arg_separator).output); + separator_len = ZSTR_LEN(PG(arg_separator).output); while (end < limit) { - if (!memcmp(end, PG(arg_separator).output, separator_len)) { + if (!memcmp(end, ZSTR_VAL(PG(arg_separator).output), separator_len)) { end += separator_len; sep_removed = 1; break; @@ -918,8 +918,8 @@ static inline zend_result php_url_scanner_reset_var_impl(zend_string *name, int } /* Check preceding separator */ if (!sep_removed - && (size_t)(start - PG(arg_separator).output) >= separator_len - && !memcmp(start - separator_len, PG(arg_separator).output, separator_len)) { + && (size_t)(start - ZSTR_VAL(PG(arg_separator).output)) >= separator_len + && !memcmp(start - separator_len, ZSTR_VAL(PG(arg_separator).output), separator_len)) { start -= separator_len; } /* Remove partially */ diff --git a/main/main.c b/main/main.c index 50894939782a0..18c8e2dfac7ec 100644 --- a/main/main.c +++ b/main/main.c @@ -771,8 +771,8 @@ PHP_INI_BEGIN() STD_PHP_INI_ENTRY("unserialize_callback_func", NULL, PHP_INI_ALL, OnUpdateString, unserialize_callback_func, php_core_globals, core_globals) STD_PHP_INI_ENTRY("serialize_precision", "-1", PHP_INI_ALL, OnSetSerializePrecision, serialize_precision, php_core_globals, core_globals) - STD_PHP_INI_ENTRY("arg_separator.output", "&", PHP_INI_ALL, OnUpdateStringUnempty, arg_separator.output, php_core_globals, core_globals) - STD_PHP_INI_ENTRY("arg_separator.input", "&", PHP_INI_SYSTEM|PHP_INI_PERDIR, OnUpdateStringUnempty, arg_separator.input, php_core_globals, core_globals) + STD_PHP_INI_ENTRY("arg_separator.output", "&", PHP_INI_ALL, OnUpdateStrNotEmpty, arg_separator.output, php_core_globals, core_globals) + STD_PHP_INI_ENTRY("arg_separator.input", "&", PHP_INI_SYSTEM|PHP_INI_PERDIR, OnUpdateStrNotEmpty, arg_separator.input, php_core_globals, core_globals) STD_PHP_INI_ENTRY("auto_append_file", NULL, PHP_INI_SYSTEM|PHP_INI_PERDIR, OnUpdateString, auto_append_file, php_core_globals, core_globals) STD_PHP_INI_ENTRY("auto_prepend_file", NULL, PHP_INI_SYSTEM|PHP_INI_PERDIR, OnUpdateString, auto_prepend_file, php_core_globals, core_globals) diff --git a/main/php_globals.h b/main/php_globals.h index b2f2696c2db2c..ab7a9a00b2f1d 100644 --- a/main/php_globals.h +++ b/main/php_globals.h @@ -48,8 +48,8 @@ extern ZEND_API struct _php_core_globals core_globals; struct _php_tick_function_entry; typedef struct _arg_separators { - char *output; - char *input; + zend_string *output; + zend_string *input; } arg_separators; struct _php_core_globals { diff --git a/main/php_variables.c b/main/php_variables.c index 7569fd43e900c..b81c049f6c5b3 100644 --- a/main/php_variables.c +++ b/main/php_variables.c @@ -527,7 +527,7 @@ SAPI_API SAPI_TREAT_DATA_FUNC(php_default_treat_data) switch (arg) { case PARSE_GET: case PARSE_STRING: - separator = PG(arg_separator).input; + separator = ZSTR_VAL(PG(arg_separator).input); break; case PARSE_COOKIE: separator = ";\0"; diff --git a/sapi/cgi/cgi_main.c b/sapi/cgi/cgi_main.c index 392a95d25ea3b..e3cd6f49b9f0c 100644 --- a/sapi/cgi/cgi_main.c +++ b/sapi/cgi/cgi_main.c @@ -2420,7 +2420,7 @@ consult the installation file that came with this distribution, or visit \n\ * test.php v1=test "v2=hello world!" */ if (!SG(request_info).query_string && argc > php_optind) { - size_t slen = strlen(PG(arg_separator).input); + size_t slen = ZSTR_LEN(PG(arg_separator).input); len = 0; for (i = php_optind; i < argc; i++) { if (i < (argc - 1)) { @@ -2436,7 +2436,7 @@ consult the installation file that came with this distribution, or visit \n\ for (i = php_optind; i < argc; i++) { strlcat(s, argv[i], len); if (i < (argc - 1)) { - strlcat(s, PG(arg_separator).input, len); + strlcat(s, ZSTR_VAL(PG(arg_separator).input), len); } } SG(request_info).query_string = s; From 1ec7bb374f86a98baf8418a993ceb48a88b9800e Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 26 Apr 2025 14:33:29 +0200 Subject: [PATCH 2/3] Use zend_string_efree() for temporary strings This avoids some assembly code bloat. --- ext/standard/http.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/standard/http.c b/ext/standard/http.c index d446e33b2ad21..32c8a66b35a9d 100644 --- a/ext/standard/http.c +++ b/ext/standard/http.c @@ -181,7 +181,7 @@ PHPAPI void php_url_encode_hash_ex(HashTable *ht, smart_str *formstr, } else { new_prefix = zend_string_concat2(ZSTR_VAL(encoded_key), ZSTR_LEN(encoded_key), "%5B", strlen("%5B")); } - zend_string_release_ex(encoded_key, false); + zend_string_efree(encoded_key); } else { /* is integer index */ char *index_int_as_str; size_t index_int_as_str_len; @@ -210,7 +210,7 @@ PHPAPI void php_url_encode_hash_ex(HashTable *ht, smart_str *formstr, GC_TRY_PROTECT_RECURSION(ht); php_url_encode_hash_ex(HASH_OF(zdata), formstr, NULL, 0, new_prefix, (Z_TYPE_P(zdata) == IS_OBJECT ? zdata : NULL), arg_sep, enc_type); GC_TRY_UNPROTECT_RECURSION(ht); - zend_string_release_ex(new_prefix, false); + zend_string_efree(new_prefix); } else if (Z_TYPE_P(zdata) == IS_NULL || Z_TYPE_P(zdata) == IS_RESOURCE) { /* Skip these types */ continue; From 876baa4d7743848dacdfaf58b3587852af5915f0 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 26 Apr 2025 14:34:33 +0200 Subject: [PATCH 3/3] Implement php_url_encode_to_smart_str() and use it in http_build_query() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids temporary allocations and some copies. For this benchmark: ```php for ($i=0;$i<2000000;$i++) { http_build_query([999999 => 'foo', 'aaab' => 'def', 'aaaaa'=>1, 'aaaaaaaa' => 'a']); } ``` On an i7-4790: ``` Benchmark 1: ./sapi/cli/php ../buildquery.php Time (mean ± σ): 298.9 ms ± 7.3 ms [User: 295.6 ms, System: 2.3 ms] Range (min … max): 293.6 ms … 314.0 ms 10 runs Benchmark 2: ./sapi/cli/php_old ../buildquery.php Time (mean ± σ): 594.8 ms ± 8.6 ms [User: 590.8 ms, System: 2.4 ms] Range (min … max): 586.3 ms … 616.1 ms 10 runs Summary ./sapi/cli/php ../buildquery.php ran 1.99 ± 0.06 times faster than ./sapi/cli/php_old ../buildquery.php ``` For this benchmark: ```php for ($i=0;$i<2000000;$i++) { http_build_query(['test' => 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa']); } ``` On an i7-4790: ``` Benchmark 1: ./sapi/cli/php ../buildquery.php Time (mean ± σ): 188.4 ms ± 6.7 ms [User: 184.6 ms, System: 2.9 ms] Range (min … max): 182.0 ms … 205.4 ms 14 runs Benchmark 2: ./sapi/cli/php_old ../buildquery.php Time (mean ± σ): 323.9 ms ± 8.7 ms [User: 319.8 ms, System: 2.7 ms] Range (min … max): 318.0 ms … 341.2 ms 10 runs Summary ./sapi/cli/php ../buildquery.php ran 1.72 ± 0.08 times faster than ./sapi/cli/php_old ../buildquery.php ``` --- UPGRADING.INTERNALS | 1 + ext/standard/http.c | 30 ++++-------------------------- ext/standard/url.c | 36 ++++++++++++++++++++++++------------ ext/standard/url.h | 1 + 4 files changed, 30 insertions(+), 38 deletions(-) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 0ffdd6faaf886..795a4f048bf70 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -68,6 +68,7 @@ PHP 8.5 INTERNALS UPGRADE NOTES - ext/standard . Added php_url_decode_ex() and php_raw_url_decode_ex() that unlike their non-ex counterparts do not work in-place. + . Added php_url_encode_to_smart_str() to encode a URL to a smart_str buffer. ======================== 4. OpCode changes diff --git a/ext/standard/http.c b/ext/standard/http.c index 32c8a66b35a9d..0a2ca51f012ca 100644 --- a/ext/standard/http.c +++ b/ext/standard/http.c @@ -37,14 +37,7 @@ static void php_url_encode_scalar(zval *scalar, smart_str *form_str, smart_str_append(form_str, key_prefix); } if (index_string) { - zend_string *encoded_key; - if (encoding_type == PHP_QUERY_RFC3986) { - encoded_key = php_raw_url_encode(index_string, index_string_len); - } else { - encoded_key = php_url_encode(index_string, index_string_len); - } - smart_str_append(form_str, encoded_key); - zend_string_free(encoded_key); + php_url_encode_to_smart_str(form_str, index_string, index_string_len, encoding_type == PHP_QUERY_RFC3986); } else { /* Numeric key */ if (num_prefix) { @@ -59,31 +52,16 @@ static void php_url_encode_scalar(zval *scalar, smart_str *form_str, try_again: switch (Z_TYPE_P(scalar)) { - case IS_STRING: { - zend_string *encoded_data; - if (encoding_type == PHP_QUERY_RFC3986) { - encoded_data = php_raw_url_encode(Z_STRVAL_P(scalar), Z_STRLEN_P(scalar)); - } else { - encoded_data = php_url_encode(Z_STRVAL_P(scalar), Z_STRLEN_P(scalar)); - } - smart_str_append(form_str, encoded_data); - zend_string_free(encoded_data); + case IS_STRING: + php_url_encode_to_smart_str(form_str, Z_STRVAL_P(scalar), Z_STRLEN_P(scalar), encoding_type == PHP_QUERY_RFC3986); break; - } case IS_LONG: smart_str_append_long(form_str, Z_LVAL_P(scalar)); break; case IS_DOUBLE: { - zend_string *encoded_data; zend_string *tmp = zend_double_to_str(Z_DVAL_P(scalar)); - if (encoding_type == PHP_QUERY_RFC3986) { - encoded_data = php_raw_url_encode(ZSTR_VAL(tmp), ZSTR_LEN(tmp)); - } else { - encoded_data = php_url_encode(ZSTR_VAL(tmp), ZSTR_LEN(tmp)); - } - smart_str_append(form_str, encoded_data); + php_url_encode_to_smart_str(form_str, ZSTR_VAL(tmp), ZSTR_LEN(tmp), encoding_type == PHP_QUERY_RFC3986); zend_string_free(tmp); - zend_string_free(encoded_data); break; } case IS_FALSE: diff --git a/ext/standard/url.c b/ext/standard/url.c index da2ddea067314..41c0f3712ca24 100644 --- a/ext/standard/url.c +++ b/ext/standard/url.c @@ -27,6 +27,7 @@ #include "url.h" #include "file.h" +#include "Zend/zend_smart_str.h" /* {{{ free_url */ PHPAPI void php_url_free(php_url *theurl) @@ -449,16 +450,13 @@ static int php_htoi(const char *s) static const unsigned char hexchars[] = "0123456789ABCDEF"; -static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t len, bool raw) /* {{{ */ { +static zend_always_inline size_t php_url_encode_impl(unsigned char *to, const char *s, size_t len, bool raw) /* {{{ */ { unsigned char c; - unsigned char *to; unsigned char const *from, *end; - zend_string *start; + const unsigned char *to_init = to; from = (unsigned char *)s; end = (unsigned char *)s + len; - start = zend_string_safe_alloc(3, len, 0, 0); - to = (unsigned char*)ZSTR_VAL(start); #ifdef __SSE2__ while (from + 16 < end) { @@ -537,19 +535,24 @@ static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t *to++ = c; } } - *to = '\0'; - ZEND_ASSERT(!ZSTR_IS_INTERNED(start) && GC_REFCOUNT(start) == 1); - start = zend_string_truncate(start, to - (unsigned char*)ZSTR_VAL(start), 0); - - return start; + return to - to_init; } /* }}} */ +static zend_always_inline zend_string *php_url_encode_helper(char const *s, size_t len, bool raw) +{ + zend_string *result = zend_string_safe_alloc(3, len, 0, false); + size_t length = php_url_encode_impl((unsigned char *) ZSTR_VAL(result), s, len, raw); + ZSTR_VAL(result)[length] = '\0'; + ZEND_ASSERT(!ZSTR_IS_INTERNED(result) && GC_REFCOUNT(result) == 1); + return zend_string_truncate(result, length, false); +} + /* {{{ php_url_encode */ PHPAPI zend_string *php_url_encode(char const *s, size_t len) { - return php_url_encode_impl(s, len, 0); + return php_url_encode_helper(s, len, false); } /* }}} */ @@ -616,10 +619,19 @@ PHPAPI size_t php_url_decode(char *str, size_t len) /* {{{ php_raw_url_encode */ PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len) { - return php_url_encode_impl(s, len, 1); + return php_url_encode_helper(s, len, true); } /* }}} */ +PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw) +{ + size_t start_length = smart_str_get_len(buf); + size_t extend = zend_safe_address_guarded(3, len, 0); + char *dest = smart_str_extend(buf, extend); + size_t length = php_url_encode_impl((unsigned char *) dest, s, len, raw); + ZSTR_LEN(buf->s) = start_length + length; +} + /* {{{ URL-encodes string */ PHP_FUNCTION(rawurlencode) { diff --git a/ext/standard/url.h b/ext/standard/url.h index 5c531c0086a20..3885ecece5780 100644 --- a/ext/standard/url.h +++ b/ext/standard/url.h @@ -38,6 +38,7 @@ PHPAPI size_t php_raw_url_decode(char *str, size_t len); /* return value: length PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len); PHPAPI zend_string *php_url_encode(char const *s, size_t len); PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len); +PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw); #define PHP_URL_SCHEME 0 #define PHP_URL_HOST 1