Skip to content

ZPP: Reduce code bloat for Z_PARAM_STR #19359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

Girgias
Copy link
Member

@Girgias Girgias commented Aug 4, 2025

This idea is based on @nielsdos previous PR to reduce code bloat: #18436 To do so we create a specialized function to handle parameters that only accept strings and not null such that we can assign the zend_string to the destination directly.

Will take the assembly of the ext/zend_test function ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8) as an example:
Compiled in a release build and retrieved using:

./configure -C --disable-all --enable-zend_test --prefix=/path/to/custom/releases
make -jN
gdb sapi/cli/php
disassemble zif_zend_test_is_string_marked_as_valid_utf8

Prior to this commit (master@98e0dbcefedeecf5e4d032e54df1aeebaa118754)the ASM looks like:

Address range 0x7344e0 to 0x73455a:
   0x00000000007344e0 <+0>:	push   %rbx
   0x00000000007344e1 <+1>:	sub    $0x20,%rsp
   0x00000000007344e5 <+5>:	mov    0x2c(%rdi),%r9d
   0x00000000007344e9 <+9>:	cmp    $0x1,%r9d
   0x00000000007344ed <+13>:	jne    0x41ab56 <zif_zend_test_is_string_marked_as_valid_utf8-3250570>
   0x00000000007344f3 <+19>:	mov    %rsi,%rcx
   0x00000000007344f6 <+22>:	cmpb   $0x6,0x58(%rdi)
   0x00000000007344fa <+26>:	jne    0x734520 <zif_zend_test_is_string_marked_as_valid_utf8+64>
   0x00000000007344fc <+28>:	mov    0x50(%rdi),%rax
   0x0000000000734500 <+32>:	testb  $0x2,0x5(%rax)
   0x0000000000734504 <+36>:	setne  %al
   0x0000000000734507 <+39>:	movzbl %al,%eax
   0x000000000073450a <+42>:	add    $0x2,%eax
   0x000000000073450d <+45>:	mov    %eax,0x8(%rcx)
   0x0000000000734510 <+48>:	add    $0x20,%rsp
   0x0000000000734514 <+52>:	pop    %rbx
   0x0000000000734515 <+53>:	ret
   0x0000000000734516 <+54>:	cs nopw 0x0(%rax,%rax,1)
   0x0000000000734520 <+64>:	mov    %rsi,0x8(%rsp)
   0x0000000000734525 <+69>:	lea    0x50(%rdi),%rbx
   0x0000000000734529 <+73>:	lea    0x18(%rsp),%rsi
   0x000000000073452e <+78>:	add    $0x50,%rdi
   0x0000000000734532 <+82>:	mov    $0x1,%edx
   0x0000000000734537 <+87>:	mov    %r9d,0x4(%rsp)
   0x000000000073453c <+92>:	call   0x7afd50 <zend_parse_arg_str_slow>
   0x0000000000734541 <+97>:	mov    0x4(%rsp),%r9d
   0x0000000000734546 <+102>:	test   %al,%al
   0x0000000000734548 <+104>:	je     0x41ab3a <zif_zend_test_is_string_marked_as_valid_utf8.cold>
   0x000000000073454e <+110>:	mov    0x18(%rsp),%rax
   0x0000000000734553 <+115>:	mov    0x8(%rsp),%rcx
   0x0000000000734558 <+120>:	jmp    0x734500 <zif_zend_test_is_string_marked_as_valid_utf8+32>
Address range 0x41ab3a to 0x41ab73:
   0x000000000041ab3a <-3250598>:	mov    $0x9,%edi
   0x000000000041ab3f <-3250593>:	mov    $0x4,%ecx
   0x000000000041ab44 <-3250588>:	mov    %rbx,%r8
   0x000000000041ab47 <-3250585>:	xor    %edx,%edx
   0x000000000041ab49 <-3250583>:	mov    %r9d,%esi
   0x000000000041ab4c <-3250580>:	call   0x41f549 <zend_wrong_parameter_error>
   0x000000000041ab51 <-3250575>:	jmp    0x734510 <zif_zend_test_is_string_marked_as_valid_utf8+48>
   0x000000000041ab56 <-3250570>:	mov    $0x1,%edi
   0x000000000041ab5b <-3250565>:	mov    $0x1,%esi
   0x000000000041ab60 <-3250560>:	xor    %ebx,%ebx
   0x000000000041ab62 <-3250558>:	call   0x41f017 <zend_wrong_parameters_count_error>
   0x000000000041ab67 <-3250553>:	xor    %r9d,%r9d
   0x000000000041ab6a <-3250550>:	mov    $0x1,%edi
   0x000000000041ab6f <-3250545>:	xor    %ecx,%ecx
   0x000000000041ab71 <-3250543>:	jmp    0x41ab44 <zif_zend_test_is_string_marked_as_valid_utf8-3250588>

Afterwards it looks like:

Address range 0x7339d0 to 0x733a45:
   0x00000000007339d0 <+0>:	sub    $0x28,%rsp
   0x00000000007339d4 <+4>:	mov    0x2c(%rdi),%r9d
   0x00000000007339d8 <+8>:	cmp    $0x1,%r9d
   0x00000000007339dc <+12>:	jne    0x41acd4 <zif_zend_test_is_string_marked_as_valid_utf8-3247356>
   0x00000000007339e2 <+18>:	mov    %rsi,%rdx
   0x00000000007339e5 <+21>:	lea    0x50(%rdi),%r8
   0x00000000007339e9 <+25>:	cmpb   $0x6,0x58(%rdi)
   0x00000000007339ed <+29>:	jne    0x733a18 <zif_zend_test_is_string_marked_as_valid_utf8+72>
   0x00000000007339ef <+31>:	mov    0x50(%rdi),%rax
   0x00000000007339f3 <+35>:	test   %rax,%rax
   0x00000000007339f6 <+38>:	je     0x41acbc <zif_zend_test_is_string_marked_as_valid_utf8.cold>
   0x00000000007339fc <+44>:	testb  $0x2,0x5(%rax)
   0x0000000000733a00 <+48>:	setne  %al
   0x0000000000733a03 <+51>:	movzbl %al,%eax
   0x0000000000733a06 <+54>:	add    $0x2,%eax
   0x0000000000733a09 <+57>:	mov    %eax,0x8(%rdx)
   0x0000000000733a0c <+60>:	add    $0x28,%rsp
   0x0000000000733a10 <+64>:	ret
   0x0000000000733a11 <+65>:	nopl   0x0(%rax)
   0x0000000000733a18 <+72>:	mov    %rsi,0x18(%rsp)
   0x0000000000733a1d <+77>:	mov    %r8,%rdi
   0x0000000000733a20 <+80>:	mov    $0x1,%esi
   0x0000000000733a25 <+85>:	mov    %r9d,0x14(%rsp)
   0x0000000000733a2a <+90>:	mov    %r8,0x8(%rsp)
   0x0000000000733a2f <+95>:	call   0x7af240 <zend_parse_arg_str_slow>
   0x0000000000733a34 <+100>:	mov    0x18(%rsp),%rdx
   0x0000000000733a39 <+105>:	mov    0x14(%rsp),%r9d
   0x0000000000733a3e <+110>:	mov    0x8(%rsp),%r8
   0x0000000000733a43 <+115>:	jmp    0x7339f3 <zif_zend_test_is_string_marked_as_valid_utf8+35>
Address range 0x41acbc to 0x41acf2:
   0x000000000041acbc <-3247380>:	mov    $0x9,%edi
   0x000000000041acc1 <-3247375>:	mov    $0x4,%ecx
   0x000000000041acc6 <-3247370>:	xor    %edx,%edx
   0x000000000041acc8 <-3247368>:	mov    %r9d,%esi
   0x000000000041accb <-3247365>:	add    $0x28,%rsp
   0x000000000041accf <-3247361>:	jmp    0x41f6f4 <zend_wrong_parameter_error>
   0x000000000041acd4 <-3247356>:	mov    $0x1,%edi
   0x000000000041acd9 <-3247351>:	mov    $0x1,%esi
   0x000000000041acde <-3247346>:	call   0x41f1c2 <zend_wrong_parameters_count_error>
   0x000000000041ace3 <-3247341>:	mov    $0x1,%edi
   0x000000000041ace8 <-3247336>:	xor    %ecx,%ecx
   0x000000000041acea <-3247334>:	xor    %r8d,%r8d
   0x000000000041aced <-3247331>:	xor    %r9d,%r9d
   0x000000000041acf0 <-3247328>:	jmp    0x41acc6 <zif_zend_test_is_string_marked_as_valid_utf8-3247370>

For reference we have 772 usages of Z_PARAM_STR (found using git grep -o "Z_PARAM_STR" | wc -l)

This approach should also work for the "path" and "C string" version, and more generally for the pure HashTable and object FZPP specifier

This idea is based on @nielsdos previous PR to reduce codebloat: php#18436
To do so we create a specialized function to handle parameters that only accept strings and not null such that we can assign the zend_string to the destination directly.
Comment on lines +2321 to +2329
static zend_always_inline zend_string *zend_parse_arg_str_no_null(zval *arg, uint32_t arg_num)
{
if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) {
return Z_STR_P(arg);
} else {
return zend_parse_arg_str_slow(arg, arg_num);
}
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I agree with getting rid of the dest param for the _slow functions, does this particular extra function actually change anything? Please check the asm; given that it's force inlined it ought to be equivalent to zend_parse_arg_str_ex.

@Girgias
Copy link
Member Author

Girgias commented Aug 4, 2025

@bwoebi Applying the following patch:

diff --git a/Zend/zend_API.h b/Zend/zend_API.h
index e6d17cf29bb..b1f3202b4db 100644
--- a/Zend/zend_API.h
+++ b/Zend/zend_API.h
@@ -2088,13 +2088,7 @@ ZEND_API ZEND_COLD void zend_class_redeclaration_error_ex(int type, zend_string
                }
 
 #define Z_PARAM_STR(dest) \
-       Z_PARAM_PROLOGUE(false, false); \
-       dest = zend_parse_arg_str_no_null(_arg, _i); \
-       if (UNEXPECTED(dest == NULL)) { \
-               _expected_type = Z_EXPECTED_STRING; \
-               _error_code = ZPP_ERROR_WRONG_ARG; \
-               break; \
-       }
+       Z_PARAM_STR_EX(dest, 0, 0)
 
 #define Z_PARAM_STR_OR_NULL(dest) \
        Z_PARAM_STR_EX(dest, 1, 0)
@@ -2318,15 +2312,6 @@ static zend_always_inline bool zend_parse_arg_str(zval *arg, zend_string **dest,
        return zend_parse_arg_str_ex(arg, dest, check_null, arg_num, /* frameless */ false);
 }
 
-static zend_always_inline zend_string *zend_parse_arg_str_no_null(zval *arg, uint32_t arg_num)
-{
-       if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) {
-               return Z_STR_P(arg);
-       } else {
-               return zend_parse_arg_str_slow(arg, arg_num);
-       }
-}
-
 static zend_always_inline bool zend_parse_arg_string(zval *arg, char **dest, size_t *dest_len, bool check_null, uint32_t arg_num)
 {
        zend_string *str;

gives me the following assembly for ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8):

Address range 0x733e00 to 0x733e71:
   0x0000000000733e00 <+0>:	push   %rbx
   0x0000000000733e01 <+1>:	sub    $0x10,%rsp
   0x0000000000733e05 <+5>:	mov    0x2c(%rdi),%r9d
   0x0000000000733e09 <+9>:	cmp    $0x1,%r9d
   0x0000000000733e0d <+13>:	jne    0x41aeda <zif_zend_test_is_string_marked_as_valid_utf8-3247910>
   0x0000000000733e13 <+19>:	mov    %rsi,%rdx
   0x0000000000733e16 <+22>:	cmpb   $0x6,0x58(%rdi)
   0x0000000000733e1a <+26>:	jne    0x733e40 <zif_zend_test_is_string_marked_as_valid_utf8+64>
   0x0000000000733e1c <+28>:	mov    0x50(%rdi),%rax
   0x0000000000733e20 <+32>:	testb  $0x2,0x5(%rax)
   0x0000000000733e24 <+36>:	setne  %al
   0x0000000000733e27 <+39>:	movzbl %al,%eax
   0x0000000000733e2a <+42>:	add    $0x2,%eax
   0x0000000000733e2d <+45>:	mov    %eax,0x8(%rdx)
   0x0000000000733e30 <+48>:	add    $0x10,%rsp
   0x0000000000733e34 <+52>:	pop    %rbx
   0x0000000000733e35 <+53>:	ret
   0x0000000000733e36 <+54>:	cs nopw 0x0(%rax,%rax,1)
   0x0000000000733e40 <+64>:	mov    %rsi,0x8(%rsp)
   0x0000000000733e45 <+69>:	lea    0x50(%rdi),%rbx
   0x0000000000733e49 <+73>:	mov    $0x1,%esi
   0x0000000000733e4e <+78>:	add    $0x50,%rdi
   0x0000000000733e52 <+82>:	mov    %r9d,0x4(%rsp)
   0x0000000000733e57 <+87>:	call   0x7af5c0 <zend_parse_arg_str_slow>
   0x0000000000733e5c <+92>:	mov    0x4(%rsp),%r9d
   0x0000000000733e61 <+97>:	test   %rax,%rax
   0x0000000000733e64 <+100>:	je     0x41aebe <zif_zend_test_is_string_marked_as_valid_utf8.cold>
   0x0000000000733e6a <+106>:	mov    0x8(%rsp),%rdx
   0x0000000000733e6f <+111>:	jmp    0x733e20 <zif_zend_test_is_string_marked_as_valid_utf8+32>
Address range 0x41aebe to 0x41aef7:
   0x000000000041aebe <-3247938>:	mov    $0x9,%edi
   0x000000000041aec3 <-3247933>:	mov    $0x4,%ecx
   0x000000000041aec8 <-3247928>:	add    $0x10,%rsp
   0x000000000041aecc <-3247924>:	mov    %rbx,%r8
   0x000000000041aecf <-3247921>:	xor    %edx,%edx
   0x000000000041aed1 <-3247919>:	mov    %r9d,%esi
   0x000000000041aed4 <-3247916>:	pop    %rbx
   0x000000000041aed5 <-3247915>:	jmp    0x41f8d1 <zend_wrong_parameter_error>
   0x000000000041aeda <-3247910>:	mov    $0x1,%edi
   0x000000000041aedf <-3247905>:	mov    $0x1,%esi
   0x000000000041aee4 <-3247900>:	xor    %ebx,%ebx
   0x000000000041aee6 <-3247898>:	call   0x41f39f <zend_wrong_parameters_count_error>
   0x000000000041aeeb <-3247893>:	xor    %r9d,%r9d
   0x000000000041aeee <-3247890>:	mov    $0x1,%edi
   0x000000000041aef3 <-3247885>:	xor    %ecx,%ecx
   0x000000000041aef5 <-3247883>:	jmp    0x41aec8 <zif_zend_test_is_string_marked_as_valid_utf8-3247928>

Arguably I don't know anything about codegen and assembly, but it doesn't seem equivalent to me?

@bwoebi
Copy link
Member

bwoebi commented Aug 4, 2025

The new code has one branch less in the hot path, but does an extra push/pop (no idea why). I'd say the saved branch is better though.
So I'd go for that, given that it's less code.

@nielsdos
Copy link
Member

nielsdos commented Aug 4, 2025

The motivation in my original PR was to get rid of the stack protector overhead, something not showcased in the assembly here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants