Skip to content

Proposal: Add iterable\any(iterable $input, ?callable $cb=null), all(...), none(...), find(...), reduce(...) #6053

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Prev Previous commit
Next Next commit
Implement iterator\reduce($carry, $item): mixed
Semantics are similar to array_reduce
  • Loading branch information
TysonAndre committed Jun 1, 2021
commit 06310e1ee5866bee5f802ad524bf099900bd5cd7
125 changes: 99 additions & 26 deletions ext/standard/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -5731,36 +5731,11 @@ PHP_FUNCTION(array_product)
}
/* }}} */

/* {{{ Iteratively reduce the array to a single value via the callback. */
PHP_FUNCTION(array_reduce)
static zend_always_inline void php_array_reduce(HashTable *htbl, zend_fcall_info fci, zend_fcall_info_cache fci_cache, zval* return_value) /* {{{ */
{
zval *input;
zval args[2];
zval *operand;
zval retval;
zend_fcall_info fci;
zend_fcall_info_cache fci_cache = empty_fcall_info_cache;
zval *initial = NULL;
HashTable *htbl;

ZEND_PARSE_PARAMETERS_START(2, 3)
Z_PARAM_ARRAY(input)
Z_PARAM_FUNC(fci, fci_cache)
Z_PARAM_OPTIONAL
Z_PARAM_ZVAL(initial)
ZEND_PARSE_PARAMETERS_END();


if (ZEND_NUM_ARGS() > 2) {
ZVAL_COPY(return_value, initial);
} else {
ZVAL_NULL(return_value);
}

/* (zval **)input points to an element of argument stack
* the base pointer of which is subject to change.
* thus we need to keep the pointer to the hashtable for safety */
htbl = Z_ARRVAL_P(input);

if (zend_hash_num_elements(htbl) == 0) {
return;
Expand Down Expand Up @@ -5788,6 +5763,69 @@ PHP_FUNCTION(array_reduce)
}
} ZEND_HASH_FOREACH_END();
}

/* State shared between php_traversable_reduce() and its per-element callback
 * php_traversable_reduce_elem(); it is passed through the opaque user pointer
 * of spl_iterator_apply(). */
typedef struct {
/* Call descriptor for the user callback; its retval slot carries the running result. */
zend_fcall_info fci;
/* Call cache handed to zend_call_function() together with fci. */
zend_fcall_info_cache fcc;
/* Sized for the callback's two arguments (carry, current element). */
zval args[2];
} traversable_reduce_data;

/* Per-element callback for spl_iterator_apply(): folds the iterator's current
 * value into the running carry.
 *
 * The carry lives in fci->retval. On each step it is moved into params[0], the
 * current element is copied into params[1], and the user callback's result is
 * written back into fci->retval.
 *
 * puser must point at an initialised traversable_reduce_data.
 * Returns ZEND_HASH_APPLY_KEEP to continue iterating, or ZEND_HASH_APPLY_STOP
 * when the element could not be fetched, the call failed, or an exception is
 * pending. */
static int php_traversable_reduce_elem(zend_object_iterator *iter, void *puser) /* {{{ */
{
	traversable_reduce_data *reduce_data = puser;
	zend_fcall_info *fci = &reduce_data->fci;
	ZEND_ASSERT(ZEND_FCI_INITIALIZED(*fci));

	zval *operand = iter->funcs->get_current_data(iter);
	/* get_current_data() may yield NULL (typically with an exception pending);
	 * bail out before touching the carry so it is still owned by fci->retval. */
	if (UNEXPECTED(operand == NULL || EG(exception))) {
		return ZEND_HASH_APPLY_STOP;
	}

	/* Move (not copy) the carry into the first argument; retval is reset so the
	 * carry has exactly one owner while the callback runs. */
	ZVAL_COPY_VALUE(&fci->params[0], fci->retval);
	ZVAL_COPY(&fci->params[1], operand);
	ZVAL_NULL(fci->retval);

	int result = zend_call_function(fci, &reduce_data->fcc);

	/* Release the argument slots we own. `operand` points into the iterator and
	 * may be stale once the callback has run, so it must not be used here. */
	zval_ptr_dtor(&fci->params[1]);
	zval_ptr_dtor(&fci->params[0]);

	if (UNEXPECTED(result == FAILURE || EG(exception))) {
		return ZEND_HASH_APPLY_STOP;
	}

	/* A callback declared to return by reference hands back a reference wrapper;
	 * the carry (and eventually the function's return value) must be a plain value. */
	if (UNEXPECTED(Z_ISREF_P(fci->retval))) {
		zend_unwrap_reference(fci->retval);
	}
	return ZEND_HASH_APPLY_KEEP;
}

/* Reduces a Traversable object to a single value.
 *
 * obj          - the object to iterate; passed straight to spl_iterator_apply().
 * fci          - call descriptor of the user callback (copied, then adjusted).
 * fci_cache    - call cache belonging to fci.
 * return_value - must already hold the initial carry; it is used as the
 *                callback's retval slot and therefore ends up holding the
 *                final result.
 */
static zend_always_inline void php_traversable_reduce(zval *obj, zend_fcall_info fci, zend_fcall_info_cache fci_cache, zval* return_value) /* {{{ */
{
	traversable_reduce_data reduce_data;

	reduce_data.fci = fci;
	reduce_data.fci.retval = return_value;
	reduce_data.fci.param_count = 2;
	/* Use the argument storage that already lives in the state struct instead
	 * of a second, function-local array. */
	reduce_data.fci.params = reduce_data.args;
	reduce_data.fcc = fci_cache;

	spl_iterator_apply(obj, php_traversable_reduce_elem, (void*)&reduce_data);
}
/* }}} */

/* {{{ array_reduce(): collapses an array into one value by repeatedly invoking
 * the callback; argument parsing happens here, the folding itself is delegated
 * to php_array_reduce(). */
PHP_FUNCTION(array_reduce)
{
	zval *array_arg;
	zval *seed = NULL;
	zend_fcall_info callback;
	zend_fcall_info_cache callback_cache = empty_fcall_info_cache;

	ZEND_PARSE_PARAMETERS_START(2, 3)
		Z_PARAM_ARRAY(array_arg)
		Z_PARAM_FUNC(callback, callback_cache)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(seed)
	ZEND_PARSE_PARAMETERS_END();

	/* The accumulator starts as NULL unless a third argument was supplied. */
	if (ZEND_NUM_ARGS() <= 2) {
		ZVAL_NULL(return_value);
	} else {
		ZVAL_COPY(return_value, seed);
	}

	php_array_reduce(Z_ARRVAL_P(array_arg), callback, callback_cache, return_value);
}
/* }}} */

/* {{{ Filters elements from the array via the callback. */
Expand Down Expand Up @@ -6346,3 +6384,38 @@ PHP_FUNCTION(none)
php_iterable_until(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 1);
}
/* }}} */

/* {{{ reduce(): folds any iterable (array or Traversable object) into a single
 * value via the callback, starting from the optional initial value (NULL when
 * omitted). */
PHP_FUNCTION(reduce)
{
	zval *iterable_arg;
	zval *seed = NULL;
	zend_fcall_info callback;
	zend_fcall_info_cache callback_cache = empty_fcall_info_cache;

	ZEND_PARSE_PARAMETERS_START(2, 3)
		Z_PARAM_ITERABLE(iterable_arg)
		Z_PARAM_FUNC(callback, callback_cache)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(seed)
	ZEND_PARSE_PARAMETERS_END();

	/* Seed the accumulator before dispatching on the input kind. */
	if (ZEND_NUM_ARGS() <= 2) {
		ZVAL_NULL(return_value);
	} else {
		ZVAL_COPY(return_value, seed);
	}

	switch (Z_TYPE_P(iterable_arg)) {
		case IS_OBJECT:
			/* The iterable parameter check only lets Traversable objects through. */
			ZEND_ASSERT(instanceof_function(Z_OBJCE_P(iterable_arg), zend_ce_traversable));
			php_traversable_reduce(iterable_arg, callback, callback_cache, return_value);
			break;
		case IS_ARRAY:
			php_array_reduce(Z_ARRVAL_P(iterable_arg), callback, callback_cache, return_value);
			break;
		EMPTY_SWITCH_DEFAULT_CASE();
	}
}
/* }}} */
2 changes: 2 additions & 0 deletions ext/standard/basic_functions.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -1522,4 +1522,6 @@ function all(iterable $iterable, ?callable $callback = null): bool {}

function none(iterable $iterable, ?callable $callback = null): bool {}

function reduce(iterable $iterable, callable $callback, mixed $initial = null): mixed {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I strongly prefer removing $initial and adding fold which always requires it:

function fold(iterable $iterable, callable $callback, mixed $initial): mixed {}
function reduce(iterable $iterable, callable $callback): mixed {}

The fold function doesn't throw on empty, and the reduce will. This pattern exists in other languages, such as Kotlin.

I'm happy to do this work if you'll agree.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like the inconsistency with array_reduce (which has optional $initial=null) would have more objectors for making it harder to learn the language or switch code from array_reduce to *reduce intuitively for beginners.

Copy link
Contributor

@morrisonlevi morrisonlevi Jun 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an error condition in reduce where there is not an initial value and an empty iterable, and it should throw because there is no legal value we can return that isn't already possible in the reduction. We should not repeat the mistakes of the past. You argue in another comment that find is useful in other languages, and yet you don't buy that same argument here? What gives?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this again, I'd agree my earlier proposal for reduce was a mistake and it's worth changing, adding fold and either removing reduce entirely or requiring a non-empty array.

The other argument was about including a function, not a change

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can work on fold tonight.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you okay with these signatures and semantics for fold and reduce?

I imagine there is some discussion to be had about naming the parameters, to make sure named parameters are a good experience, so let me know what you think. I picked the name $into from Swift. I used $by because it's short but not an abbreviation like acc; a quick glance at other languages' docs didn't turn up anything better.

namespace Iterable;

/**
 * Left fold: threads an accumulator through every element of $seq.
 *
 * The callback receives the current accumulator and the next element and
 * returns the new accumulator. An empty $seq yields $into unchanged.
 *
 * @template Element
 * @template Result
 * @param iterable<Element> $seq elements to combine, in iteration order
 * @param Result $into starting accumulator
 * @param callable(Result, Element): Result $by combining function
 * @return Result the accumulator after the last element
 */
function fold(iterable $seq, mixed $into, callable $by): mixed {
    $accumulator = $into;
    foreach ($seq as $element) {
        $accumulator = $by($accumulator, $element);
    }

    return $accumulator;
}

/**
 * Private helper, wouldn't actually be exposed.
 *
 * Normalises any iterable to an \Iterator so callers can drive it manually
 * with rewind()/valid()/current()/next().
 *
 * @template Key
 * @template Value
 * @param iterable<Key, Value> $seq
 * @return \Iterator<Key, Value>
 */
function to_iterator(iterable $seq): \Iterator {
    if (\is_array($seq)) {
        return new \ArrayIterator($seq);
    }

    // getIterator() only promises a Traversable, so keep unwrapping until we
    // reach something that is no longer an aggregate.
    while ($seq instanceof \IteratorAggregate) {
        $seq = $seq->getIterator();
    }

    // A Traversable that is neither Iterator nor IteratorAggregate is adapted
    // with IteratorIterator rather than relying on assert(), which is compiled
    // out in production configurations.
    return $seq instanceof \Iterator ? $seq : new \IteratorIterator($seq);
}


/**
 * Combines the elements of $seq pairwise, using the first element as the
 * starting accumulator (so the callback is never invoked with a synthetic
 * initial value).
 *
 * @template Element
 * @param iterable<Element> $seq
 * @param callable(Element, Element): Element $by
 * @return Element
 * @throws \ValueError if $seq does not have at least 1 element
 */
function reduce(iterable $seq, callable $by): mixed {
    $cursor = namespace\to_iterator($seq);
    $cursor->rewind();

    if ($cursor->valid()) {
        // The first element seeds the accumulator; folding starts at the second.
        $result = $cursor->current();
        $cursor->next();
        while ($cursor->valid()) {
            $result = $by($result, $cursor->current());
            $cursor->next();
        }

        return $result;
    }

    throw new \ValueError("parameter \$seq to reduce... was empty");
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For fold, I think having the callback third would be hard to remember when it's second in the other reduce() functions.

The inner implementation seems reasonable enough. I assume the ArrayObject is just for illustrating the behavior and not the internal implementation.

$seq seems like a harder-to-remember naming choice compared to the $array/$iterable used elsewhere - https://www.php.net/manual/en/function.iterator-apply.php and https://www.php.net/manual/en/function.array-walk.php - especially for non-English speakers

PHP's already using $initial for https://www.php.net/manual/en/function.array-reduce.php and I don't see a strong reason to switch to a different name - initial's been used elsewhere (e.g. https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/Reduce)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The inner implementation seem reasonable enough. I assume the ArrayObject is just for illustrating the behavior and not the internal implementation

The ArrayIterator is just for showing behavior, yes. Notably, this will not pass NULL as the first parameter to the callback on the very first time it is called, unlike array_reduce and what this PR currently does.

As an example with the data set [1, 3, 5, 7]:

$result = reduce([1, 3, 5, 7], function ($into, $value) {
    $retval = $into + $value;
    echo "fn ({$into}, {$value}) => {$retval}\n";
    return $retval;
});

This will print:

fn (1, 3) => 4
fn (4, 5) => 9
fn (9, 7) => 16

And not:

fn(, 1) => 1
fn (1, 3) => 4
fn (4, 5) => 9
fn (9, 7) => 16

Copy link
Contributor

@morrisonlevi morrisonlevi Jun 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I implemented the changes to reduce on this branch: https://github.com/morrisonlevi/php-src/tree/levi/any-all-iterable-checks. For some reason it wouldn't let me select your fork as the merge-base, so I didn't open a PR, but you can look at the last two commits. I did not yet add fold.


}
10 changes: 9 additions & 1 deletion ext/standard/basic_functions_arginfo.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: 59dd3527ae66c463cb59b6cc20b41d7fbdcc0ff0 */
* Stub hash: abc9b998fe34151e0bd8b5dbbea97ada44484a08 */

ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_set_time_limit, 0, 1, _IS_BOOL, 0)
ZEND_ARG_TYPE_INFO(0, seconds, IS_LONG, 0)
Expand Down Expand Up @@ -2227,6 +2227,12 @@ ZEND_END_ARG_INFO()

#define arginfo_iterable_none arginfo_iterable_any

ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_iterable_reduce, 0, 2, IS_MIXED, 0)
ZEND_ARG_TYPE_INFO(0, iterable, IS_ITERABLE, 0)
ZEND_ARG_TYPE_INFO(0, callback, IS_CALLABLE, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, initial, IS_MIXED, 0, "null")
ZEND_END_ARG_INFO()


ZEND_FUNCTION(set_time_limit);
ZEND_FUNCTION(header_register_callback);
Expand Down Expand Up @@ -2851,6 +2857,7 @@ ZEND_FUNCTION(sapi_windows_generate_ctrl_event);
ZEND_FUNCTION(any);
ZEND_FUNCTION(all);
ZEND_FUNCTION(none);
ZEND_FUNCTION(reduce);


static const zend_function_entry ext_functions[] = {
Expand Down Expand Up @@ -3507,6 +3514,7 @@ static const zend_function_entry ext_functions[] = {
ZEND_NS_FE("iterable", any, arginfo_iterable_any)
ZEND_NS_FE("iterable", all, arginfo_iterable_all)
ZEND_NS_FE("iterable", none, arginfo_iterable_none)
ZEND_NS_FE("iterable", reduce, arginfo_iterable_reduce)
ZEND_FE_END
};

Expand Down
34 changes: 34 additions & 0 deletions ext/standard/tests/iterable/reduce_array.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
--TEST--
Test reduce() function
--FILE--
<?php

use function iterable\reduce;

/*
Prototype: mixed iterable\reduce(iterable $iterable, callable($carry, $item): mixed $callback, mixed $initial = null);
Description: Iterate over iterable and reduce
*/

function dump_reduce(...$args) {
try {
var_dump(reduce(...$args));
} catch (Error $e) {
printf("Caught %s: %s\n", $e::class, $e->getMessage());
}
}

// The result of strtolower is locale-dependent, meaning that it cannot be converted to a constant by opcache.
dump_reduce([]);
dump_reduce([], function () {}, strtolower('TEST'));
dump_reduce(['x', 'y', 'z'], function ($carry, $item) { $carry .= $item; return $carry; }, strtolower('TEST'));
dump_reduce([strtolower('WORLD'), '!'], function ($carry, $item) { $carry .= $item; return $carry; }, strtolower('HELLO'));
dump_reduce([strtolower('WORLD')], function (string $carry, string $item): string { return $carry . $item; }, strtolower('HELLO'));

?>
--EXPECT--
Caught ArgumentCountError: iterable\reduce() expects at least 2 arguments, 1 given
string(4) "test"
string(7) "testxyz"
string(11) "helloworld!"
string(10) "helloworld"
54 changes: 54 additions & 0 deletions ext/standard/tests/iterable/reduce_traversable.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
--TEST--
Test reduce() function on Traversable
--FILE--
<?php

use function iterable\reduce;

/*
Prototype: mixed iterable\reduce(iterable $iterable, callable($carry, $item): mixed $callback, mixed $initial = null);
Description: Iterate over iterable and reduce
*/

function dump_reduce(...$args) {
try {
var_dump(reduce(...$args));
} catch (Error $e) {
printf("Caught %s: %s\n", $e::class, $e->getMessage());
}
}

function generate_strings() {
yield strtoupper('Hello');
yield ' ';
yield strtoupper('World!');
return strtoupper('UNUSED');
}

// The result of strtolower is locale-dependent, meaning that it cannot be converted to a constant by opcache. Also, test reference counting.
dump_reduce(new ArrayObject([]));
dump_reduce(new ArrayObject([]), function () {}, strtolower('TEST'));
dump_reduce(new ArrayObject(['x', 'y', 'z']), function ($carry, $item) { $carry .= $item; return $carry; }, strtolower('TEST'));
dump_reduce(new ArrayObject([strtolower('WORLD'), '!']), function ($carry, $item) { $carry .= $item; return $carry; }, strtolower('HELLO'));
dump_reduce(new ArrayObject([strtolower('WORLD')]), function (string $carry, string $item): string { return $carry . $item; }, strtolower('HELLO'));
dump_reduce(generate_strings(), function (string $carry, string $item): string { return $carry . $item; }, '');
dump_reduce(generate_strings(), function ($carry, $item): string { $item = $carry . $item; unset($carry);return $item; }, '');
// Passing by reference is not supported.
dump_reduce(generate_strings(), function (string &$carry, string $item): string { $carry .= $item; return $carry;}, '');

?>
--EXPECTF--
Caught ArgumentCountError: iterable\reduce() expects at least 2 arguments, 1 given
string(4) "test"
string(7) "testxyz"
string(11) "helloworld!"
string(10) "helloworld"
string(12) "HELLO WORLD!"
string(12) "HELLO WORLD!"

Warning: {closure}(): Argument #1 ($carry) must be passed by reference, value given in %s on line 12

Warning: {closure}(): Argument #1 ($carry) must be passed by reference, value given in %s on line 12

Warning: {closure}(): Argument #1 ($carry) must be passed by reference, value given in %s on line 12
string(12) "HELLO WORLD!"