Skip to content

Commit 118e544

Browse files
committed
- Added tabulate dependency.
- Minor documentation changes. - iter: Added unnest and batch_by_group, and deprecated batch_iter. - misc: added bin_array.
1 parent b4a9355 commit 118e544

File tree

6 files changed

+57
-4
lines changed

6 files changed

+57
-4
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,16 @@ This package contains convenient functions not found in Python's Standard Librar
2121
It also contains a variety of convenience functions for the
2222
[numba](https://numba.pydata.org/) JIT compiler library.
2323

24+
25+
26+
## Installation:
27+
28+
```pip install generic-util```
29+
30+
31+
32+
## Overview
33+
2434
See the [documentation][rtd-link] for details, but functions are grouped as follows:
2535
- `Generic_Util.benchmarking`: functions covering typical code-timing scenarios,
2636
such as a "with" statement context, an n-executions timer,

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ dependencies = [
2828
"numpy>=1.23.5",
2929
"numba>=0.56.4",
3030
"sortedcontainers>=2.4.0",
31+
"tabulate>=0.8.0"
3132
]
3233

3334
[project.optional-dependencies]

src/Generic_Util/benchmarking.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def time_n(f: Callable, n = 2, *args, **kwargs):
5353
# - how often the function at each position won (to see whether ordering had a big effect)
5454
# - how well each function performed in each sample size
5555
# - no need to check how smaller-sample calls were better, as they should be worse than large-sample ones
56-
# - MAYBE NOT: option to check whether the outputs of (single) calls to each function are equal
57-
# - Difficulty: not all objects are amenable to ==; could ask for the comparison function, but finnicky
56+
# - USEFUL BUT NEEDS WARNING: option to check whether the outputs of (single) calls to each function are equal
57+
# - Difficulty: not all objects are amenable to ==; could ask for the comparison function, or simply warn about this
5858
def compare_implementations(fs_with_shared_args: dict[str, Callable], n = 200, wait = 1, verbose = True,
5959
fs_with_own_args: dict[str, tuple[Callable, list, dict]] = None, args: list = None, kwargs: dict = None):
6060
'''Benchmark multiple implementations of the same function called n times (each with the same args and kwargs), with a break between functions.

src/Generic_Util/iter.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from itertools import chain, combinations, islice
77
from functools import reduce
88
from collections import defaultdict
9+
from collections.abc import MutableMapping
910
from sortedcontainers import SortedKeyList
1011
import operator as op
1112
import numpy as np
@@ -39,6 +40,18 @@ def deep_extract(xss_: Iterable[Iterable], *key_path) -> Generator:
3940
for k in key_path: level = level[k]
4041
return deep_flatten(level)
4142

43+
def unnest(nested_dict, prefix = '', sep = '.'):
    '''Recursively flatten the dictionaries and lists nested in a dictionary, joining the keys along each path with sep.

    List elements are keyed by their index and may themselves be dictionaries, further lists, or plain values.
    All dictionary keys are assumed to be strings. Empty dictionaries and empty lists contribute no entries.

    Args:
        nested_dict: the mapping to flatten (only this top level is required to be a mapping).
        prefix: string prepended (followed by sep) to every produced key; '' for no prefix.
        sep: separator placed between the components of each produced key.

    Returns:
        A new flat dict mapping each joined key path to its leaf value.

    Example: {'a': [{'b': 2}, {'c': 3}], 'd': 4, 'e': {'f': 6}} -> {'a.0.b': 2, 'a.1.c': 3, 'd': 4, 'e.f': 6}
    Example: {'a': [1, [2, 3]]} -> {'a.0': 1, 'a.1.0': 2, 'a.1.1': 3}'''
    out = {}

    def _flatten_into(value, key):
        # Mappings recurse through unnest itself so that prefix handling stays in one place
        if isinstance(value, MutableMapping):
            out.update(unnest(value, key, sep = sep))
        # List elements can be anything (mapping, list or leaf), so dispatch on each one again
        # rather than assuming it is a mapping
        elif isinstance(value, list):
            for i, item in enumerate(value):
                _flatten_into(item, key + sep + str(i))
        else:
            out[key] = value

    for k, v in nested_dict.items():
        _flatten_into(v, prefix + sep + k if prefix else k)
    return out
54+
4255

4356

4457
## Iterable-Combining (and Combinatory) Functions
@@ -273,7 +286,10 @@ def intersperse_val(xs: Sequence[_a], y: _a, n: int, prepend = False, append = F
273286
## Batching functions
274287

275288
def batch_iter(n: int, xs: Iterable[_a]) -> Generator[list[_a], None, None]:
    '''*Soft deprecation* for Python>=3.12 since itertools now contains batched(iterable, n).
    Batch an iterable in batches of size n (possibly except the last). If len(xs) is knowable use batch_seq instead.

    Each batch is yielded as a list. Being a generator, nothing (including the deprecation warning)
    happens until the first batch is requested.

    Args:
        n: maximum number of items per batch.
        xs: the iterable to batch; it is consumed lazily, one batch at a time.

    Yields:
        Lists of up to n consecutive items of xs, in their original order.'''
    import sys
    from warnings import warn
    # itertools.batched only exists from Python 3.12: on older interpreters there is no replacement
    # to point users to, so the warning is only raised where its advice can actually be followed
    if sys.version_info >= (3, 12):
        warn('batch_iter() is deprecated from Python>=3.12 and will be removed in a future release.\nUse itertools.batched(iterable, n) instead.', DeprecationWarning, stacklevel=2)
    iterator = iter(xs)
    # An empty list is falsy, so the loop ends as soon as the iterator is exhausted
    while batch := list(islice(iterator, n)):
        yield batch
279295

@@ -342,4 +358,22 @@ def batch_seq_by_into(by: Callable[[_a], float], k: int, xs: Sequence[_a], keep_
342358
count += weight
343359
if batch: yield batch
344360

361+
def batch_by_group(by: Callable[[_a], Any], n: int, xs: Iterable[_a], keep_order: bool = False) -> Generator[list[_a], None, None]:
    '''Batch an iterable into batches of length <= n in which no two elements are from the same group as determined with by.

    Note: by default the items of each group are emitted last-to-first (i.e. reversed with respect to the
    original iterable); pass keep_order = True to emit them first-to-last instead.
    Groups are visited in the order in which they were first encountered in xs.
    The whole of xs is consumed (and grouped in memory) when the first batch is requested.

    Args:
        by: function returning the (hashable) group of an item.
        n: maximum batch length; must be >= 1.
        xs: the iterable to batch.
        keep_order: whether each group's items are emitted in their original order rather than reversed.

    Yields:
        Non-empty lists of at most n items, each from a different group.

    Raises:
        ValueError: if n < 1 (raised when the first batch is requested).'''
    from collections import deque
    # With n < 1 no item could ever be placed in a batch, which would otherwise loop forever on empty batches
    if n < 1: raise ValueError(f'n must be >= 1, got {n}')

    groups = defaultdict(deque)
    for x in xs: groups[by(x)].append(x)
    take = deque.popleft if keep_order else deque.pop

    while any(groups.values()):
        batch = []
        for grp in list(groups.keys()): # casting to list makes it a copy, so not affected by del
            if len(batch) >= n:
                break
            if groups[grp]:
                batch.append(take(groups[grp]))
            else:
                # Exhausted groups are dropped lazily, the first time they are revisited
                del groups[grp]
        yield batch
378+
345379

src/Generic_Util/misc.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,11 @@ def min_max(xs: Sequence[_a]) -> tuple[_a, _a]:
3636
return min, max
3737

3838

39+
40+
## Binary Functions
41+
42+
def bin_array(n: int) -> list[int]:
    '''Returns the binary representation of a non-negative integer as a list of binary values WITHOUT converting to string.

    The most significant bit comes first, e.g. 5 -> [1, 0, 1]; 0 is represented as [0].

    Args:
        n: the non-negative integer to convert.

    Returns:
        The bits of n as a list of 0s and 1s, without leading zeros (except for n = 0 itself).

    Raises:
        ValueError: if n is negative, since shifting a negative int exposes two's-complement bits
            which do not form a meaningful fixed-width representation.'''
    if n < 0: raise ValueError(f'bin_array requires a non-negative integer, got {n}')
    # (0).bit_length() is 0, which would produce an empty list: always emit at least one bit
    width = max(n.bit_length(), 1)
    return [(n >> i) & 1 for i in range(width - 1, -1, -1)]
45+
46+

src/Generic_Util/numba/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
chA = ch[::1] # Same as typeof(np.array(['a', '']))
7070
chA2 = ch[:, ::1] # Same as typeof(np.array([['a', ''],['','b']]))
7171

72-
ch_NP = '<U1'
72+
ch_NP = np.dtype('<U1')
7373
chA_NP = cA2_NP = NDArray[ch_NP]
7474

7575
def nTup(*args):
    '''Bundle all positional arguments into one tuple and pass it to Tuple as its single argument.

    NOTE(review): Tuple is defined outside this hunk (presumably numba's tuple type constructor) - confirm.'''
    members = args
    return Tuple(members)

0 commit comments

Comments
 (0)