Skip to content

Commit dbed9ba

Browse files
authored
Add Expanding Primitives (#2343)
* Add ExpandingCount, ExpandingMin, ExpandingMean, ExpandingMax, ExpandingSTD, and ExpandingTrend primitives
1 parent e308921 commit dbed9ba

File tree

15 files changed

+811
-97
lines changed

15 files changed

+811
-97
lines changed

docs/source/api_reference.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,12 @@ Time Series Transform Primitives
286286
.. autosummary::
287287
:toctree: generated/
288288

289+
ExpandingCount
290+
ExpandingMax
291+
ExpandingMean
292+
ExpandingMin
293+
ExpandingSTD
294+
ExpandingTrend
289295
Lag
290296
RollingCount
291297
RollingMax

docs/source/release_notes.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@ Release Notes
66
Future Release
77
==============
88
* Enhancements
9+
* Add ``ExpandingCount``, ``ExpandingMin``, ``ExpandingMean``, ``ExpandingMax``, ``ExpandingSTD``, and ``ExpandingTrend`` primitives (:pr:`2343`)
910
* Fixes
1011
* Changes
1112
* Documentation Changes
1213
* Testing Changes
1314
* Fix version comparison in ``test_holiday_out_of_range`` (:pr:`2382`)
1415

1516
Thanks to the following people for contributing to this release:
16-
:user:`thehomebrewnerd`
17+
:user:`sbadithe`, :user:`thehomebrewnerd`
1718

1819
v1.18.0 Nov 15, 2022
1920
====================

featuretools/primitives/standard/transform/time_series/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,11 @@
2323
from featuretools.primitives.standard.transform.time_series.rolling_trend import (
2424
RollingTrend,
2525
)
26+
from featuretools.primitives.standard.transform.time_series.expanding import (
27+
ExpandingCount,
28+
ExpandingMax,
29+
ExpandingMean,
30+
ExpandingMin,
31+
ExpandingSTD,
32+
ExpandingTrend,
33+
)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from featuretools.primitives.standard.transform.time_series.expanding.expanding_count import (
2+
ExpandingCount,
3+
)
4+
from featuretools.primitives.standard.transform.time_series.expanding.expanding_max import (
5+
ExpandingMax,
6+
)
7+
from featuretools.primitives.standard.transform.time_series.expanding.expanding_mean import (
8+
ExpandingMean,
9+
)
10+
from featuretools.primitives.standard.transform.time_series.expanding.expanding_min import (
11+
ExpandingMin,
12+
)
13+
from featuretools.primitives.standard.transform.time_series.expanding.expanding_std import (
14+
ExpandingSTD,
15+
)
16+
from featuretools.primitives.standard.transform.time_series.expanding.expanding_trend import (
17+
ExpandingTrend,
18+
)
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import numpy as np
2+
from woodwork.column_schema import ColumnSchema
3+
from woodwork.logical_types import Datetime, IntegerNullable
4+
5+
from featuretools.primitives.base.transform_primitive_base import TransformPrimitive
6+
from featuretools.primitives.standard.transform.time_series.utils import (
7+
_apply_gap_for_expanding_primitives,
8+
)
9+
10+
11+
class ExpandingCount(TransformPrimitive):
    """Computes the expanding count of events over a given window.

    Description:
        Given a list of datetimes, returns an expanding count starting
        at the row `gap` rows away from the current row. An expanding
        primitive calculates the value of a primitive for a given time
        with all the data available up to the corresponding point in time.

        Input datetimes should be monotonic.

    Args:
        gap (int, optional): Specifies a gap backwards from each instance before the
            usable data begins. Corresponds to number of rows. Defaults to 1.
        min_periods (int, optional): Minimum number of observations required for performing calculations
            over the window. Defaults to 1.

    Examples:
        >>> import pandas as pd
        >>> expanding_count = ExpandingCount()
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_count(times).tolist()
        [nan, 1.0, 2.0, 3.0, 4.0]

        We can also control the gap before the expanding calculation.

        >>> import pandas as pd
        >>> expanding_count = ExpandingCount(gap=0)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_count(times).tolist()
        [1.0, 2.0, 3.0, 4.0, 5.0]

        We can also control the minimum number of periods required for the expanding calculation.

        >>> import pandas as pd
        >>> expanding_count = ExpandingCount(min_periods=3)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_count(times).tolist()
        [nan, nan, nan, 3.0, 4.0]
    """

    name = "expanding_count"
    input_types = [ColumnSchema(logical_type=Datetime, semantic_tags={"time_index"})]
    return_type = ColumnSchema(logical_type=IntegerNullable, semantic_tags={"numeric"})
    uses_full_dataframe = True

    def __init__(self, gap=1, min_periods=1):
        self.gap = gap
        self.min_periods = min_periods

    def get_function(self):
        """Return the callable that computes the expanding count for a datetime series."""

        def expanding_count(datetime_series):
            # Helper lives in time_series.utils; presumably it shifts the
            # series by `gap` rows -- confirm against that module.
            datetime_series = _apply_gap_for_expanding_primitives(
                datetime_series,
                self.gap,
            )
            count_series = datetime_series.expanding(
                min_periods=self.min_periods,
            ).count()
            # The leading `gap + min_periods - 1` rows are forced to NaN.
            num_nans = self.gap + self.min_periods - 1
            # Mask by position, not by label: the series may carry any index
            # (datetimes, non-default integers), and a slice also tolerates
            # `num_nans` exceeding the series length. `max(..., 0)` keeps a
            # non-positive count a no-op instead of slicing from the end.
            count_series.iloc[: max(num_nans, 0)] = np.nan
            return count_series

        return expanding_count
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pandas as pd
2+
from woodwork.column_schema import ColumnSchema
3+
from woodwork.logical_types import Datetime
4+
5+
from featuretools.primitives.base.transform_primitive_base import TransformPrimitive
6+
from featuretools.primitives.standard.transform.time_series.utils import (
7+
_apply_gap_for_expanding_primitives,
8+
)
9+
10+
11+
class ExpandingMax(TransformPrimitive):
    """Computes the expanding maximum of events over a given window.

    Description:
        Given a list of datetimes and a numeric column, returns the
        expanding maximum of the numeric values starting at the row
        `gap` rows away from the current row. An expanding primitive
        calculates the value of a primitive for a given time with all
        the data available up to the corresponding point in time.

        Input datetimes should be monotonic.

    Args:
        gap (int, optional): Specifies a gap backwards from each instance before the
            usable data begins. Corresponds to number of rows. Defaults to 1.
        min_periods (int, optional): Minimum number of observations required for performing calculations
            over the window. Defaults to 1.

    Examples:
        >>> import pandas as pd
        >>> expanding_max = ExpandingMax()
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_max(times, [2, 4, 6, 7, 2]).tolist()
        [nan, 2.0, 4.0, 6.0, 7.0]

        We can also control the gap before the expanding calculation.

        >>> import pandas as pd
        >>> expanding_max = ExpandingMax(gap=0)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_max(times, [2, 4, 6, 7, 2]).tolist()
        [2.0, 4.0, 6.0, 7.0, 7.0]

        We can also control the minimum number of periods required for the expanding calculation.

        >>> import pandas as pd
        >>> expanding_max = ExpandingMax(min_periods=3)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_max(times, [2, 4, 6, 7, 2]).tolist()
        [nan, nan, nan, 6.0, 7.0]
    """

    name = "expanding_max"
    input_types = [
        ColumnSchema(logical_type=Datetime, semantic_tags={"time_index"}),
        ColumnSchema(semantic_tags={"numeric"}),
    ]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True

    def __init__(self, gap=1, min_periods=1):
        self.gap = gap
        self.min_periods = min_periods

    def get_function(self):
        """Return the callable that computes the expanding maximum."""

        def expanding_max(datetime, numeric):
            # Re-index the raw numeric values by the datetimes.
            indexed = pd.Series(numeric.values, index=datetime)
            # Apply the configured gap through the shared helper.
            gapped = _apply_gap_for_expanding_primitives(indexed, self.gap)
            window = gapped.expanding(min_periods=self.min_periods)
            return window.max().values

        return expanding_max
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pandas as pd
2+
from woodwork.column_schema import ColumnSchema
3+
from woodwork.logical_types import Datetime, Double
4+
5+
from featuretools.primitives.base.transform_primitive_base import TransformPrimitive
6+
from featuretools.primitives.standard.transform.time_series.utils import (
7+
_apply_gap_for_expanding_primitives,
8+
)
9+
10+
11+
class ExpandingMean(TransformPrimitive):
    """Computes the expanding mean of events over a given window.

    Description:
        Given a list of datetimes and a numeric column, returns the
        expanding mean of the numeric values starting at the row `gap`
        rows away from the current row. An expanding primitive
        calculates the value of a primitive for a given time with all
        the data available up to the corresponding point in time.

        Input datetimes should be monotonic.

    Args:
        gap (int, optional): Specifies a gap backwards from each instance before the
            usable data begins. Corresponds to number of rows. Defaults to 1.
        min_periods (int, optional): Minimum number of observations required for performing calculations
            over the window. Defaults to 1.

    Examples:
        >>> import pandas as pd
        >>> expanding_mean = ExpandingMean()
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_mean(times, [5, 4, 3, 2, 1]).tolist()
        [nan, 5.0, 4.5, 4.0, 3.5]

        We can also control the gap before the expanding calculation.

        >>> import pandas as pd
        >>> expanding_mean = ExpandingMean(gap=0)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_mean(times, [5, 4, 3, 2, 1]).tolist()
        [5.0, 4.5, 4.0, 3.5, 3.0]

        We can also control the minimum number of periods required for the expanding calculation.

        >>> import pandas as pd
        >>> expanding_mean = ExpandingMean(min_periods=3)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_mean(times, [5, 4, 3, 2, 1]).tolist()
        [nan, nan, nan, 4.0, 3.5]
    """

    name = "expanding_mean"
    input_types = [
        ColumnSchema(logical_type=Datetime, semantic_tags={"time_index"}),
        ColumnSchema(semantic_tags={"numeric"}),
    ]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    uses_full_dataframe = True

    def __init__(self, gap=1, min_periods=1):
        self.gap = gap
        self.min_periods = min_periods

    def get_function(self):
        """Return the callable that computes the expanding mean."""

        def expanding_mean(datetime, numeric):
            # Re-index the raw numeric values by the datetimes.
            indexed = pd.Series(numeric.values, index=datetime)
            # Apply the configured gap through the shared helper.
            gapped = _apply_gap_for_expanding_primitives(indexed, self.gap)
            window = gapped.expanding(min_periods=self.min_periods)
            return window.mean().values

        return expanding_mean
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import pandas as pd
2+
from woodwork.column_schema import ColumnSchema
3+
from woodwork.logical_types import Datetime
4+
5+
from featuretools.primitives.base.transform_primitive_base import TransformPrimitive
6+
from featuretools.primitives.standard.transform.time_series.utils import (
7+
_apply_gap_for_expanding_primitives,
8+
)
9+
10+
11+
class ExpandingMin(TransformPrimitive):
    """Computes the expanding minimum of events over a given window.

    Description:
        Given a list of datetimes and a numeric column, returns the
        expanding minimum of the numeric values starting at the row
        `gap` rows away from the current row. An expanding primitive
        calculates the value of a primitive for a given time with all
        the data available up to the corresponding point in time.

        Input datetimes should be monotonic.

    Args:
        gap (int, optional): Specifies a gap backwards from each instance before the
            usable data begins. Corresponds to number of rows. Defaults to 1.
        min_periods (int, optional): Minimum number of observations required for performing calculations
            over the window. Defaults to 1.

    Examples:
        >>> import pandas as pd
        >>> expanding_min = ExpandingMin()
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_min(times, [5, 4, 3, 2, 1]).tolist()
        [nan, 5.0, 4.0, 3.0, 2.0]

        We can also control the gap before the expanding calculation.

        >>> import pandas as pd
        >>> expanding_min = ExpandingMin(gap=0)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_min(times, [5, 4, 3, 2, 1]).tolist()
        [5.0, 4.0, 3.0, 2.0, 1.0]

        We can also control the minimum number of periods required for the expanding calculation.

        >>> import pandas as pd
        >>> expanding_min = ExpandingMin(min_periods=3)
        >>> times = pd.date_range(start='2019-01-01', freq='1min', periods=5)
        >>> expanding_min(times, [5, 4, 3, 2, 1]).tolist()
        [nan, nan, nan, 3.0, 2.0]
    """

    name = "expanding_min"
    input_types = [
        ColumnSchema(logical_type=Datetime, semantic_tags={"time_index"}),
        ColumnSchema(semantic_tags={"numeric"}),
    ]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True

    def __init__(self, gap=1, min_periods=1):
        self.gap = gap
        self.min_periods = min_periods

    def get_function(self):
        """Return the callable that computes the expanding minimum."""

        def expanding_min(datetime, numeric):
            # Re-index the raw numeric values by the datetimes.
            indexed = pd.Series(numeric.values, index=datetime)
            # Apply the configured gap through the shared helper.
            gapped = _apply_gap_for_expanding_primitives(indexed, self.gap)
            window = gapped.expanding(min_periods=self.min_periods)
            return window.min().values

        return expanding_min

0 commit comments

Comments
 (0)