Start adding GRW RV Op and distribution #5298
Changes from all commits
pymc/distributions/timeseries.py

@@ -12,15 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple, Union

import aesara.tensor as at
import numpy as np

from aesara import scan
from aesara.tensor.random.op import RandomVariable
from scipy import stats

from pymc.aesaraf import change_rv_size, floatX, intX
from pymc.distributions import distribution, logprob, multivariate
from pymc.distributions.continuous import Flat, Normal, get_tau_sigma
from pymc.distributions.dist_math import check_parameters
from pymc.distributions.shape_utils import to_tuple
from pymc.util import check_dist_not_registered

__all__ = [
    "AR1",
@@ -33,6 +38,195 @@
]

class GaussianRandomWalkRV(RandomVariable):
    """
    GaussianRandomWalk Random Variable
    """

    name = "GaussianRandomWalk"
    ndim_supp = 1
    ndims_params = [0, 0, 0, 0]
    dtype = "floatX"
    _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}")

    def make_node(self, rng, size, dtype, mu, sigma, init, steps):
        steps = at.as_tensor_variable(steps)
        if not steps.ndim == 0 or not steps.dtype.startswith("int"):
            raise ValueError("steps must be an integer scalar (ndim=0).")

        return super().make_node(rng, size, dtype, mu, sigma, init, steps)

    def _supp_shape_from_params(self, dist_params, reop_param_idx=0, param_shapes=None):
        # The support has length steps + 1, since the initial value is
        # prepended to the `steps` innovations.
        steps = dist_params[3]
        return (steps + 1,)
    @classmethod
    def rng_fn(
        cls,
        rng: np.random.RandomState,
        mu: Union[np.ndarray, float],
        sigma: Union[np.ndarray, float],
        init: float,
        steps: int,
        size: Tuple[int],
    ) -> np.ndarray:
        """Gaussian Random Walk generator.

        The init value is drawn from the Normal distribution with the same sigma as the
        innovations.

        Notes
        -----
        Currently does not support a custom init distribution.

        Parameters
        ----------
        rng : np.random.RandomState
            NumPy random number generator
        mu : array_like
            Random walk mean
        sigma : np.ndarray
            Standard deviation of innovations (sigma > 0)
        init : float
            Initial value of the GaussianRandomWalk
        steps : int
            Length of the random walk (steps > 0). The returned array has length
            steps + 1, because the first entry is the initial value.
        size : int
            The number of random walk time series generated

        Returns
        -------
        ndarray
        """
        if steps < 1:
            raise ValueError("Steps must be greater than 0")
        # If size is None then the returned series should be (*implied_dims, 1 + steps)
        if size is None:
            # Broadcast parameters with each other to find the implied batch dims
            bcast_shape = np.broadcast_shapes(
                np.asarray(mu).shape,
                np.asarray(sigma).shape,
                np.asarray(init).shape,
            )
            dist_shape = (*bcast_shape, int(steps))

        # If size is not None then the returned series should be (*size, 1 + steps)
        else:
            init_size = (*size, 1)
            dist_shape = (*size, int(steps))

        innovations = rng.normal(loc=mu, scale=sigma, size=dist_shape)
        grw = np.concatenate([init[..., None], innovations], axis=-1)
        return np.cumsum(grw, axis=-1)

Review thread on the `np.broadcast_shapes` call:

Q: What does broadcast shape here do specifically that the code before was not doing?
A: It makes sure that when […]


gaussianrandomwalk = GaussianRandomWalkRV()
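For intuition, a minimal NumPy sketch of what `rng_fn` computes in the `size is None` branch (variable names and values here are illustrative, not part of the PR):

```python
import numpy as np

rng = np.random.default_rng(123)
mu, sigma, init, steps = 0.5, 1.0, 0.0, 100

# Implied batch dims come from broadcasting the parameter shapes,
# e.g. np.broadcast_shapes((3,), (), ()) == (3,)
bcast_shape = np.broadcast_shapes(
    np.asarray(mu).shape, np.asarray(sigma).shape, np.asarray(init).shape
)

innovations = rng.normal(loc=mu, scale=sigma, size=(*bcast_shape, steps))
grw = np.concatenate([np.asarray(init)[..., None], innovations], axis=-1)
series = np.cumsum(grw, axis=-1)
print(series.shape)  # (101,): the initial value plus `steps` cumulative innovations
```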


class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations.

    Parameters
    ----------
    mu : tensor_like of float
        innovation drift, defaults to 0.0
    sigma : tensor_like of float, optional
        sigma > 0, innovation standard deviation, defaults to 1.0
    init : Univariate PyMC distribution
        Univariate distribution of the initial value, created with the `.dist()` API.
        Defaults to a Normal with the same `mu` and `sigma` as the GaussianRandomWalk.
    steps : int
        Number of steps in the Gaussian Random Walk (steps > 0).
    """

    rv_op = gaussianrandomwalk

    def __new__(cls, name, mu=0.0, sigma=1.0, init=None, steps=None, **kwargs):
        if init is not None:
            check_dist_not_registered(init)
        return super().__new__(cls, name, mu, sigma, init, steps, **kwargs)

Review thread on `__new__` (whether `__new__` and `dist` need matching signatures):

Q: I think I asked this question before, but do […]
A: They can have the same signature. I don't know if other distributions usually show or hide size in kwargs. Only thing we care about here is getting […]
Q: Is it ever the case they'll have vastly different signatures?
A: […]
    @classmethod
    def dist(
        cls, mu=0.0, sigma=1.0, init=None, steps=None, size=None, **kwargs
    ) -> at.TensorVariable:

        mu = at.as_tensor_variable(floatX(mu))
        sigma = at.as_tensor_variable(floatX(sigma))
        if steps is None:
            raise ValueError("Must specify steps parameter")
        steps = at.as_tensor_variable(intX(steps))

        shape = kwargs.get("shape", None)
        if size is None and shape is None:
            init_size = None
        else:
            # The last dimension of shape is the time axis, so it is not part
            # of the init size.
            init_size = to_tuple(size) if size is not None else to_tuple(shape)[:-1]

        # If no scalar distribution is passed then initialize with a Normal of same mu and sigma
        if init is None:
            init = Normal.dist(mu, sigma, size=init_size)
        else:
            if not (
                isinstance(init, at.TensorVariable)
                and init.owner is not None
                and isinstance(init.owner.op, RandomVariable)
                and init.owner.op.ndim_supp == 0
            ):
                raise TypeError("init must be a univariate distribution variable")

            if init_size is not None:
                init = change_rv_size(init, init_size)
            else:
                # If not explicit, size is determined by the shapes of mu, sigma, and init
                bcast_shape = at.broadcast_arrays(mu, sigma, init)[0].shape
                init = change_rv_size(init, bcast_shape)

        # Ignore the logprob of the init var because that's accounted for in the logp method
        init.tag.ignore_logprob = True

        return super().dist([mu, sigma, init, steps], size=size, **kwargs)
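For context, a sketch of how the new `dist`/`__new__` API is intended to be called (hypothetical model; names and values are arbitrary):

```python
import pymc as pm

with pm.Model():
    # steps is now required; the resulting variable has length steps + 1
    grw = pm.GaussianRandomWalk("grw", mu=0.0, sigma=1.0, steps=10)

    # A custom univariate init distribution, built with the .dist() API so it
    # is not registered in the model (see check_dist_not_registered above)
    grw2 = pm.GaussianRandomWalk(
        "grw2", mu=0.0, sigma=1.0, init=pm.Normal.dist(0.0, 10.0), steps=10
    )
```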
    def logp(
        value: at.Variable,
        mu: at.Variable,
        sigma: at.Variable,
        init: at.Variable,
        steps: at.Variable,
    ) -> at.TensorVariable:
        """Calculate the log-probability of a Gaussian Random Walk at the specified value.

        Parameters
        ----------
        value : at.Variable
        mu : at.Variable
        sigma : at.Variable
        init : at.Variable
        steps : at.Variable
            Only used for the ``steps > 0`` check.

        Returns
        -------
        TensorVariable
        """
        # Calculate initialization logp
        init_logp = logprob.logp(init, value[..., 0])

        # Make the time series stationary around the mean value
        stationary_series = value[..., 1:] - value[..., :-1]
        series_logp = logprob.logp(Normal.dist(mu, sigma), stationary_series)

        return check_parameters(
            init_logp + series_logp.sum(axis=-1),
            steps > 0,
            msg="steps > 0",
        )
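A quick SciPy sketch of the decomposition `logp` implements, namely the init logp plus the Normal logps of the first differences (values are arbitrary, and the init distribution is assumed Normal for illustration):

```python
import numpy as np
from scipy import stats

mu, sigma = 0.5, 1.0
value = np.array([0.3, 1.1, 1.4, 2.6])  # initial value followed by 3 steps

init_logp = stats.norm(mu, sigma).logpdf(value[0])           # logp of value[..., 0] under init
series_logp = stats.norm(mu, sigma).logpdf(np.diff(value))   # logp of the stationary differences
total_logp = init_logp + series_logp.sum()
```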


class AR1(distribution.Continuous):
    """
    Autoregressive process with 1 lag.

@@ -171,125 +365,6 @@ def logp(self, value):
        return at.sum(innov_like) + at.sum(init_like)

class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations.

    Note that this is mainly a user-friendly wrapper to enable an easier specification
    of GRW. You are not restricted to using only Normal innovations; you can use any
    distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior
    (see the sketch after this class).

    Parameters
    ----------
    mu : tensor_like of float, default 0
        innovation drift, defaults to 0.0
        For vector-valued `mu`, the first dimension must match the shape of the random
        walk, and the first element will be discarded (since there is no innovation in
        the first timestep)
    sigma : tensor_like of float, optional
        `sigma` > 0, innovation standard deviation (only required if `tau` is not specified)
        For vector-valued `sigma`, the first dimension must match the shape of the random
        walk, and the first element will be discarded (since there is no innovation in
        the first timestep)
    tau : tensor_like of float, optional
        `tau` > 0, innovation precision (only required if `sigma` is not specified)
        For vector-valued `tau`, the first dimension must match the shape of the random
        walk, and the first element will be discarded (since there is no innovation in
        the first timestep)
    init : pymc.Distribution, optional
        distribution for the initial value (defaults to Flat())
    """

    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs):
        kwargs.setdefault("shape", 1)
        super().__init__(*args, **kwargs)
        if sum(self.shape) == 0:
            raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
        tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
        self.tau = at.as_tensor_variable(tau)
        sigma = at.as_tensor_variable(sigma)
        self.sigma = sigma
        self.mu = at.as_tensor_variable(mu)
        self.init = init or Flat.dist()
        self.mean = at.as_tensor_variable(0.0)
    def _mu_and_sigma(self, mu, sigma):
        """Helper to get mu and sigma if they are high dimensional."""
        if sigma.ndim > 0:
            sigma = sigma[1:]
        if mu.ndim > 0:
            mu = mu[1:]
        return mu, sigma

    def logp(self, x):
        """
        Calculate log-probability of Gaussian Random Walk distribution at specified value.

        Parameters
        ----------
        x : numeric
            Value for which log-probability is calculated.

        Returns
        -------
        TensorVariable
        """
        if x.ndim > 0:
            x_im1 = x[:-1]
            x_i = x[1:]
            mu, sigma = self._mu_and_sigma(self.mu, self.sigma)
            innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i)
            return self.init.logp(x[0]) + at.sum(innov_like)
        return self.init.logp(x)
    def random(self, point=None, size=None):
        """Draw random values from GaussianRandomWalk.

        Parameters
        ----------
        point : dict or Point, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size : int, optional
            Desired size of random sample (returns one sample if not
            specified).

        Returns
        -------
        array
        """
        # sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size)
        # return distribution.generate_samples(
        #     self._random,
        #     sigma=sigma,
        #     mu=mu,
        #     size=size,
        #     dist_shape=self.shape,
        #     not_broadcast_kwargs={"sample_shape": to_tuple(size)},
        # )
        pass

    def _random(self, sigma, mu, size, sample_shape):
        """Implement a Gaussian random walk as a cumulative sum of normals.

        axis = len(size) - 1 denotes the axis along which the cumulative sum is
        calculated. This might need to be corrected in future when issue #4010 is fixed.
        """
        if size[len(sample_shape)] == sample_shape:
            axis = len(sample_shape)
        else:
            axis = len(size) - 1
        rv = stats.norm(mu, sigma)
        data = rv.rvs(size).cumsum(axis=axis)
        data = np.array(data)

        # The following lines center the random walk to start at the origin.
        if len(data.shape) > 1:
            for i in range(data.shape[0]):
                data[i] = data[i] - data[i][0]
        else:
            data = data - data[0]
        return data

    def _distr_parameters_for_repr(self):
        return ["mu", "sigma"]


class GARCH11(distribution.Continuous):
    r"""
    GARCH(1,1) with Normal innovations. The model is specified by
Review discussion on the `make_node` steps check:

Q: Curious, why is this function needed now? Is this check not already covered here?
https://github.com/pymc-devs/pymc/pull/5298/files#diff-5fece42aa9647732b360b6c0e13de089a8408c68ae38b650fba38a1c40322229R102

A: That line is a value check; this is a shape check (to confirm it is a scalar).

Q: Why this function, then? I'm assuming make_node is called when the Aesara graph is being made?
In other words, why can't this logic be moved to where the value check is? Not saying it should be, just trying to understand :)

A: This check happens immediately when the variable is created, while that other one runs only when/if the variable is evaluated.
We don't want to allow the user to even initialize such variables, whereas we can't do that for the value of the steps, because it might be symbolic and not known until the function is evaluated.
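To illustrate the distinction, a hypothetical session on this branch (the exact error paths are assumptions, not tested output):

```python
import aesara.tensor as at
import pymc as pm

# Shape check in make_node: fails immediately at graph-construction time,
# because steps is a vector instead of a scalar.
pm.GaussianRandomWalk.dist(mu=0.0, sigma=1.0, steps=[5, 5])  # expected: ValueError

# Value check in rng_fn: the graph builds fine with symbolic steps, and the
# error can only surface once the variable is actually evaluated/drawn.
steps = at.iscalar("steps")
grw = pm.GaussianRandomWalk.dist(mu=0.0, sigma=1.0, steps=steps)
grw.eval({steps: 0})  # expected: ValueError("Steps must be greater than 0")
```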