Skip to content

Commit 4f1f11c

Browse files
committed
Use "unobserved" as imputed variable suffix instead of "missing"
1 parent e1fd175 commit 4f1f11c

File tree

5 files changed: 43 additions and 35 deletions

pymc/model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1423,7 +1423,7 @@ def make_obs_var(
14231423
self.observed_RVs.append(observed_rv)
14241424

14251425
# Register FreeRV corresponding to unobserved components
1426-
self.register_rv(unobserved_rv, f"{name}_missing", transform=transform)
1426+
self.register_rv(unobserved_rv, f"{name}_unobserved", transform=transform)
14271427

14281428
# Register Deterministic that combines observed and missing
14291429
# Note: This can widely increase memory consumption during sampling for large datasets

tests/backends/test_arviz.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -338,10 +338,10 @@ def test_missing_data_model(self):
338338
)
339339

340340
# make sure that data is really missing
341-
assert "y_missing" in model.named_vars
341+
assert "y_unobserved" in model.named_vars
342342

343343
test_dict = {
344-
"posterior": ["x", "y_missing"],
344+
"posterior": ["x", "y_unobserved"],
345345
"observed_data": ["y_observed"],
346346
"log_likelihood": ["y_observed"],
347347
}

tests/test_model.py

Lines changed: 34 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -357,13 +357,13 @@ def test_missing_data(self):
357357
gf = m.logp_dlogp_function()
358358
gf._extra_are_set = True
359359

360-
assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type
360+
assert m["x2_unobserved"].type == gf._extra_vars_shared["x2_unobserved"].type
361361

362362
# The dtype of the merged observed/missing deterministic should match the RV dtype
363363
assert m.deterministics[0].type.dtype == x2.type.dtype
364364

365365
point = m.initial_point(random_seed=None).copy()
366-
del point["x2_missing"]
366+
del point["x2_unobserved"]
367367

368368
res = [gf(DictToArrayBijection.map(Point(point, model=m))) for i in range(5)]
369369

@@ -566,7 +566,7 @@ def test_make_obs_var():
566566
assert masked_output != fake_distribution
567567
assert not isinstance(masked_output, RandomVariable)
568568
# Ensure it has missing values
569-
assert {"testing_inputs_missing"} == {v.name for v in fake_model.value_vars}
569+
assert {"testing_inputs_unobserved"} == {v.name for v in fake_model.value_vars}
570570
assert {"testing_inputs", "testing_inputs_observed"} == {
571571
v.name for v in fake_model.observed_RVs
572572
}
@@ -1221,7 +1221,7 @@ def test_missing_basic(self, missing_data):
12211221
with pytest.warns(ImputationWarning):
12221222
_ = pm.Normal("y", x, 1, observed=missing_data)
12231223

1224-
assert "y_missing" in model.named_vars
1224+
assert "y_unobserved" in model.named_vars
12251225

12261226
test_point = model.initial_point()
12271227
assert not np.isnan(model.compile_logp()(test_point))
@@ -1238,7 +1238,7 @@ def test_missing_with_predictors(self):
12381238
with pytest.warns(ImputationWarning):
12391239
y = pm.Normal("y", x * predictors, 1, observed=data)
12401240

1241-
assert "y_missing" in model.named_vars
1241+
assert "y_unobserved" in model.named_vars
12421242

12431243
test_point = model.initial_point()
12441244
assert not np.isnan(model.compile_logp()(test_point))
@@ -1278,17 +1278,19 @@ def test_interval_missing_observations(self):
12781278
with pytest.warns(ImputationWarning):
12791279
theta2 = pm.Normal("theta2", mu=theta1, observed=obs2)
12801280

1281-
assert isinstance(model.rvs_to_transforms[model["theta1_missing"]], IntervalTransform)
1281+
assert isinstance(
1282+
model.rvs_to_transforms[model["theta1_unobserved"]], IntervalTransform
1283+
)
12821284
assert model.rvs_to_transforms[model["theta1_observed"]] is None
12831285

12841286
prior_trace = pm.sample_prior_predictive(random_seed=rng, return_inferencedata=False)
12851287
assert set(prior_trace.keys()) == {
12861288
"theta1",
12871289
"theta1_observed",
1288-
"theta1_missing",
1290+
"theta1_unobserved",
12891291
"theta2",
12901292
"theta2_observed",
1291-
"theta2_missing",
1293+
"theta2_unobserved",
12921294
}
12931295

12941296
# Make sure the observed + missing combined deterministics have the
@@ -1303,14 +1305,16 @@ def test_interval_missing_observations(self):
13031305
# Make sure the missing parts of the combined deterministic matches the
13041306
# sampled missing and observed variable values
13051307
assert (
1306-
np.mean(prior_trace["theta1"][:, obs1.mask] - prior_trace["theta1_missing"]) == 0.0
1308+
np.mean(prior_trace["theta1"][:, obs1.mask] - prior_trace["theta1_unobserved"])
1309+
== 0.0
13071310
)
13081311
assert (
13091312
np.mean(prior_trace["theta1"][:, ~obs1.mask] - prior_trace["theta1_observed"])
13101313
== 0.0
13111314
)
13121315
assert (
1313-
np.mean(prior_trace["theta2"][:, obs2.mask] - prior_trace["theta2_missing"]) == 0.0
1316+
np.mean(prior_trace["theta2"][:, obs2.mask] - prior_trace["theta2_unobserved"])
1317+
== 0.0
13141318
)
13151319
assert (
13161320
np.mean(prior_trace["theta2"][:, ~obs2.mask] - prior_trace["theta2_observed"])
@@ -1326,18 +1330,22 @@ def test_interval_missing_observations(self):
13261330
)
13271331
assert set(trace.varnames) == {
13281332
"theta1",
1329-
"theta1_missing",
1330-
"theta1_missing_interval__",
1333+
"theta1_unobserved",
1334+
"theta1_unobserved_interval__",
13311335
"theta2",
1332-
"theta2_missing",
1336+
"theta2_unobserved",
13331337
}
13341338

13351339
# Make sure that the missing values are newly generated samples and that
13361340
# the observed and deterministic match
1337-
assert np.all(0 < trace["theta1_missing"].mean(0))
1338-
assert np.all(0 < trace["theta2_missing"].mean(0))
1339-
assert np.isclose(np.mean(trace["theta1"][:, obs1.mask] - trace["theta1_missing"]), 0)
1340-
assert np.isclose(np.mean(trace["theta2"][:, obs2.mask] - trace["theta2_missing"]), 0)
1341+
assert np.all(0 < trace["theta1_unobserved"].mean(0))
1342+
assert np.all(0 < trace["theta2_unobserved"].mean(0))
1343+
assert np.isclose(
1344+
np.mean(trace["theta1"][:, obs1.mask] - trace["theta1_unobserved"]), 0
1345+
)
1346+
assert np.isclose(
1347+
np.mean(trace["theta2"][:, obs2.mask] - trace["theta2_unobserved"]), 0
1348+
)
13411349

13421350
# Make sure that the observed values are unchanged
13431351
assert np.allclose(np.var(trace["theta1"][:, ~obs1.mask], 0), 0.0)
@@ -1378,7 +1386,7 @@ def test_missing_logp1(self):
13781386
with pytest.warns(ImputationWarning):
13791387
x = pm.Gamma("x", 1, 1, observed=[1, 1, 1, np.nan])
13801388

1381-
logp_val = m2.compile_logp()({"x_missing_log__": np.array([0])})
1389+
logp_val = m2.compile_logp()({"x_unobserved_log__": np.array([0])})
13821390
assert logp_val == -4.0
13831391

13841392
def test_missing_logp2(self):
@@ -1394,7 +1402,7 @@ def test_missing_logp2(self):
13941402
"theta2", mu=theta1, observed=np.array([np.nan, np.nan, 2, np.nan, 4])
13951403
)
13961404
m_missing_logp = m_missing.compile_logp()(
1397-
{"theta1_missing": [2, 4], "theta2_missing": [0, 1, 3]}
1405+
{"theta1_unobserved": [2, 4], "theta2_unobserved": [0, 1, 3]}
13981406
)
13991407

14001408
assert m_logp == m_missing_logp
@@ -1407,15 +1415,15 @@ def test_missing_multivariate_separable(self):
14071415
a=[1, 2, 3],
14081416
observed=np.array([[0.3, 0.3, 0.4], [np.nan, np.nan, np.nan]]),
14091417
)
1410-
assert (m_miss["x_missing"].owner.op, pm.Dirichlet)
1418+
assert (m_miss["x_unobserved"].owner.op, pm.Dirichlet)
14111419
assert (m_miss["x_observed"].owner.op, pm.Dirichlet)
14121420

14131421
with pm.Model() as m_unobs:
14141422
x = pm.Dirichlet("x", a=[1, 2, 3], shape=(1, 3))
14151423

14161424
inp_vals = simplex.forward(np.array([[0.3, 0.3, 0.4]])).eval()
14171425
np.testing.assert_allclose(
1418-
m_miss.compile_logp(jacobian=False)({"x_missing_simplex__": inp_vals}),
1426+
m_miss.compile_logp(jacobian=False)({"x_unobserved_simplex__": inp_vals}),
14191427
m_unobs.compile_logp(jacobian=False)({"x_simplex__": inp_vals}) * 2,
14201428
)
14211429

@@ -1428,12 +1436,12 @@ def test_missing_multivariate_unseparable(self):
14281436
observed=np.array([[0.3, 0.3, np.nan], [np.nan, np.nan, 0.4]]),
14291437
)
14301438

1431-
assert isinstance(m_miss["x_missing"].owner.op, PartialObservedRV)
1439+
assert isinstance(m_miss["x_unobserved"].owner.op, PartialObservedRV)
14321440
assert isinstance(m_miss["x_observed"].owner.op, PartialObservedRV)
14331441

14341442
inp_values = np.array([0.3, 0.3, 0.4])
14351443
np.testing.assert_allclose(
1436-
m_miss.compile_logp()({"x_missing": [0.4, 0.3, 0.3]}),
1444+
m_miss.compile_logp()({"x_unobserved": [0.4, 0.3, 0.3]}),
14371445
st.dirichlet.logpdf(inp_values, [1, 2, 3]) * 2,
14381446
)
14391447

@@ -1451,7 +1459,7 @@ def test_missing_vector_parameter(self):
14511459
assert np.all(x_draws[:, 0] < 0)
14521460
assert np.all(x_draws[:, 1] > 0)
14531461
assert np.isclose(
1454-
m.compile_logp()({"x_missing": np.array([-10, 10, -10, 10])}),
1462+
m.compile_logp()({"x_unobserved": np.array([-10, 10, -10, 10])}),
14551463
st.norm(scale=0.1).logpdf(0) * 6,
14561464
)
14571465

@@ -1470,7 +1478,7 @@ def test_missing_symmetric(self):
14701478
x_obs_rv = m["x_observed"]
14711479
x_obs_vv = m.rvs_to_values[x_obs_rv]
14721480

1473-
x_unobs_rv = m["x_missing"]
1481+
x_unobs_rv = m["x_unobserved"]
14741482
x_unobs_vv = m.rvs_to_values[x_unobs_rv]
14751483

14761484
logp = transformed_conditional_logp(
@@ -1506,7 +1514,7 @@ def test_symbolic_random_variable(self):
15061514
observed=data,
15071515
)
15081516
np.testing.assert_almost_equal(
1509-
model.compile_logp()({"x_missing": [0] * 3}),
1517+
model.compile_logp()({"x_unobserved": [0] * 3}),
15101518
st.norm.logcdf(0) * 10,
15111519
)
15121520

tests/test_model_graph.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -145,13 +145,13 @@ def model_with_imputations():
145145

146146
compute_graph = {
147147
"a": set(),
148-
"L_missing": {"a"},
148+
"L_unobserved": {"a"},
149149
"L_observed": {"a"},
150-
"L": {"L_missing", "L_observed"},
150+
"L": {"L_unobserved", "L_observed"},
151151
}
152152
plates = {
153153
"": {"a"},
154-
"2": {"L_missing"},
154+
"2": {"L_unobserved"},
155155
"10": {"L_observed"},
156156
"12": {"L"},
157157
}

tests/tuning/test_starting.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -146,9 +146,9 @@ def test_find_MAP_issue_4488():
146146
y = pm.Deterministic("y", x + 1)
147147
map_estimate = find_MAP()
148148

149-
assert not set.difference({"x_missing", "x_missing_log__", "y"}, set(map_estimate.keys()))
150-
np.testing.assert_allclose(map_estimate["x_missing"], 0.2, rtol=1e-4, atol=1e-4)
151-
np.testing.assert_allclose(map_estimate["y"], [2.0, map_estimate["x_missing"][0] + 1])
149+
assert not set.difference({"x_unobserved", "x_unobserved_log__", "y"}, set(map_estimate.keys()))
150+
np.testing.assert_allclose(map_estimate["x_unobserved"], 0.2, rtol=1e-4, atol=1e-4)
151+
np.testing.assert_allclose(map_estimate["y"], [2.0, map_estimate["x_unobserved"][0] + 1])
152152

153153

154154
def test_find_MAP_warning_non_free_RVs():

0 commit comments

Comments
 (0)