Refactor tests, fix geweke bug, set random seed

colin · colin · commit 53eaac804522 · 2016-09-05T15:53:31.000-04:00
diff --git a/pymc3/diagnostics.py b/pymc3/diagnostics.py
@@ -1,8 +1,7 @@
 """Convergence diagnostics and model validation"""
 
 import numpy as np
-from .stats import autocorr, autocov, statfunc
-from copy import copy
+from .stats import statfunc
 
 __all__ = ['geweke', 'gelman_rubin', 'effective_n']
 
@@ -54,6 +53,12 @@ def geweke(x, first=.1, last=.5, intervals=20):
         return [geweke(y, first, last, intervals) for y in np.transpose(x)]
 
     # Filter out invalid intervals
+    for interval in (first, last):
+        if interval <= 0 or interval >= 1:
+            raise ValueError(
+                "Invalid intervals for Geweke convergence analysis",
+                (first,
+                 last))
     if first + last >= 1:
         raise ValueError(
             "Invalid intervals for Geweke convergence analysis",
@@ -66,18 +71,20 @@ def geweke(x, first=.1, last=.5, intervals=20):
     # Last index value
     end = len(x) - 1
 
+    # Start indices may only fall before the final <last> fraction of the chain
+    last_start_idx = (1 - last) * end
+
     # Calculate starting indices
-    sindices = np.arange(0, end // 2, step=int((end / 2) / (intervals - 1)))
+    start_indices = np.arange(0, int(last_start_idx), step=int((last_start_idx) / (intervals - 1)))
 
     # Loop over start indices
-    for start in sindices:
-
+    for start in start_indices:
         # Calculate slices
         first_slice = x[start: start + int(first * (end - start))]
         last_slice = x[int(end - last * (end - start)):]
 
-        z = (first_slice.mean() - last_slice.mean())
-        z /= np.sqrt(first_slice.std() ** 2 + last_slice.std() ** 2)
+        z = first_slice.mean() - last_slice.mean()
+        z /= np.sqrt(first_slice.var() + last_slice.var())
 
         zscores.append([start, z])
 
@@ -177,7 +184,7 @@ def effective_n(mtrace):
     mtrace : MultiTrace
       A MultiTrace object containing parallel traces (minimum 2)
       of one or more stochastic parameters.
-    
+
     Returns
     -------
     n_eff : float
@@ -191,13 +198,13 @@ def effective_n(mtrace):
       .. math:: \hat{n}_{eff} = \frac{mn}}{1 + 2 \sum_{t=1}^T \hat{\rho}_t}
 
     where :math:`\hat{\rho}_t` is the estimated autocorrelation at lag t, and T
-    is the first odd positive integer for which the sum :math:`\hat{\rho}_{T+1} + \hat{\rho}_{T+1}` 
+    is the first odd positive integer for which the sum :math:`\hat{\rho}_{T+1} + \hat{\rho}_{T+2}`
     is negative.
 
     References
     ----------
     Gelman et al. (2014)"""
-    
+
     if mtrace.nchains < 2:
         raise ValueError(
             'Calculation of effective sample size requires multiple chains of the same length.')
@@ -226,32 +233,32 @@ def calc_vhat(x):
             rotated_indices = np.roll(np.arange(x.ndim), 1)
             # Now iterate over the dimension of the variable
             return np.squeeze([calc_vhat(xi) for xi in x.transpose(rotated_indices)])
-    
+
     def calc_n_eff(x):
-        
+
         m, n = x.shape
-        
+
         negative_autocorr = False
         t = 1
-        
+
         Vhat = calc_vhat(x)
-        
-        variogram = lambda t: (sum(sum((x[j][i] - x[j][i-t])**2 
+
+        variogram = lambda t: (sum(sum((x[j][i] - x[j][i-t])**2
                             for i in range(t,n)) for j in range(m)) / (m*(n - t)))
-        
+
         rho = np.ones(n)
         # Iterate until the sum of consecutive estimates of autocorrelation is negative
         while not negative_autocorr and (t < n):
-        
+
             rho[t] = 1. - variogram(t)/(2.*Vhat)
-        
+
             if not t % 2:
                 negative_autocorr = sum(rho[t-1:t+1]) < 0
-        
+
             t += 1
-            
+
         return int(m*n / (1. + 2*rho[1:t].sum()))
-    
+
     n_eff = {}
     for var in mtrace.varnames:
 
diff --git a/pymc3/tests/test_diagnostics.py b/pymc3/tests/test_diagnostics.py
@@ -1,65 +1,107 @@
-from ..theanof import inputvars
-from ..model import Model, modelcontext
+import unittest
+
+from numpy.testing import assert_allclose, assert_array_less
+
+from ..model import Model
 from ..step_methods import Slice, Metropolis, NUTS
 from ..distributions import Normal
 from ..tuning import find_MAP
 from ..sampling import sample
 from ..diagnostics import effective_n, geweke, gelman_rubin
 from pymc3.examples import disaster_model as dm
-from numpy import all, isclose
 
-def test_gelman_rubin(n=1000):
 
-    with dm.model:
-        # Run sampler
-        step1 = Slice([dm.early_mean, dm.late_mean])
-        step2 = Metropolis([dm.switchpoint])
-        start = {'early_mean': 2., 'late_mean': 3., 'switchpoint': 50}
-        ptrace = sample(n, [step1, step2], start, njobs=2,
-                        random_seed=[1, 3])
+class TestGelmanRubin(unittest.TestCase):
+    good_ratio = 1.1
+
+    def get_ptrace(self, n_samples):
+        with dm.model:
+            # Run sampler
+            step1 = Slice([dm.early_mean, dm.late_mean])
+            step2 = Metropolis([dm.switchpoint])
+            start = {'early_mean': 2., 'late_mean': 3., 'switchpoint': 50}
+            ptrace = sample(n_samples, [step1, step2], start, njobs=2,
+                            random_seed=[1, 3])
+        return ptrace
+
+    def test_good(self):
+        """Confirm Gelman-Rubin statistic is close to 1 for a reasonable number of samples."""
+        n_samples = 1000
+        rhat = gelman_rubin(self.get_ptrace(n_samples))
+        self.assertTrue(all(1 / self.good_ratio < r < self.good_ratio for r in rhat.values()))
+
+    def test_bad(self):
+        """Confirm Gelman-Rubin statistic is far from 1 for a small number of samples."""
+        n_samples = 10
+        rhat = gelman_rubin(self.get_ptrace(n_samples))
+        self.assertFalse(all(1 / self.good_ratio < r < self.good_ratio for r in rhat.values()))
+
+
+class TestDiagnostics(unittest.TestCase):
+    def get_switchpoint(self, n_samples):
+        with dm.model:
+            # Run sampler
+            step1 = Slice([dm.early_mean, dm.late_mean])
+            step2 = Metropolis([dm.switchpoint])
+            trace = sample(n_samples, [step1, step2], progressbar=False,
+                           random_seed=1)
+        return trace['switchpoint']
 
-    rhat = gelman_rubin(ptrace)
+    def test_geweke_negative(self):
+        """Confirm Geweke diagnostic is larger than 1 for a small number of samples."""
+        n_samples = 200
+        n_intervals = 20
+        switchpoint = self.get_switchpoint(n_samples)
+        first = 0.1
+        last = 0.7
+        # returns an (intervals x 2) matrix: first column start indices, second column z-scores
+        z_switch = geweke(switchpoint, first=first, last=last, intervals=n_intervals)
 
-    assert all([r < 1.5 for r in rhat.values()])
+        # These z-scores should be larger, since there are not many samples.
+        self.assertGreater(max(abs(z_switch[:, 1])), 1)
 
+    def test_geweke_positive(self):
+        """Confirm Geweke diagnostic is smaller than 1 for a reasonable number of samples."""
+        n_samples = 2000
+        n_intervals = 20
+        switchpoint = self.get_switchpoint(n_samples)
 
-def test_geweke(n=3000):
+        with self.assertRaises(ValueError):
+            # first and last must be between 0 and 1
+            geweke(switchpoint, first=-0.3, last=1.1, intervals=n_intervals)
 
-    with dm.model:
-        # Run sampler
-        step1 = Slice([dm.early_mean, dm.late_mean])
-        step2 = Metropolis([dm.switchpoint])
-        trace = sample(n, [step1, step2], progressbar=False,
-                       random_seed=1)
+        with self.assertRaises(ValueError):
+            # first and last must add to < 1
+            geweke(switchpoint, first=0.3, last=0.7, intervals=n_intervals)
 
-    z_switch = geweke(trace['switchpoint'], last=.5, intervals=20)
+        first = 0.1
+        last = 0.7
+        # returns an (intervals x 2) matrix: first column start indices, second column z-scores
+        z_switch = geweke(switchpoint, first=first, last=last, intervals=n_intervals)
+        start = z_switch[:, 0]
+        z_scores = z_switch[:, 1]
 
-    # Ensure `intervals` argument is honored
-    assert len(z_switch) == 20
+        # Ensure `intervals` argument is honored
+        self.assertEqual(z_switch.shape[0], n_intervals)
 
-    # Ensure `last` argument is honored
-    assert z_switch[-1, 0] < (n / 2)
+        # Start indices must fall before the final <last> fraction of the samples
+        assert_array_less(start, (1 - last) * n_samples)
 
-    # These should all be z-scores
-    print(max(abs(z_switch[:, 1])))
-    assert max(abs(z_switch[:, 1])) < 1
+        # These z-scores should be small, since there are more samples.
+        self.assertLess(max(abs(z_scores)), 1)
 
+    def test_effective_n(self):
+        """Check effective sample size is equal to number of samples when initializing with MAP"""
+        n_jobs = 3
+        n_samples = 100
 
-def test_effective_n(k=3, n=1000):
-    """Unit test for effective sample size"""
-    
-    model = Model()
-    with model:
-        x = Normal('x', 0, 1., shape=5)
+        with Model():
+            Normal('x', 0, 1., shape=5)
 
-        # start sampling at the MAP
-        start = find_MAP()
+            # start sampling at the MAP
+            start = find_MAP()
+            step = NUTS(scaling=start)
+            ptrace = sample(n_samples, step, start, njobs=n_jobs, random_seed=42)
 
-        step = NUTS(scaling=start)
-    
-        ptrace = sample(n, step, start, njobs=k,
-                        random_seed=42)
-        
-    n_eff = effective_n(ptrace)['x']
-    
-    assert isclose(n_eff, k*n, 2).all()
+        n_effective = effective_n(ptrace)['x']
+        assert_allclose(n_effective, n_jobs * n_samples, 2)
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -18,11 +18,13 @@
 import numpy as np
 from numpy.testing import assert_almost_equal
 from numpy.linalg import inv
+import numpy.random as nr
 
 from scipy import integrate
 import scipy.stats.distributions  as sp
 import scipy.stats
 
+nr.seed(20160905)
 
 class Domain(object):
     def __init__(self, vals, dtype=None, edges=None, shape=None):