
Commit 41e6a32: fix tests for linear models
1 parent: cba1b8a

6 files changed (+90, -24 lines)

numpy_ml/linear_models/linear_regression.py
Lines changed: 7 additions & 4 deletions

@@ -122,7 +122,8 @@ def update(self, X, y, weights=None):
         X, y = np.atleast_2d(X), np.atleast_2d(y)

         X1, Y1 = X.shape[0], y.shape[0]
-        weights = np.ones(X1) if weights is None else np.squeeze(np.atleast_1d(weights))
+        weights = np.ones(X1) if weights is None else np.atleast_1d(weights)
+        weights = np.squeeze(weights) if weights.size > 1 else weights

         err_str = f"weights must have shape ({X1},) but got {weights.shape}"
         assert weights.shape == (X1,), err_str
@@ -187,7 +188,8 @@ def fit(self, X, y, weights=None):
         """
         N = X.shape[0]

-        weights = np.ones(N) if weights is None else np.squeeze(np.atleast_1d(weights))
+        weights = np.ones(N) if weights is None else np.atleast_1d(weights)
+        weights = np.squeeze(weights) if weights.size > 1 else weights
         err_str = f"weights must have shape ({N},) but got {weights.shape}"
         assert weights.shape == (N,), err_str

@@ -200,7 +202,7 @@ def fit(self, X, y, weights=None):
             X = np.c_[np.sqrt(weights), X]

         self.sigma_inv = np.linalg.pinv(X.T @ X)
-        self.beta = np.atleast_2d(self.sigma_inv @ X.T @ y).T
+        self.beta = np.atleast_2d(self.sigma_inv @ X.T @ y)

         self._is_fit = True
         return self
@@ -223,4 +225,5 @@ def predict(self, X):
         # convert X to a design matrix if we're fitting an intercept
         if self.fit_intercept:
             X = np.c_[np.ones(X.shape[0]), X]
-        return np.dot(X, self.beta)
+        return X @ self.beta
+        # return np.dot(X, self.beta)
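
Note on the weights change above: `np.squeeze` collapses a length-1 weight vector to a 0-d array, which then fails the `weights.shape == (N,)` assertion when fitting or updating on a single example. A minimal NumPy sketch of the behavior (illustration only, not the library code):

import numpy as np

w = np.atleast_1d([0.5])                           # one sample weight, shape (1,)
print(np.squeeze(w).shape)                         # () -- 0-d array, fails a (N,) shape check
print((np.squeeze(w) if w.size > 1 else w).shape)  # (1,) -- passes the check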

numpy_ml/tests/test_linear_regression.py
Lines changed: 0 additions & 1 deletion

@@ -1,7 +1,6 @@
 # flake8: noqa
 import numpy as np

-import statsmodels.api as sm
 from sklearn.linear_model import LinearRegression as LinearRegressionGold

 from numpy_ml.linear_models import LinearRegression

numpy_ml/tests/test_naive_bayes.py
Lines changed: 15 additions & 13 deletions

@@ -1,3 +1,4 @@
+# flake8: noqa
 import numpy as np
 from sklearn import datasets
 from sklearn.model_selection import train_test_split
@@ -13,6 +14,7 @@ def test_GaussianNB(N=10):
     N = np.inf if N is None else N

     i = 1
+    eps = np.finfo(float).eps
     while i < N + 1:
         n_ex = np.random.randint(1, 300)
         n_feats = np.random.randint(1, 100)
@@ -33,29 +35,29 @@ def test_GaussianNB(N=10):

         sk_preds = sklearn_NB.predict(X_test)

-        for i in range(len(NB.labels)):
+        for j in range(len(NB.labels)):
             P = NB.parameters
-            jointi = np.log(sklearn_NB.class_prior_[i])
-            jointi_mine = np.log(P["prior"][i])
+            jointi = np.log(sklearn_NB.class_prior_[j])
+            jointi_mine = np.log(P["prior"][j])

             np.testing.assert_almost_equal(jointi, jointi_mine)

-            n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * sklearn_NB.sigma_[i, :]))
-            n_ij_mine = -0.5 * np.sum(np.log(2.0 * np.pi * P["sigma"][i]))
+            n_jk = -0.5 * np.sum(np.log(2.0 * np.pi * sklearn_NB.sigma_[j, :] + eps))
+            n_jk_mine = -0.5 * np.sum(np.log(2.0 * np.pi * P["sigma"][j] + eps))

-            np.testing.assert_almost_equal(n_ij_mine, n_ij)
+            np.testing.assert_almost_equal(n_jk_mine, n_jk)

-            n_ij2 = n_ij - 0.5 * np.sum(
-                ((X_test - sklearn_NB.theta_[i, :]) ** 2) / (sklearn_NB.sigma_[i, :]), 1
+            n_jk2 = n_jk - 0.5 * np.sum(
+                ((X_test - sklearn_NB.theta_[j, :]) ** 2) / (sklearn_NB.sigma_[j, :]), 1
             )

-            n_ij2_mine = n_ij_mine - 0.5 * np.sum(
-                ((X_test - P["mean"][i]) ** 2) / (P["sigma"][i]), 1
+            n_jk2_mine = n_jk_mine - 0.5 * np.sum(
+                ((X_test - P["mean"][j]) ** 2) / (P["sigma"][j]), 1
             )
-            np.testing.assert_almost_equal(n_ij2_mine, n_ij2, decimal=4)
+            np.testing.assert_almost_equal(n_jk2_mine, n_jk2, decimal=4)

-            llh = jointi + n_ij2
-            llh_mine = jointi_mine + n_ij2_mine
+            llh = jointi + n_jk2
+            llh_mine = jointi_mine + n_jk2_mine

             np.testing.assert_almost_equal(llh_mine, llh, decimal=4)

numpy_ml/tests/test_nn.py
Lines changed: 1 addition & 2 deletions

@@ -14,8 +14,6 @@
 import torch.nn as nn
 import torch.nn.functional as F

-import tensorflow.keras.datasets.mnist as mnist
-
 from numpy_ml.neural_nets.utils import (
     calc_pad_dims_2D,
     conv2D_naive,
@@ -2308,6 +2306,7 @@ def test_conv(N=15):

 def fit_VAE():
     # for testing
+    import tensorflow.keras.datasets.mnist as mnist
     from numpy_ml.neural_nets.models.vae import BernoulliVAE

     np.random.seed(12345)
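
Moving the `tensorflow.keras.datasets.mnist` import from module scope into `fit_VAE` means the test module can be imported, and the other tests run, without TensorFlow installed; the dependency is only touched when the VAE helper is actually called. That is a reasonable reading of the change, not something the commit states. The deferred-import pattern in sketch form:

# before: module-level import; the whole test module fails to import
# on machines without TensorFlow
# import tensorflow.keras.datasets.mnist as mnist

def fit_VAE():
    # after: deferred import, evaluated only when this helper runs
    import tensorflow.keras.datasets.mnist as mnist
    ...  # build and train the VAE as in the test file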

numpy_ml/tests/test_preprocessing.py
Lines changed: 1 addition & 1 deletion

@@ -248,7 +248,7 @@ def test_mel_filterbank(N=15):
             n_fft=window_len,
             n_mels=n_filters,
             htk=True,
-            norm=norm if norm == 1 else None,
+            norm="slaney" if norm == 1 else None,
         )

         np.testing.assert_almost_equal(mine, theirs)
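
The `norm` fix appears to track an API change in the reference library: in newer librosa releases a numeric `norm` requests unit l_p normalization of each filter, so the old `norm=1` no longer means Slaney-style area normalization; passing the string selects the intended behavior. A hedged sketch of the gold-standard call, assuming the comparison is against `librosa.filters.mel` (the keyword names in the hunk match that function):

import librosa

# "slaney" area-normalizes each triangular filter; None leaves filters unnormalized
fb = librosa.filters.mel(sr=22050, n_fft=512, n_mels=20, htk=True, norm="slaney")
print(fb.shape)  # (20, 257) == (n_mels, 1 + n_fft // 2)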

numpy_ml/tests/test_utils.py
Lines changed: 66 additions & 3 deletions

@@ -10,15 +10,21 @@
 from sklearn.metrics.pairwise import polynomial_kernel as sk_poly


-from numpy_ml.utils.distance_metrics import euclidean
+from numpy_ml.utils.distance_metrics import (
+    hamming,
+    euclidean,
+    chebyshev,
+    manhattan,
+    minkowski,
+)
 from numpy_ml.utils.kernels import LinearKernel, PolynomialKernel, RBFKernel
 from numpy_ml.utils.data_structures import BallTree
 from numpy_ml.utils.graphs import (
+    Edge,
     DiGraph,
     UndirectedGraph,
-    Edge,
-    random_unweighted_graph,
     random_DAG,
+    random_unweighted_graph,
 )

 #######################################################################
@@ -110,6 +116,63 @@ def test_euclidean(N=1):
         i += 1


+def test_hamming(N=1):
+    np.random.seed(12345)
+    i = 0
+    while i < N:
+        N = np.random.randint(1, 100)
+        x = (np.random.rand(N) * 100).round().astype(int)
+        y = (np.random.rand(N) * 100).round().astype(int)
+        mine = hamming(x, y)
+        theirs = scipy.spatial.distance.hamming(x, y)
+        np.testing.assert_almost_equal(mine, theirs)
+        print("PASSED")
+        i += 1
+
+
+def test_minkowski(N=1):
+    np.random.seed(12345)
+    i = 0
+    while i < N:
+        N = np.random.randint(1, 100)
+        p = 1 + np.random.rand() * 10
+        x = np.random.rand(N)
+        y = np.random.rand(N)
+        mine = minkowski(x, y, p)
+        theirs = scipy.spatial.distance.minkowski(x, y, p)
+        np.testing.assert_almost_equal(mine, theirs)
+        print("PASSED")
+        i += 1
+
+
+def test_chebyshev(N=1):
+    np.random.seed(12345)
+    i = 0
+    while i < N:
+        N = np.random.randint(1, 100)
+        x = np.random.rand(N)
+        y = np.random.rand(N)
+        mine = chebyshev(x, y)
+        theirs = scipy.spatial.distance.chebyshev(x, y)
+        np.testing.assert_almost_equal(mine, theirs)
+        print("PASSED")
+        i += 1
+
+
+def test_manhattan(N=1):
+    np.random.seed(12345)
+    i = 0
+    while i < N:
+        N = np.random.randint(1, 100)
+        x = np.random.rand(N)
+        y = np.random.rand(N)
+        mine = manhattan(x, y)
+        theirs = scipy.spatial.distance.cityblock(x, y)
+        np.testing.assert_almost_equal(mine, theirs)
+        print("PASSED")
+        i += 1
+
+
 #######################################################################
 #                           Data Structures                           #
 #######################################################################
